[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Condor-users] GUI for quill mining



Dear Steven

  It's not graphical, but part of our Web Services interface performs a
"condor_q" on the terminal, using the method described in Matthew and
Todd's "Developer APIs to Condor + A Tutorial on Condor Web Services"
PowerPoint.  Have a look at the printJobStatus () methods of our
CondorJobStatus.java, which I have attached.

  Good luck with your project!
 
Sean

Steven Timm wrote:

>
>Has anyone out there done work on a graphical frontend that
>displays historical data (job data mostly) from the quill/postgres
>database? We have a summer student working on that for us this
>summer but if he can start from something that partly works it could
>be a help.  Any pointers are appreciated.
>
>Thanks
>
>Steve Timm
>
/*
 * CondorJobStatus.java
 *
 * Created on December 1, 2007, 11:03 PM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

package condorwsgui;

import birdbath.ClassAd;
import birdbath.Schedd;
import birdbath.Transaction;
import condor.ClassAdStructAttr;
import condor.FileInfo;
import java.lang.Thread;
import java.net.URL;
import java.io.File;
import java.rmi.RemoteException;
import java.io.FileNotFoundException;
//import java.util.regex.*;

/**
 * See slides 30 and 32 of the "Developer APIs to Condor + a Tutorial on Condor's 
 * Web Services Interface" PPT for documentation
 *
 * @author Written by David Gong; updated and expanded by Sean Manning
 */
public class CondorJobStatus {

	/*
    private static enum OS {LINUX, WINDOWS, OTHERS};
    private OS myOS;	// Represents the family of operating system (eg. Linux or Windows) running on the system
	*/
 
	
	/*
	 * Condor JobStatus codes used with the various isJobFoo () methods and
	 * jobHasStatus ().  Declared final so the codes cannot be reassigned.
	 */
	// See http://pages.cs.wisc.edu/~adesmet/status.html for a reference.
	private static final int STATUS_UNEXPANDED = 0; // U
	private static final int STATUS_IDLE = 1;       // I
	private static final int STATUS_RUNNING = 2;    // R
	private static final int STATUS_REMOVED = 3;    // X
	private static final int STATUS_COMPLETE = 4;   // C
	private static final int STATUS_HOLD = 5;       // H
	
    /*
     * These variables all represent aspects of condor or facts about the system
     */
    private Schedd schedd;	// Represents the Condor schedd daemon
    private String owner;	// SM 
    private autoUpdate myAutoUpdate; // This is an inner class of CondorJobStatus
            
    // An ordered list of jobs, each of which is an ordered list of attributes.
    // The autoUpdate thread replaces this array while other threads read it,
    // so it is volatile to make every new snapshot visible to the readers.
    private volatile ClassAdStructAttr[][] jobsAttr;
    private String[] colSelected;
    
    // The titles of the six columns of job status output.
    private String[] defaultColSelected = {"ClusterId", "ProcId", "Owner", "Qdate", "JobStatus", "Cmd"};
    
    // Represents the possible states that a Condor job can be in.  
    // Padded to 8-10 characters long.
    private static String[] statusName = { "         ", "Idle     ", "Running  ", "Removed ", "Completed", "Held     "};
    
    // The path to the default directory for storing output files
    private String defaultOutputFileDir;
    
    /**
     * Creates a CondorJobStatus that prints the default set of columns and
     * uses the user's home directory as the default output directory.
     */
    public CondorJobStatus() {
        this.defaultOutputFileDir = System.getProperty("user.home");
        this.colSelected = this.defaultColSelected;
    }
    
    /**
     * Determines which OS the system uses, assigning the correct value to myOS.
     */
/*	private void determinteOS(){	// TODO Is this needed?
		String myOSName = System.getProperty("os.name");
        Pattern p1 = Pattern.compile("WIN"); // Compiles regular expression into Pattern.
        Pattern p2 = Pattern.compile("Lin");
        Matcher m1 = p1.matcher(myOSName);
        Matcher m2 = p2.matcher(myOSName);
        if (m1.find()) {
            myOS = OS.WINDOWS;
        }
        else if (m2.find()) {
            myOS = OS.LINUX;
        }
        else {
            myOS = OS.OTHERS;
        }
    }*/
    
	/**
	 * Replaces the list of ClassAd attribute names that printTitle () and
	 * printJobStatus () output, one tab-separated column per name.
	 * The array is stored by reference, not copied.
	 * @param col The attribute names to print, in column order
	 */
    public void setColoumn(String[] col){
        this.colSelected = col;
    }

    /**
     * Fetches the current job list once, then starts a background thread that
     * refreshes jobsAttr every 30 seconds.  If an updater from an earlier call
     * is still registered it is told to stop first, so repeated calls do not
     * leave orphaned threads polling the schedd.
     * @throws Exception If the initial fetch from the schedd fails; in that
     *         case no new thread is started and any existing one is left alone
     */
    public void updateJobStatus() throws Exception {
        updateAdFromServer();
        if (myAutoUpdate != null) {
            myAutoUpdate.stopAutoUpdate();
        }
        myAutoUpdate = new autoUpdate(30000);
        myAutoUpdate.start();
    }

    /**
     * Stop updating jobsAttr regularly with every job on the queue.
     * Does nothing if no updater thread has been created yet.
     */
    public void stopAutoUpdate() {
        if (myAutoUpdate == null) {
            return; // No updater has been started, so there is nothing to stop
        }
        myAutoUpdate.stopAutoUpdate();
    }
    
    /**
     * Start retrieving the output of every Completed job to a fixed location.
     * The retrieval is performed by the updater thread, so updateJobStatus ()
     * must have been called successfully first.
     * @throws IllegalStateException If no updater thread exists yet
     */
    public void startAutoRetrieve () {
        if (myAutoUpdate == null) {
            throw new IllegalStateException(
                    "startAutoRetrieve () called before updateJobStatus ()");
        }
        myAutoUpdate.startAutoRetrieve();
    }
    
    /**
     * Stop retrieving the output of every Completed job to a fixed location.
     * Does nothing if no updater thread has been created yet.
     */
    public void stopAutoRetrieve () {
        if (myAutoUpdate == null) {
            return; // No updater exists, so auto-retrieve cannot be running
        }
        myAutoUpdate.stopAutoRetrieve();
    }
    
    /**
     * Prints the status of one job as a single tab-separated line, with one
     * field per entry of colSelected.  The JobStatus column is translated to a
     * readable name when the code is one this class knows; any other value
     * (missing, non-numeric, or a code outside statusName) is printed as-is
     * rather than aborting the listing.
     * @param ad The ClassAd from which the Job Status will be retrieved
     */
    public void printJobStatus(ClassAd ad){
        StringBuilder message = new StringBuilder();	// The one-line message to print

        // For each column, add something to the message
        for (String column : colSelected) {
            String value = ad.get(column);
            if ("JobStatus".equalsIgnoreCase(column)) {
                value = statusLabel(value);
            }
            message.append(value).append("\t");
        }
        System.out.println(message);
    }

    /**
     * Maps a raw JobStatus attribute value to its display name.
     * @param raw The attribute value as returned by the ClassAd; may be null
     * @return The matching statusName entry, or raw itself if it is not an
     *         integer within the bounds of statusName
     */
    private static String statusLabel(String raw) {
        try {
            int code = Integer.parseInt(raw);
            if (code >= 0 && code < statusName.length) {
                return statusName[code];
            }
        } catch (NumberFormatException err) {
            // Fall through: show whatever the ClassAd contained
        }
        return raw;
    }

    /**
     * Prints the column titles followed by one line per job recorded in
     * jobsAttr, which should be every job known to the local scheduler.
     *
     * If no job list has been fetched yet, a notice is printed instead of the
     * job lines.  (The previous implementation called wait () without owning
     * the monitor, which always threw, and then dereferenced the null list.)
     */
    public void printJobsStatus(){
        printTitle();	// Label the columns

        // Work on one snapshot; the updater thread may replace jobsAttr at any time
        ClassAdStructAttr[][] jobs = jobsAttr;
        if (jobs == null) {
            System.out.println("Waiting for update to finish.....");
            return;
        }

        for (ClassAdStructAttr[] attrs : jobs) {
            printJobStatus(new ClassAd(attrs));
        }
    }

    /**
     * Prints the header line: every selected column name followed by a tab.
     */
    private void printTitle(){
        StringBuilder header = new StringBuilder();
        for (String column : colSelected) {
            header.append(column).append("\t");
        }
        System.out.println(header.toString());
    }
	
    /**
     * Fetches the ClassAd of a single job from the schedd inside its own
     * transaction, then prints the column titles and that job's status line.
     * @param cluster The cluster ID of the job
     * @param job The job (proc) ID of the job within the cluster
     * @throws RemoteException If the schedd cannot be contacted
     */
    public void printJobStatus(int cluster, int job) throws RemoteException{
        Transaction txn = schedd.createTransaction();
        txn.begin(30);	// presumably a timeout in seconds - confirm against birdbath
        ClassAd jobAd = new ClassAd(txn.getJobAd(cluster, job));
        txn.commit();

        printTitle();
        printJobStatus(jobAd);
    }
  
	/**
	 * Background thread that, every interVal milliseconds, refreshes jobsAttr
	 * via updateAdFromServer () and, while auto-retrieve is switched on,
	 * fetches the output of finished jobs via retrieveJobsIfDone ().
	 *
	 * The control flags are volatile because they are set from the caller's
	 * thread and read inside this thread's run () loop.
	 * @author Written by David Gong; commented by Sean Manning
	 */
    class autoUpdate extends Thread {
        private final long interVal;            // Polling period in ms, so 30,000 = 30 s
        private volatile boolean needStop;      // Should run () stop?
        private volatile boolean autoRetrieve;  // Should finished jobs be retrieved?

        /**
         * Creates a new autoUpdate with auto-retrieve switched off.
         * @param interval The polling interval in ms
         */
        public autoUpdate (long interval) {
            interVal = interval;
            needStop = false;
            autoRetrieve = false;
        }

        /**
         * Asks run () to stop permanently.  The loop notices the request the
         * next time it checks the flag, i.e. after the current update/sleep
         * cycle has finished.
         */
        public void stopAutoUpdate () {
            needStop = true;
            System.out.println ("Current need to stop ('true' expected):" + needStop);
        }

        /**
         * Start retrieving the output of every Complete job on each cycle.
         */
        public void startAutoRetrieve () {
            autoRetrieve = true;
        }

        /**
         * Stop retrieving the output of every Complete job on each cycle.
         */
        public void stopAutoRetrieve () {
            autoRetrieve = false;
        }

        /**
         * Repeats the update (and optional retrieval) every interVal ms until
         * stopAutoUpdate () is called or the thread is interrupted.
         */
        @Override
        public void run () {
            while (! needStop) {
                System.out.println ("Current need to stop ('false' expected):" + needStop);
                try {
                    updateAdFromServer ();
                    if (autoRetrieve) {
                        retrieveJobsIfDone ();
                    }
                }
                catch (Exception err) {
                    // Keep polling after a failed cycle, but say what went wrong
                    System.out.println ("Update from server failed in autoUpdate.run (): " + err);
                }
                try {
                    Thread.sleep (interVal);
                }
                catch (InterruptedException err) {
                    // An interrupt is a request to shut down: restore the flag
                    // for whoever inspects this thread and leave the loop.
                    System.out.println ("Sleep failed in autoUpdate.run ()");
                    Thread.currentThread ().interrupt ();
                    break;
                }
            }
            System.out.println("AutoUpdate is no longer running....");
        }
    }

    /**
     * Replaces jobsAttr with the ClassAds of every job the schedd reports for
     * the configured owner (cp. slide 30 of the PPT).
     *
     * @throws RemoteException If there is a problem contacting the schedd
     */
    private void updateAdFromServer() throws RemoteException{
        // Constraint expression selecting only this owner's jobs
        final String constraint = "Owner==\"" + owner + "\"";
        jobsAttr = schedd.getJobAds(constraint);
        System.out.println("update from server finished");
    }

    /**
     * Sets the Schedd that all later queries and transactions of this
     * CondorJobStatus are sent to.
     * @param schedd The Schedd to be associated with this object
     */
    public void setSchedd(Schedd schedd){
        this.schedd = schedd;
    }
    
    /**
     * Sets the owner name used to select jobs when the job list is fetched
     * from the schedd.
     * @param owner The name of the new owner
     */
    public void setOwner (String owner) {
        this.owner = owner;
    }
    
    /**
     * This is the job retrieval method which underlies all the others.  Within
     * one transaction it lists the job's spool files, copies each of them to
     * the local output directory, closes the spool and removes the job from
     * the queue.
     *
     * The local directory is the job's OutputFileDir attribute if present,
     * otherwise the default output directory (see getOutputLocalLocation ()).
     * As a side effect the "user.dir" system property is set to that directory.
     *
     * See slide 32 of "Developer APIs to Condor" PPT for a model.
     * @param cluster The cluster number of the job (the part of the job number before the period)
     * @param job The job number of the job (the part of the job number after the period)
     * @throws RemoteException Presumably raised when the schedd cannot be reached - confirm
     * @throws FileNotFoundException Presumably raised when a local file cannot be created - confirm
     * @throws Exception Declared for whatever else the birdbath calls may raise
     */
    public void retrieveJob(int cluster, int job) throws RemoteException, FileNotFoundException, Exception{
        System.out.println ("In RetrieveJob (" + cluster + ", " + job + ")");
        Transaction xact = schedd.createTransaction();
        xact.begin(30);

        // Check for an OutputSandbox attribute
        ClassAd ad = new ClassAd (xact.getJobAd (cluster, job));
        if (ad.get ("OutputSandbox") != null) {
            // TODO Implement: retrieve only the files named in the OutputSandbox.
            System.out.println ("OutputSandbox detected ...");
        }

        // Until the sandbox is honoured, retrieve all files in the spool/cluster folder
        FileInfo[] files = xact.listSpool(cluster, job); // "Discover available files" (PPT)

        // Resolve the destination once instead of once per use
        String outputDir = getOutputLocalLocation(ad);
        System.out.println ("user.dir: " + outputDir);
        // NOTE(review): this changes user.dir for the whole JVM - confirm it is still needed
        System.setProperty("user.dir", outputDir);

        for (FileInfo file : files) { // file describes the remote file
            String remoteName = file.getName ();
            File localFile = new File(outputDir, remoteName);
            System.out.println ("Remote name of file to stage out: " + remoteName);
            System.out.println ("New local path will be: " + outputDir + "/" + remoteName);
            // NOTE(review): the int cast truncates sizes above 2 GB - confirm the API's limit
            xact.getFile(cluster, job, remoteName, (int)file.getSize(), localFile);
        }
        xact.closeSpool(cluster, job);
        xact.removeJob(cluster, job, "Successfully retrieved result, close this job");
        xact.commit();
    }
    
    /**
     * Copies back the output files of the job described by a ClassAd, using
     * the ClusterId and ProcId attributes of that ad to identify the job.
     * @param ad The ClassAd of the job to retrieve
     * @throws RemoteException
     * @throws FileNotFoundException
     * @throws Exception
     */
    public void retrieveJob(ClassAd ad) throws RemoteException, FileNotFoundException, Exception{
        retrieveJob(clusterIdOf(ad), jobIdOf(ad));
    }
    
    /**
     * Placeholder for sandbox-aware retrieval; currently does nothing.
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @param outputSandbox Not used yet
     */
    public void retrieveJob (int cluster, int job, String outputSandbox) {
        // Intentionally empty: not implemented yet
    }
    
    /**
     * Retrieves the output files of every job in jobsAttr whose status is
     * Complete.  A failure on one job is reported and does not stop the
     * remaining jobs from being processed.  Does nothing if no job list has
     * been fetched yet.
     */
    public void retrieveJobsIfDone(){
        // Work on one snapshot; the updater thread may replace jobsAttr at any time
        ClassAdStructAttr[][] jobs = jobsAttr;
        if (jobs == null) {
            return;
        }
        for (ClassAdStructAttr[] attrs : jobs) {
            ClassAd ad = new ClassAd(attrs);
            try {
                if (isJobComplete(ad)) {
                    retrieveJob(ad);
                }
            } catch (RemoteException err) {
                System.out.println("Error on remote access output files: " + err);
            } catch (FileNotFoundException err) {
                System.out.println("File not found while retrieve output files: " + err);
            } catch (Exception err) {
                System.out.println("Error while retrieving output files: " + err);
            }
        }
    }
    
    /**
     * Reads the ClusterId attribute of a job's ClassAd as an int.
     * @param ad The ClassAd of a job
     * @return The cluster ID of that job
     */
    private int clusterIdOf(ClassAd ad){
        final String raw = ad.get("ClusterId");
        return Integer.parseInt(raw);
    }

    /**
     * Reads the ProcId attribute of a job's ClassAd as an int.
     * @param ad The ClassAd of a job
     * @return The job ID of that job
     */
    private int jobIdOf(ClassAd ad){
        final String raw = ad.get("ProcId");
        return Integer.parseInt(raw);
    }

    /**
     * Closes a job's spool and removes the job from the queue in a single
     * transaction.  (Per the original author this moves the job into the
     * history log.)
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @param reason The reason why the job is being removed
     * @throws RemoteException Thrown by the Transaction in case of a problem making the change
     */
    public void closeSpoolAndRemoveJob(int cluster, int job, String reason)throws RemoteException{
        Transaction txn = schedd.createTransaction();
        txn.begin(30);
        txn.closeSpool(cluster, job);
        txn.removeJob(cluster, job, reason);
        txn.commit();
    }
    
    /**
     * Removes one job from the queue inside its own transaction.
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @param reason The reason why this action is being done
     * @throws RemoteException Thrown by the Transaction in case of a problem making the change
     */
    public void removeJob(int cluster, int job, String reason)throws RemoteException{
        Transaction txn = schedd.createTransaction();
        txn.begin(30);
        txn.removeJob(cluster, job, reason);
        txn.commit();
    }
    
    /**
     * Puts one job on hold inside its own transaction; the job stays in the queue.
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @param reason The reason why this action is being done
     * @throws RemoteException Thrown by the Transaction in case of a problem making the change
     */
    public void holdJob(int cluster, int job, String reason)throws RemoteException{
        Transaction txn = schedd.createTransaction();
        txn.begin (30);
        txn.holdJob (cluster, job, reason);
        txn.commit ();
    }
    
    /**
     * Calls Transaction.releaseJob () for one job inside its own transaction.
     * Presumably this is the counterpart of holdJob () and lets a held job run
     * again - confirm against the birdbath documentation.
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @param reason The reason why this action is being done
     * @throws RemoteException Thrown by the Transaction in case of a problem making the change
     */
    public void releaseJob(int cluster, int job, String reason) throws RemoteException{
        Transaction txn = schedd.createTransaction();
        txn.begin(30);
        txn.releaseJob(cluster, job, reason);
        txn.commit();
    }
    
    /**
     * Determines whether or not a job is in state Complete, according to the
     * most recently fetched job list.
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @return True if the job exists and is complete, false otherwise.
     */
    public boolean isJobComplete(int cluster, int job){
        // Previously compared against STATUS_RUNNING, which reported running
        // jobs as complete and complete jobs as not complete.
        return jobHasStatus (cluster, job, STATUS_COMPLETE);
    }

	// Old code by DG
    // TODO Replace reference to this method.
    /**
     * Determines whether the job described by a ClassAd is complete by
     * checking its JobStatus attribute.
     *
     * @param ad The ClassAd of the job.
     * @return True if the job is complete, false otherwise
     */
    private boolean isJobComplete(ClassAd ad){
        final int jobStatus = Integer.parseInt(ad.get("JobStatus"));
        return jobStatus == 4; // 4 is the code for Complete
    }

    /**
     * Reports whether a job is Idle according to the most recently fetched
     * job list.
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @return True if the job exists and is idle, false otherwise.
     */
    public boolean isJobIdle (int cluster, int job) {
        final boolean idle = jobHasStatus (cluster, job, STATUS_IDLE);
        return idle;
    }
    
    /**
     * Reports whether a job is Running according to the most recently fetched
     * job list.
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @return True if the job exists and is running, false otherwise.
     */
    public boolean isJobRunning (int cluster, int job) {
        final boolean running = jobHasStatus (cluster, job, STATUS_RUNNING);
        return running;
    }
    
    /**
     * Reports whether a job is on Hold according to the most recently fetched
     * job list.
     *
     * @param cluster The cluster ID of the job
     * @param job The job ID of the job
     * @return True if the job exists and is being held, false otherwise.
     */
    public boolean isJobHold (int cluster, int job) {
        final boolean held = jobHasStatus (cluster, job, STATUS_HOLD);
        return held;
    }
    
    /**
     * Returns true if the job with ID cluster.job has job status 'status'
     * in the most recently fetched job list.
     *
     * The first entry of jobsAttr whose ClusterId and ProcId match decides the
     * result.  If no job list has been fetched yet, or the job is not in it,
     * the result is false.
     * @param cluster The cluster ID of the job to check
     * @param job The job ID of the job to check
     * @param status A job status code (defined from zero to five)
     * @return True if the job is listed and its JobStatus equals 'status',
     *         false otherwise
     */
    private boolean jobHasStatus (int cluster, int job, int status) {
        // Work on one snapshot; the updater thread may replace jobsAttr at any time
        ClassAdStructAttr[][] jobs = jobsAttr;
        if (jobs == null) {
            return false;
        }
        for (ClassAdStructAttr[] attrs : jobs) {
            ClassAd ad = new ClassAd(attrs);
            int tmpCluster = Integer.parseInt(ad.get("ClusterId"));
            int tmpJob = Integer.parseInt(ad.get("ProcId"));
            if ((tmpCluster == cluster) && (tmpJob == job)) {
                return jobHasStatus (ad, status);
            }
        }
        return false;
    }
    
    /**
     * Compares the JobStatus attribute of a ClassAd with a status code.
     *
     * Status should be between zero and five; a code that no job carries
     * simply never matches.
     * @param ad The job to check
     * @param status A job status code (defined from zero to five)
     * @return True if the JobStatus of ad is equal to 'status'
     */
    private boolean jobHasStatus (ClassAd ad, int status) {
        final int actual = Integer.parseInt(ad.get("JobStatus"));
        return actual == status;
    }
    
    /**
     * Gets the local folder where the output of a job is stored: the job's
     * OutputFileDir attribute when it has one, otherwise the default output
     * directory of this object.
     *
     * @param ad The ClassAd of the job
     * @return The path to the folder where output is being stored
     */
    public String getOutputLocalLocation(ClassAd ad){
        final String jobDir = ad.get("OutputFileDir");
        return (jobDir != null) ? jobDir : defaultOutputFileDir;
    }
    
    /**
     * Gets the interval at which the updater thread refreshes the job list.
     * @return 0 if no updater has been created, otherwise its interval in ms
     */
    public long getAutoUpdateInterval () {
        return (myAutoUpdate == null) ? 0 : myAutoUpdate.interVal;
    }
    
    /**
     * A simple test method for this class.  Connects to a schedd, fetches the
     * job list of one owner, prints it, and stops the updater thread again.
     *
     * The SSL system properties are only given their built-in values when they
     * have not already been supplied (for example with -D on the command
     * line), so the key material can be overridden without editing this file.
     *
     * @param args This function does not take command-line arguments.
     */
    public static void main (String[] args) {
        /*
         * Setting system properties (cp. slide 38 of "Developer APIs to Condor" PPT
         */
        // For windows, commented out by DG
 //      java.lang.System.setProperty("javax.net.ssl.trustStore", "c:\\Program Files\\Java\\jre1.6.0_03\\bin\\truststore");
 //      java.lang.System.setProperty("javax.net.ssl.keyStore", "c:\\Users\\Daobgong\\JavaProject\\CondorWSGUI\\DavidGridKeyStore");

        // For Unix/Linux
        setPropertyIfAbsent("javax.net.ssl.trustStore", "/hepuser/seangwm/ashokProjects/CondorWebService/CondorWSProjectRon/src/supportfiles/truststore");
        setPropertyIfAbsent("javax.net.ssl.keyStore", "/hepuser/seangwm/ashokProjects/CondorWebService/CondorWSProjectRon/src/supportfiles/keystore");

        setPropertyIfAbsent("javax.net.ssl.keyStoreType", "PKCS12");
        // NOTE(review): a key store password is hard-coded in source here.  It is
        // kept only as a fallback for backward compatibility; supply
        // -Djavax.net.ssl.keyStorePassword instead and remove this default.
        setPropertyIfAbsent("javax.net.ssl.keyStorePassword", "An5sh6An3-");

        /*
         * Create a Schedd listening on the correct port.
         */
        Schedd mySchedd = null;
        try { // Listen on a particular port
//            String tmpStr="https://ugdev01.phys.uvic.ca:1980"; // DG commented this out
            String tmpStr = "https://babargt4.phys.uvic.ca:1980";
            URL scheddLocation = new URL(tmpStr);
            mySchedd = new Schedd(scheddLocation);
        }
        catch (Exception err) {
            System.out.println("Failed to create scheduler, System is exiting.");
            System.exit(-1);
        }

        /*
         * Create a CondorJobStatus to monitor the job in question using the new Schedd
         */
        CondorJobStatus myStatus = new CondorJobStatus();
        myStatus.setSchedd(mySchedd);
        myStatus.setOwner ("seangwm");

        try {
            System.out.println("+++ Trying three operations");
            myStatus.updateJobStatus(); System.out.println("+++ Updated job status");
            myStatus.startAutoRetrieve ();

            myStatus.printJobsStatus(); System.out.println("+++ Print job status");
        }
        catch (Exception err) {
            err.printStackTrace();
            System.out.println("Error on submit");
        }
        finally {
            // Always stop the updater once it exists; otherwise a failure above
            // would leave its non-daemon thread alive and the JVM would never exit.
            if (myStatus.myAutoUpdate != null) {
                myStatus.stopAutoUpdate(); System.out.println("+++ Stop auto update");
            }
        }

        // Job 5450.0 by SM is Hold
/*        if (myStatus.isJobIdle (5450, 0)) {
        	System.out.println ("Error in isJobIdle when False expected on 5450.0.");
        }
        if (myStatus.isJobRunning (5450, 0)) {
        	System.out.println ("Error in isJobRunning when False expected on 5450.0.");
        }
        if (myStatus.isJobComplete (5450, 0)) {
        	System.out.println ("Error in isJobComplete when False expected on 5450.0.");
        }
        if (!myStatus.isJobHold (5450, 0)) {
        	System.out.println ("Error in isJobHold when True expected on 5450.0.");
        }*/

        // Job 5267.0 by DG is Idle
/*        if (myStatus.isJobIdle (5585, 0)) {
        	System.out.println ("Error in isJobIdle when False expected.");
        }
        if (!myStatus.isJobRunning (5585, 0)) {
        	System.out.println ("Error in isJobRunning when True expected.");
        }
        if (myStatus.isJobComplete (5585, 0)) {
        	System.out.println ("Error in isJobComplete when False expected.");
        }
        if (myStatus.isJobHold (5585, 0)) {
        	System.out.println ("Error in isJobHold when False expected.");
        }*/
    }

    /**
     * Sets a system property only when it has no value yet.
     * @param key The name of the system property
     * @param fallback The value to use when the property is not already set
     */
    private static void setPropertyIfAbsent (String key, String fallback) {
        if (System.getProperty(key) == null) {
            System.setProperty(key, fallback);
        }
    }
}