[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Condor-users] No matches being made




I have 63 jobs waiting in the queue to run, and none of them are running.
All of them report "no match found".

Below is the condor_q -long output for one of the jobs, followed by the
condor_status output for a typical node.


-- Submitter: fngp-osg.fnal.gov : <131.225.167.42:39082> : fngp-osg.fnal.gov
MyType = "Job"
TargetType = "Machine"
ClusterId = 53602
QDate = 1137791144
CompletionDate = 0
Owner = "cdf"
RemoteWallClockTime = 0.000000
LocalUserCpu = 0.000000
LocalSysCpu = 0.000000
RemoteUserCpu = 0.000000
RemoteSysCpu = 0.000000
ExitStatus = 0
NumCkpts = 0
NumRestarts = 0
NumSystemHolds = 0
CommittedTime = 0
TotalSuspensions = 0
LastSuspensionTime = 0
CumulativeSuspensionTime = 0
ExitBySignal = FALSE
CondorVersion = "$CondorVersion: 6.7.13 Nov  7 2005 $"
CondorPlatform = "$CondorPlatform: I386-LINUX_RH9 $"
RootDir = "/"
Iwd = "/home/cdf/gram_scratch_yb5ESBWPyS"
JobUniverse = 5
Cmd = "/home/cdf/.globus/.gass_cache/local/md5/32/59/e2/a0d282aae17d5015ad384ac9b2/md5/51/6f/f7/a80caf8d1b838d83533aa5a5bd/data"
MinHosts = 1
MaxHosts = 1
CurrentHosts = 0
WantRemoteSyscalls = FALSE
WantCheckpoint = FALSE
RemoteSpoolDir = "/local/stage1/condor/spool/cluster53602.proc0.subproc0"
JobStatus = 1
EnteredCurrentStatus = 1137791144
JobPrio = 0
User = "cdf@xxxxxxxx"
NiceUser = FALSE
Env = "OSG_DATA=/grid/data;GRID3_TMP_WN_DIR=/local/stage1;GRID3_APP_DIR=/grid/app;hostname=fngp-osg.fnal.gov;GRID3_SPONSOR=fermilab;OSG_LOCATION=/export/osg/grid;LD_LIBRARY_PATH=/export/osg/grid/MonaLisa/Service/VDTFarm/pgsql/lib:/export/osg/grid/voms/lib:/export/osg/grid/prima/lib:/export/osg/grid/globus/lib:/export/osg/grid/expat/lib:/export/osg/grid/expat/lib:;GRID3_SITE_NAME=FNAL_GPFARM;OSG_JOB_CONTACT=fngp-osg.fnal.gov/jobmanager-condor;OSG_SITE_INFO=http://fermigrid.fnal.gov/gpfarm/FNAL_GPFARM_POLICY.html;GRID3_DATA_DIR=/grid/data;OSG_DEFAULT_SE=UNAVAILABLE;OSG_GRID=/local/ups/grid;GRID3_SITE_INFO=http://fermigrid.fnal.gov/gpfarm/FNAL_GPFARM_POLICY.html;LOGNAME=cdf;OSG_SPONSOR=fermilab;OSG_SITE_NAME=FNAL_GPFARM;GRID3_JOB_CONTACT=fngp-osg.fnal.gov/jobmanager-condor;GRID3_USER_VO_MAP=/export/osg/grid/monitoring/grid3-user-vo-map.txt;OSG_USER_VO_MAP=/export/osg/grid/monitoring/grid3-user-vo-map.txt;HOME=/home/cdf;OSG_WN_TMP=/local/stage1;GRID3_GRIDFTP_LOG=/export/osg/grid/globus/var/gridftp.log;OSG_SITE_READ=UNAVAILABLE;OSG_UTIL_CONTACT=fngp-osg.fnal.gov/jobmanager;GRID3_TMP_DIR=/grid/data;OSG_GRIDFTP_LOG=/export/osg/grid/globus/var/gridftp.log;OSG_APP=/grid/app;GRID3_BASE_DIR=/export/osg/grid;GLOBUS_LOCATION=/export/osg/grid/globus;GLOBUS_GRAM_JOB_CONTACT=https://fngp-osg.fnal.gov:29834/19565/1137791140/;GLOBUS_REMOTE_IO_URL=/home/cdf/.globus/job/fngp-osg.fnal.gov/19565.1137791140/remote_io_url;OSG_SITE_WRITE=UNAVAILABLE;GLOBUS_GRAM_MYJOB_CONTACT=URLx-nexus://fngp-osg.fnal.gov:29844/;SCRATCH_DIRECTORY=/home/cdf//gram_scratch_yb5ESBWPyS;GRID3_UTIL_CONTACT=fngp-osg.fnal.gov/jobmanager;X509_USER_PROXY=/home/cdf/.globus/job/fngp-osg.fnal.gov/19565.1137791140/x509_up";
JobNotification = 0
WantRemoteIO = TRUE
UserLog = "/export/osg/grid/globus/tmp/gram_job_state/gram_condor_log"
UserLogUseXML = TRUE
CoreSize = 0
KillSig = "SIGTERM"
Rank = (Mips)
In = "/dev/null"
StreamIn = FALSE
Out = "/home/cdf/.globus/job/fngp-osg.fnal.gov/19565.1137791140/stdout"
StreamOut = FALSE
Err = "/home/cdf/.globus/job/fngp-osg.fnal.gov/19565.1137791140/stderr"
StreamErr = FALSE
BufferSize = 524288
BufferBlockSize = 32768
ShouldTransferFiles = "NO"
TransferFiles = "NEVER"
ImageSize = 5
ExecutableSize = 5
DiskUsage = 5
Requirements = (OpSys == "LINUX" && Arch == "INTEL") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (TARGET.FileSystemDomain == MY.FileSystemDomain)
FileSystemDomain = "fnal.gov"
PeriodicHold = FALSE
PeriodicRelease = FALSE
PeriodicRemove = FALSE
OnExitHold = FALSE
OnExitRemove = TRUE
LeaveJobInQueue = FALSE
Args = "std OSG http://fcdfhead2.fnal.gov/condorcafstage//glidein_webgp_4";
accountinggroup = "group_cdf.cdf"
GlobalJobId = "fngp-osg.fnal.gov#1137791144#53602.0"
ProcId = 0
WantMatchDiagnostics = TRUE
LastRejMatchReason = "no match found"
LastRejMatchTime = 1137869767
ServerTime = 1137869871

MyType = "Machine"
TargetType = "Job"
Name = "vm1@xxxxxxxxxxxxxxxx"
Machine = "fnpc155.fnal.gov"
Rank = 0.000000
CpuBusy = ((LoadAvg - CondorLoadAvg) >= 0.500000)
COLLECTOR_HOST_STRING = "fngp-osg.fnal.gov"
CondorVersion = "$CondorVersion: 6.7.13 Nov  7 2005 $"
CondorPlatform = "$CondorPlatform: I386-LINUX_RH9 $"
VirtualMachineID = 1
VirtualMemory = 2096328
Disk = 45153724
CondorLoadAvg = 0.000000
LoadAvg = 0.000000
KeyboardIdle = 263460
ConsoleIdle = 263460
Memory = 1004
Cpus = 1
StartdIpAddr = "<131.225.167.155:32772>"
Arch = "INTEL"
OpSys = "LINUX"
UidDomain = "fnal.gov"
FileSystemDomain = "fnal.gov"
Subnet = "131.225.167"
HasIOProxy = TRUE
TotalVirtualMemory = 4192656
TotalDisk = 90307448
TotalCpus = 2
TotalMemory = 2009
KFlops = 805152
Mips = 3107
LastBenchmark = 1137857814
TotalLoadAvg = 0.050000
TotalCondorLoadAvg = 0.000000
ClockMin = 776
ClockDay = 6
TotalVirtualMachines = 2
HasFileTransfer = TRUE
HasPerFileEncryption = TRUE
HasReconnect = TRUE
HasMPI = TRUE
HasTDP = TRUE
HasJICLocalConfig = TRUE
HasJICLocalStdin = TRUE
HasPVM = TRUE
HasRemoteSyscalls = TRUE
HasCheckpointing = TRUE
StarterAbilityList = "HasFileTransfer,HasPerFileEncryption,HasReconnect,HasMPI,HasTDP,HasJICLocalConfig,HasJICLocalStdin,HasPVM,HasRemoteSyscalls,HasCheckpointing"
CpuBusyTime = 0
CpuIsBusy = FALSE
TimeToLive = 2147483647
State = "Unclaimed"
EnteredCurrentState = 1137828641
Activity = "Idle"
EnteredCurrentActivity = 1137857814
Start = TRUE
Requirements = START
MaxJobRetirementTime = 0
CurrentRank = 0.000000
MonitorSelfTime = 1137869660
MonitorSelfCPUUsage = 0.000000
MonitorSelfImageSize = 7836.000000
MonitorSelfResidentSetSize = 3916
MonitorSelfAge = 263251
DaemonStartTime = 1137606353
UpdateSequenceNumber = 1029
MyAddress = "<131.225.167.155:32772>"
LastHeardFrom = 1137869818
UpdatesTotal = 1011
UpdatesSequenced = 1010
UpdatesLost = 0
UpdatesHistory = "0x00000000000000000000000000000000"

MyType = "Machine"
TargetType = "Job"
Name = "vm2@xxxxxxxxxxxxxxxx"
Machine = "fnpc155.fnal.gov"
Rank = 0.000000
CpuBusy = ((LoadAvg - CondorLoadAvg) >= 0.500000)
COLLECTOR_HOST_STRING = "fngp-osg.fnal.gov"
CondorVersion = "$CondorVersion: 6.7.13 Nov  7 2005 $"
CondorPlatform = "$CondorPlatform: I386-LINUX_RH9 $"
VirtualMachineID = 2
VirtualMemory = 2096328
Disk = 45153724
CondorLoadAvg = 0.000000
LoadAvg = 0.050000
KeyboardIdle = 69817
ConsoleIdle = 263460
Memory = 1004
Cpus = 1
StartdIpAddr = "<131.225.167.155:32772>"
Arch = "INTEL"
OpSys = "LINUX"
UidDomain = "fnal.gov"
FileSystemDomain = "fnal.gov"
Subnet = "131.225.167"
HasIOProxy = TRUE
TotalVirtualMemory = 4192656
TotalDisk = 90307448
TotalCpus = 2
TotalMemory = 2009
KFlops = 805152
Mips = 3107
LastBenchmark = 1137857814
TotalLoadAvg = 0.050000
TotalCondorLoadAvg = 0.000000
ClockMin = 776
ClockDay = 6
TotalVirtualMachines = 2
HasFileTransfer = TRUE
HasPerFileEncryption = TRUE
HasReconnect = TRUE
HasMPI = TRUE
HasTDP = TRUE
HasJICLocalConfig = TRUE
HasJICLocalStdin = TRUE
HasPVM = TRUE
HasRemoteSyscalls = TRUE
HasCheckpointing = TRUE
StarterAbilityList = "HasFileTransfer,HasPerFileEncryption,HasReconnect,HasMPI,HasTDP,HasJICLocalConfig,HasJICLocalStdin,HasPVM,HasRemoteSyscalls,HasCheckpointing"
CpuBusyTime = 0
CpuIsBusy = FALSE
TimeToLive = 2147483647
State = "Unclaimed"
EnteredCurrentState = 1137801718
Activity = "Idle"
EnteredCurrentActivity = 1137857809
Start = TRUE
Requirements = START
MaxJobRetirementTime = 0
CurrentRank = 0.000000
MonitorSelfTime = 1137869660
MonitorSelfCPUUsage = 0.000000
MonitorSelfImageSize = 7836.000000
MonitorSelfResidentSetSize = 3916
MonitorSelfAge = 263251
DaemonStartTime = 1137606353
UpdateSequenceNumber = 972
MyAddress = "<131.225.167.155:32772>"
LastHeardFrom = 1137869819
UpdatesTotal = 954
UpdatesSequenced = 953
UpdatesLost = 0
UpdatesHistory = "0x00000000000000000000000000000000"


------------------------------------------------------------------
Steven C. Timm, Ph.D  (630) 840-8525  timm@xxxxxxxx  http://home.fnal.gov/~timm/
Fermilab Computing Div/Core Support Services Dept./Scientific Computing Section
Assistant Group Leader, Farms and Clustered Systems Group
Lead of Computing Farms Team