[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [condor-users] newbie question



When jobs are not running because condor_q -analyze reports:

5 match, but prefer another specific job despite its worse user-priority

I have these requirements:

1) condor_q -l <Your job id>


-- Submitter: sky.ncsa.uiuc.edu : <141.142.15.3:33875> : sky.ncsa.uiuc.edu
MyType = "Job"
TargetType = "Machine"
ClusterId = 7
QDate = 1082743629
CompletionDate = 0
Owner = "remijan"
RemoteWallClockTime = 0.000000
LocalUserCpu = 0.000000
LocalSysCpu = 0.000000
RemoteUserCpu = 0.000000
RemoteSysCpu = 0.000000
ExitStatus = 0
NumCkpts = 0
NumRestarts = 0
NumSystemHolds = 0
CommittedTime = 0
TotalSuspensions = 0
LastSuspensionTime = 0
CumulativeSuspensionTime = 0
ExitBySignal = FALSE
CondorVersion = "$CondorVersion: 6.6.3 Mar 29 2004 $"
CondorPlatform = "$CondorPlatform: I386-LINUX-RH72 $"
RootDir = "/"
Iwd = "/home/remijan/condor-jobs/hello"
JobUniverse = 5
Cmd = "/home/remijan/condor-jobs/hello/a.out"
MinHosts = 1
MaxHosts = 1
CurrentHosts = 0
WantRemoteSyscalls = FALSE
WantCheckpoint = FALSE
JobStatus = 1
EnteredCurrentStatus = 1082743629
JobPrio = 0
User = "remijan@xxxxxxxxxxxxx"
NiceUser = FALSE
Env = ""
JobNotification = 2
UserLog = "/home/remijan/condor-jobs/hello/job.log"
CoreSize = 0
KillSig = "SIGTERM"
Rank = 0.000000
In = "/dev/null"
TransferIn = FALSE
Out = "job.output"
Err = "job.err"
BufferSize = 524288
BufferBlockSize = 32768
ShouldTransferFiles = "YES"
WhenToTransferOutput = "ON_EXIT"
TransferFiles = "ONEXIT"
ImageSize = 14
ExecutableSize = 14
DiskUsage = 14
Requirements = (Arch == "INTEL") && (OpSys == "LINUX") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (HasFileTransfer)
PeriodicHold = FALSE
PeriodicRelease = FALSE
PeriodicRemove = FALSE
OnExitHold = FALSE
OnExitRemove = TRUE
LeaveJobInQueue = FALSE
Args = ""
ProcId = 0
ServerTime = 1082744270



And a computer that should be able to run the job is the submit machine itself:


2) condor_status -l <machine-name>
   Where <machine-name> is a single machine that you think should
   be able to run your job.

MyType = "Machine"
TargetType = "Job"
Name = "vm1@xxxxxxxxxxxxxxxxx"
Machine = "sky.ncsa.uiuc.edu"
Rank = 0.000000
CpuBusy = ((LoadAvg - CondorLoadAvg) >= 0.500000)
COLLECTOR_HOST_STRING = "reksio.ncsa.uiuc.edu"
CondorVersion = "$CondorVersion: 6.6.3 Mar 29 2004 $"
CondorPlatform = "$CondorPlatform: I386-LINUX-RH72 $"
VirtualMachineID = 1
VirtualMemory = 509563
Disk = 858059
CondorLoadAvg = 0.000000
LoadAvg = 0.000000
KeyboardIdle = 41
ConsoleIdle = 64209391
Memory = 505
Cpus = 1
StartdIpAddr = "<141.142.15.3:33876>"
Arch = "INTEL"
OpSys = "LINUX"
UidDomain = "ncsa.uiuc.edu"
FileSystemDomain = "sky.ncsa.uiuc.edu"
Subnet = "141.142.15"
HasIOProxy = TRUE
TotalVirtualMemory = 2038252
TotalDisk = 3432236
KFlops = 686667
Mips = 1355
LastBenchmark = 1082742405
TotalLoadAvg = 0.000000
TotalCondorLoadAvg = 0.000000
ClockMin = 801
ClockDay = 5
TotalVirtualMachines = 4
HasFileTransfer = TRUE
HasMPI = TRUE
HasJICLocalConfig = TRUE
HasJICLocalStdin = TRUE
HasRemoteSyscalls = TRUE
HasCheckpointing = TRUE
StarterAbilityList = "HasFileTransfer,HasMPI,HasJICLocalConfig,HasJICLocalStdin,HasRemoteSyscalls,HasCheckpointing"
CpuBusyTime = 0
CpuIsBusy = FALSE
State = "Unclaimed"
EnteredCurrentState = 1082742405
Activity = "Idle"
EnteredCurrentActivity = 1082742405
Start = TRUE
Requirements = START
CurrentRank = 0.000000
DaemonStartTime = 1082742398
UpdateSequenceNumber = 7
MyAddress = "<141.142.15.3:33876>"
LastHeardFrom = 1082744509
UpdatesTotal = 8
UpdatesSequenced = 7
UpdatesLost = 0
UpdatesHistory = "0x00000000000000000000000000000000"


Michael

Condor Support Information:
http://www.cs.wisc.edu/condor/condor-support/
To Unsubscribe, send mail to majordomo@xxxxxxxxxxx with
unsubscribe condor-users <your_email_address>