[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Condor-users] understanding condor history file



Has anyone got an answer for me, please? I would really appreciate some help.

Cheers,
Santanu


On 22/10/10 16:35, Santanu Das wrote:
Hi there,

Recently we started seeing some mismatches in our accounting data, and when I looked into the history file, I found that a number of fields are duplicated. Can anyone please explain the meaning of those values? My concerns are especially with RemoteWallClockTime, CompletionDate and JobStatus, but I'd like to understand the rest as well.

I've sorted the output in alphabetical order and highlighted the duplicate fields for easy reading.

AccountingGroup = "group_atlas.pltatl13"
Arguments = ""
AutoClusterAttrs = "Owner,JobUniverse,LastCheckpointPlatform,NumCkpts,ClientMachine,DiskUsage,ImageSize,Requirements,NiceUser,ConcurrencyLimits"
AutoClusterId = 117
BufferBlockSize = 32768
BufferSize = 524288
BytesRecvd = 57600.000000
BytesSent = 0.000000
ClientMachine = "serv03--hep"
ClusterId = 2389112
Cmd = "/home/pltatl13/.globus/.gass_cache/local/md5/ae/2f122e34f73b0c7794d40a54586722/md5/35/9dc111daa23ff1c5ebac3b6461b1c1/data"

CommittedTime = 0
CommittedTime = 116
CompletionDate = 0
CompletionDate = 1287716586

CondorPlatform = "$CondorPlatform: X86_64-LINUX_RHEL3 $"
CondorVersion = "$CondorVersion: 7.2.4 Jun 15 2009 BuildID: 159529 $"
CoreSize = 0
CumulativeSuspensionTime = 0

CurrentHosts = 0
CurrentHosts = 0
DiskUsage = 2000
DiskUsage = 75
DiskUsage_RAW = 1801
DiskUsage_RAW = 57
EnteredCurrentStatus = 1287715999
EnteredCurrentStatus = 1287716586

EnvDelim = ";"
Env = "GLOBUS_REMOTE_IO_URL=/home/pltatl13/.lcgjm/.remote_io_ptr/remote_io_file-7851.1287715699;LOCAL_JOB_ID=2389112.0;GLOBUS_CE=serv07.hep.phy.cam.ac.uk:2119/jobmanager-lcgcondor-atlas;FACTORYQUEUE=ANALY_CAM;SCRATCH_DIRECTORY=/home/pltatl13/;PANDA_JSID=voatlas61-analy;GTAG=http://voatlas61.cern.ch/pilotsa/2010-10-22/ANALY_CAM/2633623.17.out;GLOBUS_GRAM_MYJOB_CONTACT=URLx-nexus://serv07.hep.phy.cam.ac.uk:20101/;GLOBUS_GRAM_JOB_CONTACT=https://serv07.hep.phy.cam.ac.uk:20099/7851/1287715699/;FACTORYUSER=user;LOGNAME=pltatl13;GLOBUS_LOCATION=/opt/globus;X509_USER_PROXY=/home/pltatl13/.globus/job/serv07.hep.phy.cam.ac.uk/7851.1287715699/x509_up;HOME=/home/pltatl13"
Err = "globus-cache-export.B32450.batch.err"

ExecutableSize = 47
ExecutableSize_RAW = 46
ExitBySignal = FALSE
ExitBySignal = FALSE

ExitCode = 0
ExitStatus = 0
GlobalJobId = "cam.ac.uk#2389112.0#1287715999"
Group = "group_camgrid"

ImageSize = 450000
ImageSize = 47
ImageSize_RAW = 432460
ImageSize_RAW = 46

In = "/dev/null"
Iwd = "/home/pltatl13"
JobCurrentStartDate = 1287716469
JobFinishedHookDone = 1287716586
JobLeaseDuration = 1200
JobNotification = 0
JobPrio = 0
JobRunCount = 1
JobStartDate = 1287716469

JobStatus = 1
JobStatus = 4

JobUniverse = 5
KillSig = "SIGTERM"
LastJobLeaseRenewal = 1287716585
LastPublicClaimId = "<116.183:9564>#1285416310#6910#..."
LastPublicClaimIds = ""
LastRemoteHost = "slot4@farm051"

LastSuspensionTime = 0
LastSuspensionTime = 0

LeaveJobInQueue = FALSE
LocalSysCpu = 0.000000
LocalUserCpu = 0.000000
MaxHosts = 1
MinHosts = 1
MyType = "Job"
NiceUser = FALSE
NumCkpts = 0
NumCkpts_RAW = 0

NumJobStarts = 0
NumJobStarts = 1

NumRestarts = 0
NumShadowStarts = 1
NumSystemHolds = 0
*** Offset = 0 ClusterId = 2389112 ProcId = 0 Owner = "pltatl13" CompletionDate = 1287716586


OrigMaxHosts = 1
Out = "globus-cache-export.B32450.batch.out"
Owner = "pltatl13"
PeriodicHold = FALSE
PeriodicRelease = FALSE
PeriodicRemove = (((JobStatus == 2) && ((CurrentTime - JobCurrentStartDate) > 345600)) =?= TRUE)
ProcId = 0
QDate = 1287715999
Rank = 0.000000

RemoteSysCpu = 0.000000
RemoteSysCpu = 0.000000
RemoteUserCpu = 0.000000
RemoteUserCpu = 0.000000
RemoteWallClockTime = 0.000000
RemoteWallClockTime = 117.000000

RequestCpus = 1
RequestDisk = DiskUsage
RequestMemory = ceiling(ImageSize / 1024.000000)
Requirements = (((Arch == "INTEL") || (Arch == "X86_64"))) && (OpSys == "LINUX") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (HasFileTransfer)
[ .... ]
WantRemoteIO = TRUE
WantRemoteSyscalls = FALSE
WhenToTransferOutput = "ON_EXIT"

    
I would really appreciate it if anyone could shed some light on this.

Cheers,
Santanu
_______________________________________________
Condor-users mailing list
To unsubscribe, send a message to condor-users-request@xxxxxxxxxxx with a
subject: Unsubscribe
You can also unsubscribe by visiting
https://lists.cs.wisc.edu/mailman/listinfo/condor-users

The archives can be found at:
https://lists.cs.wisc.edu/archive/condor-users/