[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Condor-users] "Failed to open as standard output" error



We installed condor-6.7.13.x86_rh_9 on a testbed cluster at
Caltech. Now I am trying to submit a globus job:

[narsky@citgrid3 OSG]$ globus-job-run citgrid3.cacr.caltech.edu:2119/jobmanager-condor /bin/date

The job becomes idle and never finishes. StarterLog.vm1 on the worker
node shows this error:


2/10 12:21:05 Communicating with shadow <192.168.0.254:38311>
2/10 12:21:05 Submitting machine is "citgrid3.local"
2/10 12:21:05 Starting a VANILLA universe job with ID: 8.0
2/10 12:21:05 IWD: /home/narsky
2/10 12:21:05 Failed to open '/home/narsky/.globus/job/citgrid3.cacr.caltech.edu/18520.1139599210/stdout' as standard output: No such file or directory (errno 2)
2/10 12:21:05 Failed to open '/home/narsky/.globus/job/citgrid3.cacr.caltech.edu/18520.1139599210/stderr' as standard error: No such file or directory (errno 2)
2/10 12:21:05 Failed to open some/all of the std files...
2/10 12:21:05 Aborting OsProc::StartJob.
2/10 12:21:05 Failed to start job, exiting



Permissions for directory
/home/narsky/.globus/job/citgrid3.cacr.caltech.edu are set to 777.

The condor_config file is enclosed below. 

Can anyone help? 

Thanks,  -Ilya


====================================================================
etc/condor_config
RELEASE_DIR		= /opt/condor/condor

CONDOR_HOST		= 192.168.0.254

LOCAL_DIR		= $(RELEASE_DIR)/hosts/$(HOSTNAME)

LOCAL_CONFIG_FILE = $(LOCAL_DIR)/condor_config.local

CONDOR_ADMIN		= narsky@xxxxxxxxxxxxxxx

MAIL			= /usr/bin/mail

UID_DOMAIN		= local

FILESYSTEM_DOMAIN	= local

COLLECTOR_NAME 		= CIT_ITB_1

CONDOR_IDS=503.503

FLOCK_FROM = 

FLOCK_TO = 
FLOCK_NEGOTIATOR_HOSTS = $(FLOCK_TO)
FLOCK_COLLECTOR_HOSTS = $(FLOCK_TO)

HOSTALLOW_ADMINISTRATOR = $(CONDOR_HOST)

HOSTALLOW_OWNER = $(FULL_HOSTNAME), $(HOSTALLOW_ADMINISTRATOR)

HOSTALLOW_READ = * 
HOSTALLOW_WRITE = *
HOSTALLOW_NEGOTIATOR = $(CONDOR_HOST)
HOSTALLOW_NEGOTIATOR_SCHEDD = $(CONDOR_HOST), $(FLOCK_NEGOTIATOR_HOSTS)


HOSTALLOW_WRITE_COLLECTOR = $(HOSTALLOW_WRITE), $(FLOCK_FROM)
HOSTALLOW_WRITE_STARTD    = $(HOSTALLOW_WRITE), $(FLOCK_FROM)
HOSTALLOW_READ_COLLECTOR  = $(HOSTALLOW_READ), $(FLOCK_FROM)
HOSTALLOW_READ_STARTD     = $(HOSTALLOW_READ), $(FLOCK_FROM)













LOCK		= /var/lock/condor



GLIDEIN_SERVER_URLS = \
  http://www.cs.wisc.edu/condor/glidein/binaries \
  gsiftp://gridftp.cs.wisc.edu/p/condor/public/binaries/glidein

GLIDEIN_SITES = 









ALL_DEBUG               =

MAX_COLLECTOR_LOG	= 1000000
COLLECTOR_DEBUG		=

MAX_KBDD_LOG		= 1000000
KBDD_DEBUG		=

MAX_NEGOTIATOR_LOG	= 1000000
NEGOTIATOR_DEBUG	= D_MATCH
MAX_NEGOTIATOR_MATCH_LOG = 1000000

MAX_SCHEDD_LOG		= 1000000
SCHEDD_DEBUG		= D_COMMAND D_PID

MAX_SHADOW_LOG		= 1000000
SHADOW_DEBUG		=

MAX_STARTD_LOG		= 1000000
STARTD_DEBUG		= D_COMMAND

MAX_STARTER_LOG		= 1000000
STARTER_DEBUG		= D_NODATE

MAX_MASTER_LOG		= 1000000
MASTER_DEBUG		= D_COMMAND



MINUTE		= 60
HOUR		= (60 * $(MINUTE))
StateTimer	= (CurrentTime - EnteredCurrentState)
ActivityTimer	= (CurrentTime - EnteredCurrentActivity)
ActivationTimer = (CurrentTime - JobStart)
LastCkpt	= (CurrentTime - LastPeriodicCheckpoint)

STANDARD	= 1
PVM		= 4
VANILLA		= 5
MPI		= 8
IsPVM           = (TARGET.JobUniverse == $(PVM))
IsMPI           = (TARGET.JobUniverse == $(MPI))
IsVanilla       = (TARGET.JobUniverse == $(VANILLA))
IsStandard      = (TARGET.JobUniverse == $(STANDARD))

NonCondorLoadAvg	= (LoadAvg - CondorLoadAvg)
BackgroundLoad		= 0.3
HighLoad		= 0.5
StartIdleTime		= 15 * $(MINUTE)
ContinueIdleTime	=  5 * $(MINUTE)
MaxSuspendTime		= 10 * $(MINUTE)
MaxVacateTime		= 10 * $(MINUTE)

KeyboardBusy		= (KeyboardIdle < $(MINUTE))
ConsoleBusy		= (ConsoleIdle  < $(MINUTE))
CPUIdle			= ($(NonCondorLoadAvg) <= $(BackgroundLoad))
CPUBusy			= ($(NonCondorLoadAvg) >= $(HighLoad))
KeyboardNotBusy		= ($(KeyboardBusy) == False)

BigJob		= (TARGET.ImageSize >= (50 * 1024))
MediumJob	= (TARGET.ImageSize >= (15 * 1024) && TARGET.ImageSize < (50 * 1024))
SmallJob	= (TARGET.ImageSize <  (15 * 1024))

JustCPU			= ($(CPUBusy) && ($(KeyboardBusy) == False))
MachineBusy		= ($(CPUBusy) || $(KeyboardBusy))





WANT_SUSPEND 		= $(UWCS_WANT_SUSPEND)
WANT_VACATE		= $(UWCS_WANT_VACATE)

START			= True

SUSPEND			= False

CONTINUE		= True

PREEMPT			= False

KILL			= $(UWCS_KILL)

PERIODIC_CHECKPOINT	= $(UWCS_PERIODIC_CHECKPOINT)
PREEMPTION_REQUIREMENTS	= $(UWCS_PREEMPTION_REQUIREMENTS)
PREEMPTION_RANK		= $(UWCS_PREEMPTION_RANK)
NEGOTIATOR_PRE_JOB_RANK = $(UWCS_NEGOTIATOR_PRE_JOB_RANK)
NEGOTIATOR_POST_JOB_RANK = $(UWCS_NEGOTIATOR_POST_JOB_RANK)
MaxJobRetirementTime    = $(UWCS_MaxJobRetirementTime)

UWCS_WANT_SUSPEND	= ( $(SmallJob) || $(KeyboardNotBusy) \
                            || $(IsPVM) || $(IsVanilla) )
UWCS_WANT_VACATE 	= ( $(ActivationTimer) > 10 * $(MINUTE) \
                            || $(IsPVM) || $(IsVanilla) )

UWCS_START	= ( (KeyboardIdle > $(StartIdleTime)) \
                    && ( $(CPUIdle) || \
                         (State != "Unclaimed" && State != "Owner")) )

UWCS_SUSPEND = ( $(KeyboardBusy) || \
                 ( (CpuBusyTime > 2 * $(MINUTE)) \
                   && $(ActivationTimer) > 90 ) )

UWCS_CONTINUE = ( $(CPUIdle) && ($(ActivityTimer) > 10) \
                  && (KeyboardIdle > $(ContinueIdleTime)) )

UWCS_PREEMPT = ( ((Activity == "Suspended") && \
                  ($(ActivityTimer) > $(MaxSuspendTime))) \
		 || (SUSPEND && (WANT_SUSPEND == False)) )


UWCS_MaxJobRetirementTime = 0

UWCS_KILL = $(ActivityTimer) > $(MaxVacateTime) 


UWCS_PERIODIC_CHECKPOINT	= $(LastCkpt) > (3 * $(HOUR))



UWCS_NEGOTIATOR_PRE_JOB_RANK = RemoteOwner =?= UNDEFINED


UWCS_PREEMPTION_REQUIREMENTS = ( $(StateTimer) > (1 * $(HOUR)) && \
	RemoteUserPrio > SubmittorPrio * 1.2 ) || (MY.NiceUser == True)

UWCS_PREEMPTION_RANK = (RemoteUserPrio * 1000000) - TARGET.ImageSize



TESTINGMODE_WANT_SUSPEND	= False
TESTINGMODE_WANT_VACATE		= False
TESTINGMODE_START		= True
TESTINGMODE_SUSPEND		= False
TESTINGMODE_CONTINUE		= True
TESTINGMODE_PREEMPT		= False
TESTINGMODE_KILL		= False
TESTINGMODE_PERIODIC_CHECKPOINT	= False
TESTINGMODE_PREEMPTION_REQUIREMENTS = False
TESTINGMODE_PREEMPTION_RANK = 0




LOG		= $(LOCAL_DIR)/log
SPOOL		= $(LOCAL_DIR)/spool
EXECUTE		= $(LOCAL_DIR)/execute
BIN		= $(RELEASE_DIR)/bin
LIB		= $(RELEASE_DIR)/lib
INCLUDE		= $(RELEASE_DIR)/include
SBIN		= $(RELEASE_DIR)/sbin
LIBEXEC		= $(RELEASE_DIR)/libexec

HISTORY		= $(SPOOL)/history

COLLECTOR_LOG	= $(LOG)/CollectorLog
KBDD_LOG	= $(LOG)/KbdLog
MASTER_LOG	= $(LOG)/MasterLog
NEGOTIATOR_LOG	= $(LOG)/NegotiatorLog
NEGOTIATOR_MATCH_LOG = $(LOG)/MatchLog
SCHEDD_LOG	= $(LOG)/SchedLog
SHADOW_LOG	= $(LOG)/ShadowLog
STARTD_LOG	= $(LOG)/StartLog
STARTER_LOG	= $(LOG)/StarterLog

SHADOW_LOCK	= $(LOCK)/ShadowLock

COLLECTOR_HOST  = $(CONDOR_HOST)



RESERVED_DISK		= 5






DAEMON_LIST			= MASTER, STARTD, SCHEDD

DC_DAEMON_LIST = \
MASTER, STARTD, SCHEDD, KBDD, COLLECTOR, NEGOTIATOR, EVENTD, \
VIEW_SERVER, CONDOR_VIEW, VIEW_COLLECTOR, HAWKEYE

MASTER				= $(SBIN)/condor_master
STARTD				= $(SBIN)/condor_startd
SCHEDD				= $(SBIN)/condor_schedd
KBDD				= $(SBIN)/condor_kbdd
NEGOTIATOR			= $(SBIN)/condor_negotiator
COLLECTOR			= $(SBIN)/condor_collector

MASTER_ADDRESS_FILE = $(LOG)/.master_address

PREEN				= $(SBIN)/condor_preen

PREEN_ARGS			= -m -r














STARTER_LIST = STARTER, STARTER_PVM, STARTER_STANDARD
STARTER			= $(SBIN)/condor_starter
STARTER_PVM		= $(SBIN)/condor_starter.pvm
STARTER_STANDARD	= $(SBIN)/condor_starter.std

STARTD_ADDRESS_FILE	= $(LOG)/.startd_address





BenchmarkTimer = (CurrentTime - LastBenchmark)
RunBenchmarks : (LastBenchmark == 0 ) || ($(BenchmarkTimer) >= (4 * $(HOUR)))


CONSOLE_DEVICES	= mouse, console


COLLECTOR_HOST_STRING = "$(COLLECTOR_HOST)"
STARTD_EXPRS = COLLECTOR_HOST_STRING

STARTD_JOB_EXPRS = ImageSize, ExecutableSize, JobUniverse, NiceUser















SHADOW_LIST = SHADOW, SHADOW_PVM, SHADOW_STANDARD
SHADOW			= $(SBIN)/condor_shadow
SHADOW_PVM		= $(SBIN)/condor_shadow.pvm
SHADOW_STANDARD		= $(SBIN)/condor_shadow.std

SCHEDD_ADDRESS_FILE	= $(LOG)/.schedd_address





SHADOW_SIZE_ESTIMATE	= 1800

SHADOW_RENICE_INCREMENT	= 10


PERIODIC_EXPR_INTERVAL = 60



QUEUE_SUPER_USERS	= root, condor








PVMD			= $(SBIN)/condor_pvmd

PVMGS			= $(SBIN)/condor_pvmgs














VALID_SPOOL_FILES	= job_queue.log, job_queue.log.tmp, history, \
                          Accountant.log, Accountantnew.log, \
                          local_univ_execute

INVALID_LOG_FILES	= core


JAVA = /usr/bin/java


JAVA_MAXHEAP_ARGUMENT = -Xmx


JAVA_CLASSPATH_DEFAULT = $(LIB) $(LIB)/scimark2lib.jar .


JAVA_CLASSPATH_ARGUMENT = -classpath


JAVA_CLASSPATH_SEPARATOR = :


JAVA_BENCHMARK_TIME = 2


JAVA_EXTRA_ARGUMENTS =


GRIDMANAGER			= $(SBIN)/condor_gridmanager
GT2_GAHP			= $(SBIN)/gahp_server
GRID_MONITOR			= $(SBIN)/grid_monitor.sh


MAX_GRIDMANAGER_LOG	= 1000000
GRIDMANAGER_DEBUG	= D_COMMAND

GRIDMANAGER_LOG = /tmp/GridmanagerLog.$(USERNAME)




DEFAULT_UNIVERSE = globus

CRED_MIN_TIME_LEFT		= 120 


ENABLE_GRID_MONITOR = TRUE

CONDOR_GAHP = $(SBIN)/condor_c-gahp

MAX_C_GAHP_LOG	= 1000000

C_GAHP_LOG = /tmp/CGAHPLog.$(USERNAME)
C_GAHP_WORKER_THREAD_LOG = /tmp/CGAHPWorkerLog.$(USERNAME)

GT3_GAHP = $(SBIN)/gt3_gahp

GT3_LOCATION = $(LIB)/gt3

GT4_GAHP = $(SBIN)/gt4_gahp

GT4_LOCATION = $(LIB)/gt4


GRIDFTP_URL_BASE = gsiftp://$(FULL_HOSTNAME)


CREDD				= $(SBIN)/condor_credd

CREDD_ADDRESS_FILE	= $(LOG)/.credd_address


CREDD_PORT			= 9620
CREDD_ARGS			= -p $(CREDD_PORT) -f

CREDD_LOG			= $(LOG)/CredLog
CREDD_DEBUG			= D_FULLDEBUG
MAX_CREDD_LOG		= 4000000


CRED_STORE_DIR = $(LOCAL_DIR)/cred_dir




STORK				= $(SBIN)/stork_server

STORK_ADDRESS_FILE = $(LOG)/.stork_address


STORK_LOG_BASE		= $(LOG)/Stork

STORK_LOG = $(LOG)/StorkLog
STORK_DEBUG = D_FULLDEBUG
MAX_STORK_LOG = 4000000

STORK_PORT			= 9621
STORK_ARGS = -p $(STORK_PORT) -f -Serverlog $(STORK_LOG_BASE)







QUILL = $(SBIN)/condor_quill

QUILL_LOG = $(LOG)/QuillLog

QUILL_ADDRESS_FILE = $(LOG)/.quill_address










====================================================================