[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [Condor-users] Possible Limitation Condor vs Perl / IPC::open3



Also, here is what the StarterLog file looks like once I have submitted the
Perl test file (using ping as the process):

2/28 11:00:45 (fd:6) *** Job ClassAd ***
MyType = "Job"
TargetType = "Machine"
ClusterId = 96
QDate = 1109606442
CompletionDate = 0
Owner = "dlajoie"
NTDomain = "PRODUCTION"
RemoteWallClockTime = 0.000000
LocalUserCpu = 0.000000
LocalSysCpu = 0.000000
RemoteUserCpu = 0.000000
RemoteSysCpu = 0.000000
ExitStatus = 0
NumCkpts = 0
NumRestarts = 0
NumSystemHolds = 0
CommittedTime = 0
TotalSuspensions = 0
LastSuspensionTime = 0
CumulativeSuspensionTime = 0
ExitBySignal = FALSE
CondorVersion = "$CondorVersion: 6.6.7 Oct 14 2004 $"
CondorPlatform = "$CondorPlatform: INTEL-WINNT40 $"
Iwd = "C:\temp"
JobUniverse = 5
Cmd = "C:\Perl\bin\perl.exe"
MinHosts = 1
MaxHosts = 1
WantRemoteSyscalls = FALSE
WantCheckpoint = FALSE
JobPrio = 10
User = "dlajoie@PRODUCTION"
NiceUser = FALSE
Env = ""
JobNotification = 1
NotifyUser = "dlajoie@xxxxxxxxxxxxxxxxxxxx"
UserLog = "C:\temp\96.0.log"
CoreSize = 4560808
Rank = (machine == "DAVELAJ_M60.PRODUCTION")
In = "/dev/null"
TransferIn = FALSE
Out = "96.0_out.txt"
Err = "96.0_err.txt"
BufferSize = 524288
BufferBlockSize = 32768
ShouldTransferFiles = "NO"
TransferFiles = "NEVER"
ImageSize = 41
ExecutableSize = 41
DiskUsage = 41
Requirements = ((Has_XSI42_Batch == TRUE) && (Memory >= 1024)) && (Arch
== "INTEL") && (OpSys == "WINNT51") && (Disk >= DiskUsage) &&
(TARGET.FileSystemDomain == MY.FileSystemDomain)
FileSystemDomain = "PRODUCTION"
PeriodicHold = FALSE
PeriodicRelease = FALSE
PeriodicRemove = FALSE
OnExitHold = FALSE
OnExitRemove = TRUE
LeaveJobInQueue = FALSE
Args = "C:\Condor\wrapper\test.pl"
ProcId = 0
WantMatchDiagnostics = TRUE
LastMatchTime = 1109606442
NumJobMatches = 1
OrigMaxHosts = 1
JobStatus = 2
EnteredCurrentStatus = 1109606444
CurrentHosts = 1
RemoteHost = "DAVELAJ_M60.PRODUCTION"
RemoteVirtualMachineID = 1
ShadowBday = 1109606444
JobStartDate = 1109606444
JobCurrentStartDate = 1109606444
JobRunCount = 1
ServerTime = 1109606445
MyAddress = "<192.168.10.90:1774>"
TransferKey = "1#4223402d8c5681f"
TransferSocket = "<192.168.10.90:1774>"
ShadowVersion = "$CondorVersion: 6.6.7 Oct 14 2004 $"
UidDomain = "PRODUCTION"
2/28 11:00:45 (fd:6) --- End of ClassAd ---
2/28 11:00:45 (fd:6) STARTER_TIMEOUT_MULTIPLIER is undefined, using
default value of 0
2/28 11:00:45 (fd:6) New Daemon obj (shadow) name:
"DAVELAJ_M60.PRODUCTION", pool: "NULL", addr: "NULL"
2/28 11:00:45 (fd:6) Version of Shadow is $CondorVersion: 6.6.7 Oct 14
2004 $
2/28 11:00:45 (fd:6) Starter communicating with condor_shadow
<192.168.10.90:1774>
2/28 11:00:45 (fd:6) Submitting machine is "DAVELAJ_M60.PRODUCTION"
2/28 11:00:45 (fd:6) Doing CONDOR_register_starter_info
2/28 11:00:45 (fd:6) ShouldTransferFiles is "NO", NOT transfering files
2/28 11:00:45 (fd:6) init_user_ids: want user 'nobody@.', current is
'(null)@(null)'
2/28 11:00:45 (fd:6) Using dynamic user account.
2/28 11:00:45 (fd:6) Dynuser: Couldn't param VM# - using 1 by default
2/28 11:00:45 (fd:6) dynuser: Re-enabling account (condor-reuse-vm1)
2/28 11:00:46 (fd:6) dynuser::createuser(condor-reuse-vm1) successful
2/28 11:00:46 (fd:6) PRIV_CONDOR --> PRIV_CONDOR at
..\src\condor_starter.V6.1\starter_class.C:277
2/28 11:00:46 (fd:6) perm::init() starting up for account
(condor-reuse-vm1) domain (NULL)
2/28 11:00:46 (fd:6) perm::init: Found Account Name condor-reuse-vm1
2/28 11:00:46 (fd:6) Done moving to directory
"C:\Condor\execute\dir_3984"
2/28 11:00:46 (fd:6) PRIV_CONDOR --> PRIV_CONDOR at
..\src\condor_starter.V6.1\starter_class.C:377
2/28 11:00:46 (fd:6) TokenCache contents: 
condor-reuse-vm1@.
2/28 11:00:46 (fd:6) PRIV_CONDOR --> PRIV_USER at
..\src\condor_starter.V6.1\jic_shadow.C:172
2/28 11:00:46 (fd:6) JICShadow::initIOProxy(): Job does not define
WantIOProxy
2/28 11:00:46 (fd:6) PRIV_USER --> PRIV_CONDOR at
..\src\condor_starter.V6.1\jic_shadow.C:174
2/28 11:00:46 (fd:6) No StarterUserLog found in job ClassAd
2/28 11:00:46 (fd:6) Starter will not write a local UserLog
2/28 11:00:46 (fd:6) Starting a VANILLA universe job with ID: 96.0
2/28 11:00:46 (fd:6) In OsProc::OsProc()
2/28 11:00:46 (fd:6) Main job KillSignal: 15 (Unknown)
2/28 11:00:46 (fd:6) Main job RmKillSignal: 15 (Unknown)
2/28 11:00:46 (fd:6) in VanillaProc::StartJob()
2/28 11:00:46 (fd:6) in OsProc::StartJob()
2/28 11:00:46 (fd:6) IWD: C:\temp
2/28 11:00:46 (fd:6) TokenCache contents: 
condor-reuse-vm1@.
2/28 11:00:46 (fd:6) PRIV_CONDOR --> PRIV_USER at
..\src\condor_starter.V6.1\os_proc.C:227
2/28 11:00:46 (fd:7) Output file: C:\temp\96.0_out.txt
2/28 11:00:46 (fd:8) Error file: C:\temp\96.0_err.txt
2/28 11:00:46 (fd:8) Doing CONDOR_begin_execution
2/28 11:00:46 (fd:8) Renice expr "10" evaluated to 10
2/28 11:00:46 (fd:8) About to exec C:\Perl\bin\perl.exe
C:\Condor\wrapper\test.pl
2/28 11:00:46 (fd:8) Env =
_CONDOR_SCRATCH_DIR=C:\Condor\execute\dir_3984
2/28 11:00:46 (fd:8) PRIV_USER --> PRIV_CONDOR at
..\src\condor_starter.V6.1\os_proc.C:444
2/28 11:00:46 (fd:8) In
DaemonCore::Create_Process(C:\Perl\bin\perl.exe,...)
2/28 11:00:46 (fd:8) TokenCache contents: 
condor-reuse-vm1@.
2/28 11:00:46 (fd:8) PRIV_CONDOR --> PRIV_USER at
..\src\condor_daemon_core.V6\daemon_core.C:4920
2/28 11:00:46 (fd:8) PRIV_USER --> PRIV_CONDOR at
..\src\condor_daemon_core.V6\daemon_core.C:4926
2/28 11:00:46 (fd:8) Child Process: pid 1772 at 
2/28 11:00:46 (fd:6) Create_Process succeeded, pid=1772
2/28 11:00:46 (fd:6) Created new ProcFamily w/ pid 1772 as parent
2/28 11:00:46 (fd:6) EXECUTE_LOGIN_IS_DEDICATED is undefined, using
default value of False
2/28 11:00:46 (fd:6) in DaemonCore NewTimer()

Dave.
| -----Original Message-----
| From: condor-users-bounces@xxxxxxxxxxx 
| [mailto:condor-users-bounces@xxxxxxxxxxx] On Behalf Of Dave Lajoie
| Sent: Monday, February 28, 2005 11:05 AM
| To: Condor-Users Mail List; Dave Lajoie
| Subject: RE: [Condor-users] Possible Limitation Condor vs 
| Perl / IPC::open3
| 
| Correction:
| 	The Perl script below is incorrect (uses ping as the process).
| 	The script should look like this instead:
| 
| $cmd = 'ping -n 10 localhost |';
| $pid = open(XSIPROC,$cmd) or die "Cannot run $cmd: + $!";
| 
| while(<XSIPROC>)
| {
| 	print $_;
| }
| 
| I get the following error when submitting the file:
| 
| Cannot run ping -n 10 localhost |: + Bad file descriptor at 
| C:\Condor\wrapper\test.pl line 2.
| 
| If I run it from the command line, it works.
| 
| C:\>perl c:\Condor\wrapper\test.pl
| 
| Pinging DAVELAJ_M60.PRODUCTION [127.0.0.1] with 32 bytes of data:
| 
| Reply from 127.0.0.1: bytes=32 time<1ms TTL=64
| Reply from 127.0.0.1: bytes=32 time<1ms TTL=64
| 
| Dave.
| | -----Original Message-----
| | From: condor-users-bounces@xxxxxxxxxxx 
| | [mailto:condor-users-bounces@xxxxxxxxxxx] On Behalf Of Dave Lajoie
| | Sent: Monday, February 28, 2005 10:18 AM
| | To: Condor-Users Mail List; Dave Lajoie
| | Subject: RE: [Condor-users] Possible Limitation Condor vs Perl / 
| | IPC::open3
| | 
| | Erik, Filip, John
| | Here are the files
| | 
| | Submit file
| | ____________________________________________
| | universe = vanilla
| | executable = C:\Perl\bin\perl.exe
| | Requirements = (Has_XSI42_Batch == TRUE) && (Memory >= 1024)
| | Rank = (machine == "DAVELAJ_M60.PRODUCTION")
| | output = $(Cluster).$(Process)_out.txt
| | error = $(Cluster).$(Process)_err.txt
| | log = $(Cluster).$(Process).log
| | notification = always
| | notify_user = dlajoie@xxxxxxxxxxxxxxxxxxxx
| | copy_to_spool = false
| | should_transfer_files = false
| | 
| | priority = 10
| | initialdir = C:\temp
| | arguments = C:\Condor\wrapper\xsibatch42_wrapper.pl
| | Queue
| | 
| | 
| | Perl Wrapper
| | __________________________________
| | use Config;
| | use IPC::Open3;
| | 
| | use sigtrap 'handler' => \&myINT, 'INT';
| | use sigtrap 'handler' => \&myTERM, 'TERM';
| | use sigtrap 'handler' => \&myKILL, 'KILL';
| | use sigtrap 'handler' => \&myAlarm, 'ALRM';
| | use sigtrap 'handler' => \&myPIPE, 'PIPE';
| | 
| | 
| | sub myAlarm
| | {
| | }
| | 
| | sub myPIPE
| | {
| |     my $signame = shift;
| |     print "\n caught SIG$signame";
| |     exit(0);
| | }
| | 
| | sub myINT
| | {
| |     my $signame = shift;
| |     print "\n caught CONDOR::KILL \n";
| |     killChildrenProc();
| |     exit(0);
| | }
| | 
| | sub myKILL
| | {
| |     my $signame = shift;
| |     print "\n caught CONDOR::KILL \n";
| |     killChildrenProc();
| |     exit(0);
| | }
| | 
| | sub myTERM
| | {
| |     my $signame = shift;
| |     print "\n caught CONDOR::VACATE \n";
| |     killChildrenProc();
| |     exit(0);
| | }
| | 
| | sub killChildrenProc
| | {
| | 	if ( $Config::Config{'osname'} eq "MSWin32")
| | 	{
| | 		# first we must kill the shell
| | 		kill 9, $pid;
| | 		open(TLIST,"tasklist 2>&1|");
| | 		while(<TLIST>)
| | 		{
| | 			if ($_ =~ /^XSIBATCH.exe\s+(\d+)/)
| | 			{
| | 				push @tlist, $1;
| | 			}
| | 		}
| | 		kill 9, @tlist;
| | 	} elsif ( $Config::Config{'osname'} eq 'linux')
| | 	{
| | 		open(TLIST,"ps -eo pid,comm |");
| | 		while(<TLIST>)
| | 		{
| | 			if ($_ =~ /(\d+)\s+XSIBATCH/)
| | 			{
| | 				push @tlist, $1;
| | 			}
| | 			sort @tlist;
| | 			kill 9, @tlist;
| | 		}	
| | 	}
| | }
| | 
| | $cmd = 'C:\Softimage\XSI_4.2\Application\bin\xsibatch.bat -r -scene "C:\data\customers\condor_tests\Scenes\condor_render_test_v02.scn" -mb off -startframe 1 -endframe 10 -verbose prog -skip false';
| | 
| | $pid = open3("<&STDIN", \*XSIPROC, \*XSIPROC, $cmd) or die "Cannot run $cmd: + $!";
| | 	
| | for(;;)
| | {
| | 	sleep 1;
| | 	$line_count	= 0;
| | 	while(<XSIPROC>)
| | 	{
| | 		if ($_ =~ /^\'ERROR/i)
| | 		{
| | 			print STDERR "WRAPPER::PROCESS_ERROR " . $_;
| | 		} else
| | 		{
| | 			print STDOUT $_;
| | 		}
| | 		if ( $line_count++ > 10 )
| | 		{
| | 			last;
| | 		}
| | 	}
| | }
| | 
| | 
| | Here is a simpler test you can use to repro the problem
| | 
| | Submit file
| | ______________________________________________________
| | universe = vanilla
| | executable = C:\Perl\bin\perl.exe
| | Requirements = (Has_XSI42_Batch == TRUE) && (Memory >= 1024)
| | Rank = (machine == "DAVELAJ_M60.PRODUCTION")
| | output = $(Cluster).$(Process)_out.txt
| | error = $(Cluster).$(Process)_err.txt
| | log = $(Cluster).$(Process).log
| | notification = always
| | notify_user = dlajoie@xxxxxxxxxxxxxxxxxxxx
| | copy_to_spool = false
| | should_transfer_files = false
| | 
| | priority = 10
| | initialdir = C:\temp
| | arguments = C:\Condor\wrapper\test.pl
| | Queue
| | 
| | 
| | Test.pl
| | ________________________________________________________
| | $cmd = 'ping -n 10000 localhost |';
| | $pid = open(XSIPROC,$cmd) ;
| | 
| | # or die "Cannot run $cmd: + $!";
| | 
| | for(;;)
| | {
| |         sleep 1;
| |         $line_count     = 0;
| |         while(<XSIPROC>)
| |         {
| |                 print $_;
| |                 if ( $line_count++ > 10 )
| |                 {
| |                         last;
| |                 }
| |         }
| |         print "____\n";
| | }
| | 
| | _______________________________________________
| | Condor-users mailing list
| | Condor-users@xxxxxxxxxxx
| | https://lists.cs.wisc.edu/mailman/listinfo/condor-users
| | 
| 
| _______________________________________________
| Condor-users mailing list
| Condor-users@xxxxxxxxxxx
| https://lists.cs.wisc.edu/mailman/listinfo/condor-users
|