[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Condor-users] Condor and Firewall - using SSH Tunneling - Jobs not submitting



I have been working on this all day and I have made some good progress using SSH tunnelling.
 
I have edited my condor_config to run only the master and the schedd, and set the central manager variable to 127.0.0.1.
 
I then run the following commands:
 
ssh -fN -i ~/.ssh/clusterlogin -L 9618:thebeast:9618 condor@xxxxxxxxxxxxxx &
ssh -fN -i ~/.ssh/clusterlogin -L 9614:thebeast:9614 condor@xxxxxxxxxxxxxx &
 
These automatically log me into my Condor central manager (firewalled) without asking for a password, which lets me run them
in the background with the "&", and they then forward ports 9618 and 9614 on the central manager machine to my local machine.
 
 
So I can now successfully execute condor_status and get a listing of my pool, which is great.
I can also query the negotiator with condor_q -ana and get an answer.
 
Now I try to submit a job, but it fails because of my own requirement.
 
This job submits fine from the central manager itself, so I'm thinking it has to do with my port forwarding setup.
 
condor_q -l 15.0 reports
 
-- Submitter: tux.neuralgrid.org : <146.191.100.202:44953> : tux.neuralgrid.org
MyType = "Job"
TargetType = "Machine"
ClusterId = 15
QDate = 1129590058
CompletionDate = 0
Owner = "chris"
RemoteWallClockTime = 0.000000
LocalUserCpu = 0.000000
LocalSysCpu = 0.000000
RemoteUserCpu = 0.000000
RemoteSysCpu = 0.000000
ExitStatus = 0
NumCkpts = 0
NumRestarts = 0
NumSystemHolds = 0
CommittedTime = 0
TotalSuspensions = 0
LastSuspensionTime = 0
CumulativeSuspensionTime = 0
ExitBySignal = FALSE
CondorVersion = "$CondorVersion: 6.6.8 Jan 27 2005 $"
CondorPlatform = "$CondorPlatform: I386-LINUX_RH9 $"
RootDir = "/"
Iwd = "/home/chris/jobs/helloworld"
JobUniverse = 5
Cmd = "/home/chris/jobs/helloworld/helloworld"
MinHosts = 1
MaxHosts = 1
CurrentHosts = 0
WantRemoteSyscalls = FALSE
WantCheckpoint = FALSE
JobStatus = 1
EnteredCurrentStatus = 1129590058
JobPrio = 0
User = "chris@xxxxxxxxxxxxxxxxxx"
NiceUser = FALSE
Env = ""
JobNotification = 2
UserLog = "/home/chris/jobs/helloworld/log.out"
CoreSize = 0
KillSig = "SIGTERM"
Rank = 0.000000
In = "/dev/null"
TransferIn = FALSE
Out = "output_0.out"
Err = "error_0.out"
BufferSize = 524288
BufferBlockSize = 32768
ShouldTransferFiles = "YES"
WhenToTransferOutput = "ON_EXIT"
TransferFiles = "ONEXIT"
ImageSize = 11
ExecutableSize = 11
DiskUsage = 11
Requirements = ((Arch == "X86_64") && (OpSys == "LINUX")) && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (HasFileTransfer)
PeriodicHold = FALSE
PeriodicRelease = FALSE
PeriodicRemove = FALSE
LeaveJobInQueue = FALSE
Args = ""
ProcId = 0
ServerTime = 1129591068
 
condor_status -l reports
 
MyType = "Machine"
TargetType = "Job"
Name = "vm2@xxxxxxxxxxxxxxxxx"
Machine = "node9.cluster.int"
Rank = 0.000000
CpuBusy = ((LoadAvg - CondorLoadAvg) >= 0.500000)
COLLECTOR_HOST_STRING = "thebeast.cluster.int"
CondorVersion = "$CondorVersion: 6.7.10 Aug  3 2005 $"
CondorPlatform = "$CondorPlatform: I386-LINUX_RH9 $"
VirtualMachineID = 2
VirtualMemory = 524408
Disk = 219531280
CondorLoadAvg = 0.000000
LoadAvg = 0.000000
KeyboardIdle = 40969102
ConsoleIdle = 40969102
Memory = 2048
Cpus = 1
StartdIpAddr = "<192.168.1.109:33847>"
Arch = "X86_64"
OpSys = "LINUX"
UidDomain = "node9.cluster.int"
FileSystemDomain = "node9.cluster.int"
Subnet = "192.168.1"
HasIOProxy = TRUE
TotalVirtualMemory = 1048816
TotalDisk = 439062560
TotalCpus = 2
TotalMemory = 4096
KFlops = 595101
Mips = 2218
LastBenchmark = 1129584784
TotalLoadAvg = 0.000000
TotalCondorLoadAvg = 0.000000
ClockMin = 63
ClockDay = 2
TotalVirtualMachines = 2
HasFileTransfer = TRUE
HasPerFileEncryption = TRUE
HasReconnect = TRUE
HasMPI = TRUE
HasTDP = TRUE
HasJICLocalConfig = TRUE
HasJICLocalStdin = TRUE
HasPVM = TRUE
HasRemoteSyscalls = TRUE
HasCheckpointing = TRUE
StarterAbilityList = "HasFileTransfer,HasPerFileEncryption,HasReconnect,HasMPI,HasTDP,HasJICLocalConfig,HasJICLocalStdin,HasPVM,HasRemoteSyscalls,HasCheckpointing"
CpuBusyTime = 0
CpuIsBusy = FALSE
TimeToLive = 2147483647
State = "Unclaimed"
EnteredCurrentState = 1129080334
Activity = "Idle"
EnteredCurrentActivity = 1129584784
Start = TRUE
Requirements = START
MaxJobRetirementTime = 0
CurrentRank = 0.000000
MonitorSelfTime = 1129593698
MonitorSelfCPUUsage = 0.004167
MonitorSelfImageSize = 6896.000000
MonitorSelfResidentSetSize = 3256
MonitorSelfAge = 512393
DaemonStartTime = 1129080324
UpdateSequenceNumber = 1710
MyAddress = "<192.168.1.109:33847>"
LastHeardFrom = 1129590591
UpdatesTotal = 1711
UpdatesSequenced = 1710
UpdatesLost = 0
UpdatesHistory = "0x00000000000000000000000000000000"
 
 
 
 
Is there anything simple I am missing?
 
Chris
 
----- Original Message -----
Sent: Monday, October 17, 2005 3:22 PM
Subject: [Condor-users] Condor and Firewall Configuration/Help

At the moment I have an isolated cluster of nodes.
 
The central manager machine is on the only node that is routable to the outside network.
 
I want to add another node, but on the outside network. I want to add this as a submit machine only.
 
On my central manager, which is firewall restricted, I need to be able to open up the appropriate
ports for this type of communication.
 
Also, has anyone been able to use SSH tunnelling to get communication going via port 22?
 
Chris


_______________________________________________
Condor-users mailing list
Condor-users@xxxxxxxxxxx
https://lists.cs.wisc.edu/mailman/listinfo/condor-users