[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Condor-users] Chirp server error?



Title: Chirp server error?

Hello,

I'm trying to run a Linpack benchmark in the parallel universe, and I am getting the following error regarding the chirp server. Any idea what it means or what I might have forgotten?

---condor_submit script

buzzard-master[rnclear]: cat ross.condor
######################################
## Example submit description file
## for MPICH 1 MPI
## works with MPICH 1.2.4, 1.2.5 and 1.2.6
######################################
universe = parallel
executable = mp1script
arguments = xhpl
machine_count = 4
environment = "P4_GLOBMEMSIZE=16777290"
+RemoteSpoolDir = "unused"
log = ross.log
output = ross_output.out
error = ross_error.out
queue

--- output of mp1script -----

buzzard-master[rnclear]: cat ross_error.out
+ _CONDOR_PROCNO=1
+ _CONDOR_NPROCS=4
++ condor_config_val libexec
+ CONDOR_SSH=/opt/condor/libexec
+ CONDOR_SSH=/opt/condor/libexec/condor_ssh
++ condor_config_val libexec
+ SSHD_SH=/opt/condor/libexec
+ SSHD_SH=/opt/condor/libexec/sshd.sh
+ . /opt/condor/libexec/sshd.sh 1 4
++ trap sshd_cleanup 15
+++ condor_config_val CONDOR_SSHD
++ SSHD=/usr/sbin/sshd
+++ condor_config_val CONDOR_SSH_KEYGEN
++ KEYGEN=/usr/bin/ssh-keygen
+++ condor_config_val libexec
++ CONDOR_CHIRP=/opt/condor/libexec
++ CONDOR_CHIRP=/opt/condor/libexec/condor_chirp
++ PORT=4444
++ _CONDOR_REMOTE_SPOOL_DIR=/opt/condor/local.buzzard-master/spool/cluster3208.proc0.subproc0
++ _CONDOR_PROCNO=1
++ _CONDOR_NPROCS=4
++ mkdir /opt/condor/local.sahp5785/execute/dir_30263/tmp
++ hostkey=/opt/condor/local.sahp5785/execute/dir_30263/tmp/hostkey
++ /bin/rm -f /opt/condor/local.sahp5785/execute/dir_30263/tmp/hostkey /opt/condor/local.sahp5785/execute/dir_30263/tmp/hostkey.++ /++ /usr/bin/ssh-keygen -q -f /opt/condor/local.sahp4615/execute/dir_30531/tmp/hostkey -t rsa -++ '[++ '[' 0 -ne 0 ']'

++ idkey=/opt/condor/local.sahp4661/execute/dir_30568/tmp/3.key
++ /usr/bin/ssh-keygen -q -f /opt/condor/local.sahp4661/execute/dir_30568/tmp/3.key -t rsa -N ''
+++ sshd_cleanup
+++ /bin/rm -f /opt/condor/local.sahp4661/execute/dir_30568/tmp/hostkey /opt/condor/local.sahp4661/execute/dir_30568/tmp/hostkey.pub /opt/condor/local.sahp4661/execute/dir_30568/tmp/3.key /opt/condor/local.sahp4661/execute/dir_30568/tmp/3.key.pub sshd.out /opt/condor/local.sahp4661/execute/dir_30568/contact

++ '[' 0 -ne 0 ']'
++ /opt/condor/libexec/condor_chirp put -perm 0700 /opt/condor/local.sahp4661/execute/dir_30568/tmp/3.key /opt/condor/local.buzzard-master/spool/cluster3208.proc0.subproc0/3.key

Can't connect to chirp server
++ '[' 255 -ne 0 ']'
++ echo error 0 chirp putting identity keys back
++ exit -1
--
Richard N. Cleary
Sandia National Laboratories
Dept. 4324 Infrastructure Computing Systems
Email: rnclear@xxxxxxxxxx
Phone: 505.845.7836