[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [HTCondor-users] Python Binding Segmentation Fault



When I run the following code I get a segmentation fault. I am using Python 2.7 and HTCondor '$CondorVersion: 8.6.1 Mar 01 2017 BuildID: 398585 $'.

I get the segmentation fault after all jobs have completed, when I call print locals().

 

import htcondor

import classad

import os

from collections import Counter

import time

 

# Labels for the integer ``JobStatus`` job attribute: a label's position in
# this list is the status code it names (the list is indexed by JobStatus).
# NOTE(review): entries 6-8 ('Error', '<', '>') look like placeholders --
# confirm against the JobStatus values documented for your HTCondor version.
known_statuses = [
    'Unexpanded',  # 0
    'Idle',        # 1
    'Running',     # 2
    'Removed',     # 3
    'Completed',   # 4
    'Held',        # 5
    'Error',       # 6
    '<',           # 7
    '>',           # 8
]

 

class HTCondorTask(object):
    """Submit a batch of identical test jobs to HTCondor and wait for them.

    ``run`` submits the jobs with ``Schedd.submitMany`` and then blocks in
    ``track_jobs`` until the schedd no longer reports any job of the cluster.
    """

    def gen_proc_ads(self):
        """Build the per-process ads passed to ``Schedd.submitMany``.

        Returns:
            A list of 100 ``(ad, count)`` tuples. Every ``ad`` is a separate
            dict with identical contents and ``count`` is always 1.

        Raises:
            KeyError: if the ``HOME`` environment variable is not set.
        """
        home = os.environ['HOME']
        logdir = os.path.join(home, 'logs')
        # The dict literal is evaluated once per entry, so no two proc ads
        # share the same dict object.
        return [
            ({'Args': '{}/tmp/job.py'.format(home),
              'UserLog': os.path.join(logdir, 'test.log'),
              'Iwd': logdir,
              'Err': 'test-err.txt',
              'Out': 'test-out.txt'}, 1)
            for _ in range(100)
        ]

    def program_envs(self):
        """Return the job environment as space-separated NAME=value pairs."""
        env = ['PYTHONHOME=/opt/miniconda/envs/SANS']
        return " ".join(env)

    def run(self):
        """Submit the jobs, wait until they leave the queue, print locals.

        The cluster id returned by ``submitMany`` is stored in
        ``self.cluster_id`` before tracking starts.
        """
        job_ad = {
            'Cmd': '/opt/miniconda/envs/SANS/bin/python',
            'JobUniverse': 5,
            'RunAsOwner': classad.ExprTree('true'),
            'Environment': self.program_envs(),
            'ShouldTransferFiles': "NO",
            'FileSystemDomain': "eglp.com",
            'TransferIn': classad.ExprTree('false'),
            'TransferINputSizeMB': 0,
            'OnExitHold': classad.ExprTree("(ExitBySignal == True) || (ExitCode != 0)"),
        }
        schedd = htcondor.Schedd()
        proc_ads = self.gen_proc_ads()
        # Single-argument print(...) behaves the same on Python 2.7 and 3.
        print("Submitting {} jobs to condor".format(len(proc_ads)))
        self.cluster_id = schedd.submitMany(job_ad, proc_ads)
        self.track_jobs()
        # Kept deliberately: the crash being reported shows up at this call.
        print(locals())

    def track_jobs(self, poll_interval=10):
        """Poll the schedd until no job of ``self.cluster_id`` is reported.

        Each round sleeps, queries the schedd for the cluster's jobs, tallies
        them by status label and prints the tally. Polling ends when a query
        returns no jobs.

        Args:
            poll_interval: seconds to sleep before each query (default 10).

        Raises:
            RuntimeError: if every job still reported is in 'Held' status.
        """
        schedd = htcondor.Schedd()
        requirements = "ClusterId == %d" % self.cluster_id
        while True:
            time.sleep(poll_interval)
            # A fresh tally per poll; nothing carries over between rounds.
            cnt = Counter()
            for job in schedd.xquery(requirements=requirements,
                                     projection=["ProcId", "JobStatus"]):
                cnt[known_statuses[job['JobStatus']]] += 1
            if list(cnt) == ['Held']:
                # Only held jobs remain, so waiting longer cannot finish.
                raise RuntimeError(
                    "All {} remaining jobs of cluster {} are held".format(
                        cnt['Held'], self.cluster_id))
            print("Job Status:{}".format(cnt))
            if not cnt:
                print("Done")
                break

 

# Script entry point: build one task and run it to completion.
if __name__ == "__main__":
    task = HTCondorTask()
    task.run()

 

 

From: Doga Alpman <dalpman@xxxxxxxx>
Date: Friday, May 5, 2017 at 11:50 AM
To: "htcondor-users@xxxxxxxxxxx" <htcondor-users@xxxxxxxxxxx>
Subject: Re: Python Binding Segmentation Fault

 

Here is the backtrace. I am trying to boil it down to a small script that I can share.

 

*** Error in `python': double free or corruption (!prev): 0x000000000231fe00 ***

======= Backtrace: =========

/lib64/libc.so.6(+0x7c503)[0x7f2a54c93503]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x8f177)[0x7f2a5598c177]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x783d2)[0x7f2a559753d2]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265db)[0x7f2a55a235db]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265eb)[0x7f2a55a235eb]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265eb)[0x7f2a55a235eb]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265eb)[0x7f2a55a235eb]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyDict_DelItem+0xf7)[0x7f2a5598b347]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyDict_DelItemString+0x31)[0x7f2a5598b9d1]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x724)[0x7f2a559f2384]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x8596)[0x7f2a559fa1f6]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x8596)[0x7f2a559fa1f6]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x8596)[0x7f2a559fa1f6]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCode+0x32)[0x7f2a559fb2e2]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyRun_FileExFlags+0xb0)[0x7f2a55a1b960]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyRun_SimpleFileExFlags+0xef)[0x7f2a55a1bb3f]

/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(Py_Main+0xca4)[0x7f2a55a31484]

/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2a54c38b35]

python[0x400649]

======= Memory map: ========

00400000-00401000 r-xp 00000000 00:24 63050125                           /nfs/opt/miniconda/envs/SANS/bin/python2.7

00600000-00601000 rw-p 00000000 00:24 63050125                           /nfs/opt/miniconda/envs/SANS/bin/python2.7

00e73000-0244d000 rw-p 00000000 00:00 0                                  [heap]

7f2a24000000-7f2a24021000 rw-p 00000000 00:00 0

7f2a24021000-7f2a28000000 ---p 00000000 00:00 0

7f2a2a2ea000-7f2a2a310000 r-xp 00000000 00:24 62914713                   /nfs/opt/miniconda/envs/SANS/lib/libexpat.so.1.6.0

7f2a2a310000-7f2a2a50f000 ---p 00026000 00:24 62914713                   /nfs/opt/miniconda/envs/SANS/lib/libexpat.so.1.6.0

7f2a2a50f000-7f2a2a512000 rw-p 00025000 00:24 62914713                   /nfs/opt/miniconda/envs/SANS/lib/libexpat.so.1.6.0

7f2a2a512000-7f2a2a576000 r-xp 00000000 ca:01 33652297                   /usr/lib64/condor/libvomsapi.so.1.0.0

7f2a2a576000-7f2a2a775000 ---p 00064000 ca:01 33652297                   /usr/lib64/condor/libvomsapi.so.1.0.0

7f2a2a775000-7f2a2a776000 r--p 00063000 ca:01 33652297                   /usr/lib64/condor/libvomsapi.so.1.0.0

7f2a2a776000-7f2a2a778000 rw-p 00064000 ca:01 33652297                   /usr/lib64/condor/libvomsapi.so.1.0.0

7f2a2a778000-7f2a2a779000 rw-p 00000000 00:00 0

7f2a2a779000-7f2a2a786000 r-xp 00000000 ca:01 33652215                   /usr/lib64/condor/libglobus_gss_assist.so.3.7.15

7f2a2a786000-7f2a2a985000 ---p 0000d000 ca:01 33652215                   /usr/lib64/condor/libglobus_gss_assist.so.3.7.15

7f2a2a985000-7f2a2a986000 r--p 0000c000 ca:01 33652215                   /usr/lib64/condor/libglobus_gss_assist.so.3.7.15

7f2a2a986000-7f2a2a987000 rw-p 0000d000 ca:01 33652215                   /usr/lib64/condor/libglobus_gss_assist.so.3.7.15

7f2a2a987000-7f2a2a9a2000 r-xp 00000000 ca:01 33652221                   /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22

7f2a2a9a2000-7f2a2aba2000 ---p 0001b000 ca:01 33652221                   /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22

7f2a2aba2000-7f2a2aba3000 r--p 0001b000 ca:01 33652221                   /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22

7f2a2aba3000-7f2a2aba4000 rw-p 0001c000 ca:01 33652221                   /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22

7f2a2aba4000-7f2a2abb4000 r-xp 00000000 ca:01 33652209                   /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7

7f2a2abb4000-7f2a2adb3000 ---p 00010000 ca:01 33652209                   /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7

7f2a2adb3000-7f2a2adb4000 r--p 0000f000 ca:01 33652209                   /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7

7f2a2adb4000-7f2a2adb5000 rw-p 00010000 ca:01 33652209                   /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7

7f2a2adb5000-7f2a2adc5000 r-xp 00000000 ca:01 33652206                   /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9

7f2a2adc5000-7f2a2afc4000 ---p 00010000 ca:01 33652206                   /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9

7f2a2afc4000-7f2a2afc5000 r--p 0000f000 ca:01 33652206                   /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9

7f2a2afc5000-7f2a2afc6000 rw-p 00010000 ca:01 33652206                   /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9

7f2a2afc6000-7f2a2afd0000 r-xp 00000000 ca:01 33652200                   /usr/lib64/condor/libglobus_gsi_callback.so.0.5.8

Aborted

 

From: Doga Alpman <dalpman@xxxxxxxx>
Date: Wednesday, May 3, 2017 at 4:24 PM
To: "htcondor-users@xxxxxxxxxxx" <htcondor-users@xxxxxxxxxxx>
Subject: Python Binding Segmentation Fault

 

Hi,

We are using the submitMany function on an htcondor.Schedd object, with xquery to track the jobs, and we are seeing segmentation faults in Python. Has anybody else run into this issue? It looks like a reference-count issue.

 

Regards,
Doga