Hi all,
I recently wrote a stresscode for logTM
on simics-2.2.19 and gems-2.0, on a 4 processor machine. The
stresscode runs two threads generating many conflicting transactions that
would deadlock unless one of the transactions aborts. On running it on logTM
with the XACT_CONFLICT_RES flag set to BASE or TIMESTAMP, however, conflicts are
detected but none of the transactions are aborted. As a result, the code runs
indefinitely; a similar program that uses locks instead of transactions to
serialize access to the critical section finishes in about 20 minutes. Does anyone
have any idea why logTM is not aborting?
The last few lines of debugging output look like the following
when the stresscode is run for a long time:
1067372669 2 [2,0] TID 1 XACT NACK 0 by
1 [ 1, 0 ] NID: 0 [0x1a06f898, line 0x1a06f880] VA [0x0, line 0x0] PC
[0x112ec, line 0x112c0] my_ts 1067372155 nack_ts 23405901
possible_cycle 0 1067372670 2 [2,0] CONFLICTING REQUEST [0x1a06f898,
line 0x1a06f880] possibleCycle: 0 shouldTrap: 0 1067373184 2 [2,0]
RECEIVED NACK [0x1a06f898, line 0x1a06f880] remote_id: 1 [1,0] myPC: [0x112ec,
line 0x112c0] remotePC: [0xf000613c, line 0xf0006100] local_timestamp:
1067372670 remote_timestamp: 23405901 1067373184 2 [2,0] TID 1
XACT NACK 0 by 1 [ 1, 0 ] NID: 0 [0x1a06f898, line 0x1a06f880] VA [0x0, line
0x0] PC [0x112ec, line 0x112c0] my_ts 1067372670 nack_ts
23405901 possible_cycle 0 1067373185 2 [2,0] CONFLICTING REQUEST
[0x1a06f898, line 0x1a06f880] possibleCycle: 0 shouldTrap: 0 1067373699
2 [2,0] RECEIVED NACK [0x1a06f898, line 0x1a06f880] remote_id: 1 [1,0] myPC:
[0x112ec, line 0x112c0] remotePC: [0xf0031298, line 0xf0031280] local_timestamp:
1067373185 remote_timestamp: 23405901 1067373699 2 [2,0] TID 1
XACT NACK 0 by 1 [ 1, 0 ] NID: 0 [0x1a06f898, line 0x1a06f880] VA [0x0, line
0x0] PC [0x112ec, line 0x112c0] my_ts 1067373185 nack_ts
23405901 possible_cycle 0 1067373700 2 [2,0] CONFLICTING REQUEST
[0x1a06f898, line 0x1a06f880] possibleCycle: 0 shouldTrap: 0 1067374214
2 [2,0] RECEIVED NACK [0x1a06f898, line 0x1a06f880] remote_id: 1 [1,0] myPC:
[0x112ec, line 0x112c0] remotePC: [0xf0031dec, line 0xf0031dc0] local_timestamp:
1067373700 remote_timestamp: 23405901 1067374214 2 [2,0] TID 1
XACT NACK 0 by 1 [ 1, 0 ] NID: 0 [0x1a06f898, line 0x1a06f880] VA [0x0, line
0x0] PC [0x112ec, line 0x112c0] my_ts 1067373700 nack_ts
23405901 possible_cycle 0 1067374215 2 [2,0] CONFLICTING REQUEST
[0x1a06f898, line 0x1a06f880] possibleCycle: 0 shouldTrap: 0 1067374729
2 [2,0] RECEIVED NACK [0x1a06f898, line 0x1a06f880] remote_id: 1 [1,0] myPC:
[0x112ec, line 0x112c0] remotePC: [0xf00306c4, line 0xf00306c0] local_timestamp:
1067374215 remote_timestamp: 23405901 1067374729 2 [2,0] TID 1
XACT NACK 0 by 1 [ 1, 0 ] NID: 0 [0x1a06f898, line 0x1a06f880] VA [0x0, line
0x0] PC [0x112ec, line 0x112c0] my_ts 1067374215 nack_ts
23405901 possible_cycle 0 1067374730 2 [2,0] CONFLICTING REQUEST
[0x1a06f898, line 0x1a06f880] possibleCycle: 0 shouldTrap: 0 1067375244
2 [2,0] RECEIVED NACK [0x1a06f898, line 0x1a06f880] remote_id: 1 [1,0] myPC:
[0x112ec, line 0x112c0] remotePC: [0xf0030eb0, line 0xf0030e80] local_timestamp:
1067374730 remote_timestamp: 23405901
In short, the same thread keeps receiving NACKs
within a transaction and never proceeds.
I'm also including my stresscode, just in
case:
#include <stdio.h> #include
<stdlib.h> #include <math.h> #include
<assert.h> #include <pthread.h> #include
"transaction.h"
/*
 * MAGIC_BREAK: emit the SPARC instruction "sethi 0x40000, %g0".
 * NOTE(review): presumably the simulator treats this instruction as a
 * "magic" breakpoint marking the region of interest -- confirm against
 * the Simics setup in use.
 *
 * The expansion deliberately has NO trailing semicolon: the caller's own
 * ';' terminates the do/while, so the macro behaves as a single statement
 * and is safe in an unbraced if/else.
 */
#define MAGIC_BREAK                                   \
  do {                                                \
    __asm__ __volatile__ ("sethi 0x40000, %g0");      \
  } while (0)
/* Capacity (in elements) of the shared array below. */
#define ARR_SIZE 1000000

/* Run parameters; main() fills these in from the command line. */
int num_loops;          /* outer iterations (one transaction each) per thread */
int arr_size;           /* number of leading array elements actually touched  */
int reps;               /* updates applied to each element per transaction    */

/* Data updated by both worker threads inside their transactions. */
int array[ARR_SIZE];
/*
 * Worker thread body.
 *
 * id points to an int holding this thread's index (0 or 1).
 *
 * Each of the num_loops iterations runs one transaction that touches the
 * first arr_size elements of the shared array, reps times per element:
 *   - thread 0 walks the array upwards and increments each element;
 *   - any other thread walks it downwards and decrements each element.
 * The opposite traversal orders make the two threads' transactions collide.
 *
 * Always returns NULL (the original fell off the end of a non-void function).
 */
void* slaveStart(void *id){
  int myid;
  int i, j, k;
  int local_sense = 0;
  /* volatile sink: keeps the compiler from discarding the warm-up reads */
  volatile int dummy;

  myid = *((int*)id);

  /* NOTE(review): presumably binds this thread to processor myid+1;
     the helper is declared outside this file -- confirm. */
  tm_bind_to_cabinet(myid+1);
  /* NOTE(review): looks like a barrier across the 2 worker threads -- confirm */
  Barrier_breaking(&local_sense, myid, 2);

  set_transaction_registers(myid);

  /* warm-up caches */
  for (i = 0; i < arr_size; ++i) {
    dummy = array[i];
  }

  for (i = 0; i < num_loops; ++i) {
    BEGIN_WORKLOAD_TRANSACTION
    BEGIN_TRANSACTION(0)
    if (myid == 0) {
      /* ascending sweep */
      for (j = 0; j < arr_size; ++j) {
        for (k = 0; k < reps; ++k) {
          ++array[j];
        }
      }
    } else {
      /* descending sweep: opposite order to thread 0 */
      for (j = arr_size - 1; j >= 0; --j) {
        for (k = 0; k < reps; ++k) {
          --array[j];
        }
      }
    }
    COMMIT_TRANSACTION(0)
    END_WORKLOAD_TRANSACTION
  }

  Barrier_breaking(&local_sense, myid, 2);

  return NULL;
}
/*
 * Entry point: ./aborter loop-size array-size reps
 *
 * Reads the three run parameters, zeroes the shared array, initialises the
 * transaction runtime for 2 threads, then starts both workers and waits
 * for them to finish. The threaded section is bracketed by MAGIC_BREAK.
 *
 * Returns 0 on success, 1 on a usage error, an out-of-range array-size,
 * or a failure to create a worker thread.
 */
int main(int argc, char **argv){
  int i;
  int rc;
  pthread_t threads[2];
  pthread_attr_t pthread_custom_attr;
  int id[2] = {0,1};

  if (argc < 4) {
    fprintf (stderr, "./aborter loop-size array-size reps");
    return (1);
  }

  num_loops = atoi(argv[1]);
  arr_size = atoi(argv[2]);
  reps = atoi(argv[3]);

  /* The workers index array[0 .. arr_size-1]; anything beyond ARR_SIZE
     would run off the end of the statically sized array. */
  if (arr_size < 0 || arr_size > ARR_SIZE) {
    fprintf (stderr, "array-size must be between 0 and %d\n", ARR_SIZE);
    return (1);
  }

  // init array
  for (i = 0; i < ARR_SIZE; ++i) {
    array[i] = 0;
  }

  init_transaction_state(2);  // #threads = 2

  MAGIC_BREAK ;

  pthread_attr_init(&pthread_custom_attr);  // get default attributes

  for (i = 0; i < 2; ++i) {
    rc = pthread_create(&threads[i], &pthread_custom_attr, slaveStart,
                        (void*)&id[i]);
    if (rc != 0) {
      /* A lone worker would wait forever for its missing partner,
         so give up on the whole run instead of joining. */
      fprintf (stderr, "pthread_create failed for thread %d (error %d)\n",
               i, rc);
      return (1);
    }
  }

  /* The attribute object is no longer needed once both threads exist. */
  pthread_attr_destroy(&pthread_custom_attr);

  // Synchronize on termination
  pthread_join(threads[0],NULL);
  pthread_join(threads[1],NULL);

  MAGIC_BREAK ;

  return (0);
}
|