From 0457b1812918a08b61f6d310d96181c6a851ecb0 Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Sun, 12 Jul 2015 16:00:31 +0100 Subject: [PATCH 01/10] [virt] SyscallEnter is split into two methods SyscallEnterUnlocked and SyscallEnterLocked to avoid waitUntilQueued time-outs due to the pin internal lock The functionality of SyscallEnter is moved to SyscallEnterUnlocked called without acquiring the Pin internal lock. The actual modification of system call arguments is done in SyscallEnterLocked with acquiring the Pin internal lock. This commit fixes the following issue: https://github.com/s5z/zsim/issues/57 --- src/zsim.cpp | 67 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/src/zsim.cpp b/src/zsim.cpp index 9fa29a7e3..40ff67b02 100644 --- a/src/zsim.cpp +++ b/src/zsim.cpp @@ -149,6 +149,8 @@ VOID FakeCPUIDPost(THREADID tid, ADDRINT* eax, ADDRINT* ebx, ADDRINT* ecx, ADDRI VOID FakeRDTSCPost(THREADID tid, REG* eax, REG* edx); +VOID SyscallEnterUnlocked(THREADID tid, CONTEXT *ctxt); + VOID VdsoInstrument(INS ins); VOID FFThread(VOID* arg); @@ -591,6 +593,10 @@ VOID Instruction(INS ins) { INS_InsertCall(ins, IPOINT_AFTER, (AFUNPTR) FakeRDTSCPost, IARG_THREAD_ID, IARG_REG_REFERENCE, REG_EAX, IARG_REG_REFERENCE, REG_EDX, IARG_END); } + if (INS_IsSyscall(ins)) { + INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR) SyscallEnterUnlocked, IARG_CALL_ORDER, CALL_ORDER_LAST, IARG_THREAD_ID, IARG_CONTEXT, IARG_END); + } + //Must run for every instruction VdsoInstrument(ins); } @@ -813,7 +819,15 @@ VOID VdsoInstrument(INS ins) { bool activeThreads[MAX_THREADS]; // set in ThreadStart, reset in ThreadFini, we need this for exec() (see FollowChild) -bool inSyscall[MAX_THREADS]; // set in SyscallEnter, reset in SyscallExit, regardless of state. We MAY need this for ContextChange +bool inSyscall[MAX_THREADS]; // set in SyscallEnterUnlocked, reset in SyscallExit, regardless of state. 
We MAY need this for ContextChange + +#define MOD_SYSCALL_ARGS_NUM 6 +#define MOD_SYSCALL_NUMBER_INDEX 6 +#define MOD_SYSCALL_FLAG_INDEX 7 +#define MOD_SYSCALL_INDEXES 8 +// Per-thread storage to transfer modified syscall arguments between invocations of SyscallEnterUnlocked and SyscallEnterLocked. +// Modified syscall args are stored at indexes [0:5], syscall number - at index 6, flag indicating modification - at index 7. +static ADDRINT modifiedSyscallArgs[MAX_THREADS][MOD_SYSCALL_INDEXES]; // set in SyscallEnterUnlocked, used in SyscallEnterLocked uint32_t CountActiveThreads() { // Finish all threads in this process w.r.t. the global scheduler @@ -894,8 +908,46 @@ VOID ThreadFini(THREADID tid, const CONTEXT *ctxt, INT32 flags, VOID *v) { } } -//Need to remove ourselves from running threads in case the syscall is blocking -VOID SyscallEnter(THREADID tid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) { +// Saves modified syscall args in SyscallEnterUnlocked +VOID SaveModifiedSyscallArgs(THREADID tid, CONTEXT *ctxtMod, CONTEXT *ctxtOrig) { + ADDRINT modified = 0; + SYSCALL_STANDARD std = SYSCALL_STANDARD_IA32E_LINUX; + if (PIN_GetSyscallNumber(ctxtMod, std) != PIN_GetSyscallNumber(ctxtOrig, std)) { + modified = 1; + } + for (uint32_t argInd = 0; (argInd < MOD_SYSCALL_ARGS_NUM) && !modified; argInd++) { + if (PIN_GetSyscallArgument(ctxtMod, std, argInd) != PIN_GetSyscallArgument(ctxtOrig, std, argInd)) { + modified = 1; + } + } + if (modified) { + modifiedSyscallArgs[tid][MOD_SYSCALL_NUMBER_INDEX] = PIN_GetSyscallNumber(ctxtMod, std); + for (uint32_t argInd = 0; argInd < MOD_SYSCALL_ARGS_NUM; argInd++) { + modifiedSyscallArgs[tid][argInd] = PIN_GetSyscallArgument(ctxtMod, std, argInd); + } + modifiedSyscallArgs[tid][MOD_SYSCALL_FLAG_INDEX] = modified; + } +} + +// Writes modified syscall args in SyscallEnterLocked +VOID WriteModifiedSyscallArgs(THREADID tid, CONTEXT *ctxt, SYSCALL_STANDARD std) { + assert(std == SYSCALL_STANDARD_IA32E_LINUX); + if 
(modifiedSyscallArgs[tid][MOD_SYSCALL_FLAG_INDEX]) { + PIN_SetSyscallNumber(ctxt, std, modifiedSyscallArgs[tid][MOD_SYSCALL_NUMBER_INDEX]); + for (uint32_t argInd = 0; argInd < MOD_SYSCALL_ARGS_NUM; argInd++) { + PIN_SetSyscallArgument(ctxt, std, argInd, modifiedSyscallArgs[tid][argInd]); + } + modifiedSyscallArgs[tid][MOD_SYSCALL_FLAG_INDEX] = 0; + } +} + +// Performs action related to SyscallEnter without acquiring the Pin internal lock. +// Need to remove ourselves from running threads in case the syscall is blocking. +// Note: the rationale for having two methods SyscallEnterUnlocked and SyscallEnterLocked +// is described at https://github.com/s5z/zsim/issues/57 +VOID SyscallEnterUnlocked(THREADID tid, CONTEXT *ctxt) { + CONTEXT ctxtOrig = *ctxt; + SYSCALL_STANDARD std = SYSCALL_STANDARD_IA32E_LINUX; bool isNopThread = fPtrs[tid].type == FPTR_NOP; bool isRetryThread = fPtrs[tid].type == FPTR_RETRY; @@ -924,6 +976,13 @@ VOID SyscallEnter(THREADID tid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) { fPtrs[tid] = joinPtrs; // will join at the next instr point //info("SyscallEnter %d", tid); } + SaveModifiedSyscallArgs(tid, ctxt, &ctxtOrig); +} + +// Performs action related to SyscallEnter with acquiring the Pin internal lock +VOID SyscallEnterLocked(THREADID tid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) { + assert(inSyscall[tid]); + WriteModifiedSyscallArgs(tid, ctxt, std); } VOID SyscallExit(THREADID tid, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) { @@ -1545,7 +1604,7 @@ int main(int argc, char *argv[]) { PIN_AddThreadStartFunction(ThreadStart, 0); PIN_AddThreadFiniFunction(ThreadFini, 0); - PIN_AddSyscallEntryFunction(SyscallEnter, 0); + PIN_AddSyscallEntryFunction(SyscallEnterLocked, 0); PIN_AddSyscallExitFunction(SyscallExit, 0); PIN_AddContextChangeFunction(ContextChange, 0); From 274a14d7acc5223a04308df1ad0119491c077fd9 Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Mon, 13 Jul 2015 12:01:05 +0100 Subject: [PATCH 02/10] [virt] Fixed 
SaveModifiedSyscallArgs call site. --- src/zsim.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zsim.cpp b/src/zsim.cpp index 40ff67b02..3b8e520ac 100644 --- a/src/zsim.cpp +++ b/src/zsim.cpp @@ -953,6 +953,7 @@ VOID SyscallEnterUnlocked(THREADID tid, CONTEXT *ctxt) { if (!isRetryThread) { VirtSyscallEnter(tid, ctxt, std, procTreeNode->getPatchRoot(), isNopThread); + SaveModifiedSyscallArgs(tid, ctxt, &ctxtOrig); } assert(!inSyscall[tid]); inSyscall[tid] = true; @@ -976,7 +977,6 @@ VOID SyscallEnterUnlocked(THREADID tid, CONTEXT *ctxt) { fPtrs[tid] = joinPtrs; // will join at the next instr point //info("SyscallEnter %d", tid); } - SaveModifiedSyscallArgs(tid, ctxt, &ctxtOrig); } // Performs action related to SyscallEnter with acquiring the Pin internal lock From 7cfc5a4dcfe3a32d614711ea4e03e14e01d40465 Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Thu, 16 Jul 2015 10:40:09 +0100 Subject: [PATCH 03/10] [virt] Retrying timed out system calls for the remainder of sleep time when transitioning to FF --- src/scheduler.h | 4 +++- src/virt/timeout.cpp | 33 +++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/scheduler.h b/src/scheduler.h index 451b2a780..8ac5a6962 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -482,7 +482,8 @@ class Scheduler : public GlobAlloc, public Callee { return res; } - void notifySleepEnd(uint32_t pid, uint32_t tid) { + // Returns the number of remaining phases to sleep + uint64_t notifySleepEnd(uint32_t pid, uint32_t tid) { futex_lock(&schedLock); uint32_t gid = getGid(pid, tid); ThreadInfo* th = gidMap[gid]; @@ -496,6 +497,7 @@ class Scheduler : public GlobAlloc, public Callee { th->state = BLOCKED; } futex_unlock(&schedLock); + return th->wakeupPhase - zinfo->numPhases; } void printThreadState(uint32_t pid, uint32_t tid) { diff --git a/src/virt/timeout.cpp b/src/virt/timeout.cpp index dcd29635b..dded7a6c9 100644 --- a/src/virt/timeout.cpp +++ 
b/src/virt/timeout.cpp @@ -88,7 +88,7 @@ static bool PrePatchTimeoutSyscall(uint32_t tid, CONTEXT* ctxt, SYSCALL_STANDARD //info("FUTEX op %d waitOp %d uaddr %p ts %p", op, isFutexWaitOp(op), uaddr, timeout); if (!(uaddr && isFutexWaitOp(op) && timeout)) return false; // not a timeout FUTEX_WAIT - waitNsec = timeout->tv_sec*1000000000L + timeout->tv_nsec; + waitNsec = timespecToNs(*timeout); if (op & FUTEX_CLOCK_REALTIME) { // NOTE: FUTEX_CLOCK_REALTIME is not a documented interface AFAIK, but looking at the Linux source code + with some verification, this is the xlat @@ -149,11 +149,36 @@ static bool PostPatchTimeoutSyscall(uint32_t tid, CONTEXT* ctxt, SYSCALL_STANDAR retrySyscall = isSleeping; } + // Decide whether to retry when transitioning to FF if (retrySyscall && zinfo->procArray[procIdx]->isInFastForward()) { - warn("[%d] Fast-forwarding started, not retrying timeout syscall (%s)", tid, GetSyscallName(syscall)); - retrySyscall = false; assert(isSleeping); - zinfo->sched->notifySleepEnd(procIdx, tid); + uint64_t waitPhasesToSleep = zinfo->sched->notifySleepEnd(procIdx, tid); + if (waitPhasesToSleep > 0) { + ADDRINT timeoutRemArgVal; + uint64_t waitCycles = waitPhasesToSleep * zinfo->phaseLength; + uint64_t waitNsec = waitCycles * 1000 / zinfo->freqMHz; + + if (syscall == SYS_futex) { + int op = (int) PIN_GetSyscallArgument(ctxt, std, 1); + if (op & FUTEX_CLOCK_REALTIME) { + struct timespec realtime; + clock_gettime(CLOCK_REALTIME, &realtime); + uint64_t offsetNs = timespecToNs(realtime); + waitNsec += offsetNs; + warn(" REALTIME FUTEX(%d) fast-forwarding retrial: %ld %ld %ld", op & FUTEX_CLOCK_REALTIME, waitNsec, offsetNs, waitNsec-offsetNs); + } + fakeTimeouts[tid] = nsToTimespec(waitNsec); + timeoutRemArgVal = (ADDRINT) & fakeTimeouts[tid]; + } else { + assert(syscall == SYS_epoll_wait || syscall == SYS_epoll_pwait || syscall == SYS_poll); + timeoutRemArgVal = (ADDRINT) waitNsec / (1000 * 1000); + } + warn("[%d] Fast-forwarding started, retrying timeout 
syscall (%s)", tid, GetSyscallName(syscall)); + PIN_SetSyscallArgument(ctxt, std, getTimeoutArg(syscall), timeoutRemArgVal); + } else { + warn("[%d] Fast-forwarding started, not retrying timeout syscall (%s)", tid, GetSyscallName(syscall)); + retrySyscall = false; + } } if (retrySyscall) { From 3c9cc0bb433c2aa7cecfa86df07b47197eef6b3d Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Wed, 18 Nov 2015 14:51:45 +0000 Subject: [PATCH 04/10] [virt] Fine-grain locking for Scheduler::isSleeping. Prevents holding the Pin internal lock in PostPatchTimeoutSyscall, when another thread is in waitUntilQueued. --- src/scheduler.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/scheduler.h b/src/scheduler.h index 8ac5a6962..5f8303dae 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -147,6 +147,10 @@ class Scheduler : public GlobAlloc, public Callee { lock_t schedLock; PAD(); + PAD(); + lock_t gidMapLock; + PAD(); + uint64_t curPhase; //uint32_t nextVictim; MTRand rnd; @@ -179,6 +183,7 @@ class Scheduler : public GlobAlloc, public Callee { freeList.push_back(&contexts[i]); } schedLock = 0; + gidMapLock = 0; //nextVictim = 0; //only used when freeList is empty. 
curPhase = 0; scheduledThreads = 0; @@ -222,7 +227,9 @@ class Scheduler : public GlobAlloc, public Callee { // - SYS_getpid because after a fork (where zsim calls ThreadStart), // getpid() returns the parent's pid (getpid() caches, and I'm // guessing it hasn't flushed its cached pid at this point) + futex_lock(&gidMapLock); gidMap[gid] = new ThreadInfo(gid, syscall(SYS_getpid), syscall(SYS_gettid), mask); + futex_unlock(&gidMapLock); threadsCreated.inc(); futex_unlock(&schedLock); } @@ -233,7 +240,9 @@ class Scheduler : public GlobAlloc, public Callee { //info("[G %d] Finish", gid); assert((gidMap.find(gid) != gidMap.end())); ThreadInfo* th = gidMap[gid]; + futex_lock(&gidMapLock); gidMap.erase(gid); + futex_unlock(&gidMapLock); // Check for suppressed syscall leave(), execute it if (th->fakeLeave) { @@ -474,11 +483,11 @@ class Scheduler : public GlobAlloc, public Callee { } bool isSleeping(uint32_t pid, uint32_t tid) { - futex_lock(&schedLock); uint32_t gid = getGid(pid, tid); + futex_lock(&gidMapLock); ThreadInfo* th = gidMap[gid]; + futex_unlock(&gidMapLock); bool res = th->state == SLEEPING; - futex_unlock(&schedLock); return res; } From 1bc3261a1b02916014081d7be59536021a31d5b1 Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Tue, 31 Jan 2017 15:17:38 +0000 Subject: [PATCH 05/10] [virt] Fixed timeout virtualization for FUTEX_CLOCK_REALTIME flag. If FUTEX_CLOCK_REALTIME flag is set, the timeout should be treated as absolute based on CLOCK_REALTIME. 
--- src/virt/timeout.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/virt/timeout.cpp b/src/virt/timeout.cpp index dded7a6c9..6cfaf19d5 100644 --- a/src/virt/timeout.cpp +++ b/src/virt/timeout.cpp @@ -84,6 +84,7 @@ static bool PrePatchTimeoutSyscall(uint32_t tid, CONTEXT* ctxt, SYSCALL_STANDARD int* uaddr = (int*) PIN_GetSyscallArgument(ctxt, std, 0); int op = (int) PIN_GetSyscallArgument(ctxt, std, 1); const struct timespec* timeout = (const struct timespec*) PIN_GetSyscallArgument(ctxt, std, 3); + uint64_t hostTimeoutNs = 0; //info("FUTEX op %d waitOp %d uaddr %p ts %p", op, isFutexWaitOp(op), uaddr, timeout); if (!(uaddr && isFutexWaitOp(op) && timeout)) return false; // not a timeout FUTEX_WAIT @@ -92,17 +93,21 @@ static bool PrePatchTimeoutSyscall(uint32_t tid, CONTEXT* ctxt, SYSCALL_STANDARD if (op & FUTEX_CLOCK_REALTIME) { // NOTE: FUTEX_CLOCK_REALTIME is not a documented interface AFAIK, but looking at the Linux source code + with some verification, this is the xlat + struct timespec realtime; uint32_t domain = zinfo->procArray[procIdx]->getClockDomain(); uint64_t simNs = cyclesToNs(zinfo->globPhaseCycles); uint64_t offsetNs = simNs + zinfo->clockDomainInfo[domain].realtimeOffsetNs; - //info(" REALTIME FUTEX: %ld %ld %ld %ld", waitNsec, simNs, offsetNs, waitNsec-offsetNs); + warn(" REALTIME FUTEX(%d): %ld %ld %ld %ld", op & FUTEX_CLOCK_REALTIME, waitNsec, simNs, offsetNs, waitNsec-offsetNs); waitNsec = (waitNsec > (int64_t)offsetNs)? (waitNsec - offsetNs) : 0; + + clock_gettime(CLOCK_REALTIME, &realtime); + hostTimeoutNs = timespecToNs(realtime); } if (waitNsec <= 0) return false; // while technically waiting, this does not block. I'm guessing this is done for trylocks? It's weird. 
- fakeTimeouts[tid].tv_sec = 0; - fakeTimeouts[tid].tv_nsec = 20*1000*1000; // timeout every 20ms of actual host time + hostTimeoutNs += 20*1000*1000; // timeout every 20ms of actual host time + fakeTimeouts[tid] = nsToTimespec(hostTimeoutNs); PIN_SetSyscallArgument(ctxt, std, 3, (ADDRINT)&fakeTimeouts[tid]); } else { assert(syscall == SYS_epoll_wait || syscall == SYS_epoll_pwait || syscall == SYS_poll); @@ -186,6 +191,17 @@ static bool PostPatchTimeoutSyscall(uint32_t tid, CONTEXT* ctxt, SYSCALL_STANDAR //info("[%d] post-patch, retrying, IP: 0x%lx -> 0x%lx", tid, curIp, prevIp); PIN_SetContextReg(ctxt, REG_INST_PTR, prevIp); PIN_SetSyscallNumber(ctxt, std, syscall); + if (syscall == SYS_futex) { + int op = (int) PIN_GetSyscallArgument(ctxt, std, 1); + if (op & FUTEX_CLOCK_REALTIME) { + struct timespec realtime; + uint64_t hostTimeoutNs; + + clock_gettime(CLOCK_REALTIME, &realtime); + hostTimeoutNs = timespecToNs(realtime) + 20*1000*1000; // timeout every 20ms of actual host time + fakeTimeouts[tid] = nsToTimespec(hostTimeoutNs); + } + } } else { // Restore timeout arg PIN_SetSyscallArgument(ctxt, std, getTimeoutArg(syscall), timeoutArgVal); From 32a8fc439ba1ccd3e3e36ba634195f57f9ecd60a Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Tue, 31 Jan 2017 16:26:31 +0000 Subject: [PATCH 06/10] [scheduler] Added DEBUG_SCHEDULER macros for debugging purposes and all commented calls to info(...) were substituted. 
--- src/scheduler.cpp | 6 +++--- src/scheduler.h | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/scheduler.cpp b/src/scheduler.cpp index e5a65f1cf..c689eaab1 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -179,12 +179,12 @@ void Scheduler::watchdogThreadFunc() { } if (lastPhase == curPhase && scheduledThreads == outQueue.size() && !sleepQueue.empty()) { - //info("Watchdog Thread: Sleep dep detected...") + DEBUG_SCHEDULER("Watchdog Thread: Sleep dep detected...") int64_t wakeupPhase = sleepQueue.front()->wakeupPhase; int64_t wakeupCycles = (wakeupPhase - curPhase)*zinfo->phaseLength; int64_t wakeupUsec = (wakeupCycles > 0)? wakeupCycles/zinfo->freqMHz : 0; - //info("Additional usecs of sleep %ld", wakeupUsec); + DEBUG_SCHEDULER("Additional usecs of sleep %ld", wakeupUsec); if (wakeupUsec > 10*1000*1000) warn("Watchdog sleeping for a long time due to long sleep, %ld secs", wakeupUsec/1000/1000); futex_unlock(&schedLock); @@ -206,7 +206,7 @@ void Scheduler::watchdogThreadFunc() { if (futex_haswaiters(&schedLock)) { //happens commonly with multiple sleepers and very contended I/O... - //info("Sched: Threads waiting on advance, startPhase %ld curPhase %ld", lastPhase, curPhase); + DEBUG_SCHEDULER("Sched: Threads waiting on advance, startPhase %ld curPhase %ld", lastPhase, curPhase); break; } diff --git a/src/scheduler.h b/src/scheduler.h index 5f8303dae..1aeeddb68 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -44,6 +44,9 @@ #include "stats.h" #include "zsim.h" +//#define DEBUG_SCHEDULER(args...) info(args) +#define DEBUG_SCHEDULER(args...) + /** * TODO (dsm): This class is due for a heavy pass or rewrite. Some things are more complex than they should: * - The OUT state is unnecessary. 
It is done as a weak link between a thread that left and its context to preserve affinity, but @@ -220,7 +223,7 @@ class Scheduler : public GlobAlloc, public Callee { void start(uint32_t pid, uint32_t tid, const g_vector& mask) { futex_lock(&schedLock); uint32_t gid = getGid(pid, tid); - //info("[G %d] Start", gid); + DEBUG_SCHEDULER("[G %d] Start", gid); assert((gidMap.find(gid) == gidMap.end())); // Get pid and tid straight from the OS // - SYS_gettid because glibc does not implement gettid() @@ -237,7 +240,7 @@ class Scheduler : public GlobAlloc, public Callee { void finish(uint32_t pid, uint32_t tid) { futex_lock(&schedLock); uint32_t gid = getGid(pid, tid); - //info("[G %d] Finish", gid); + DEBUG_SCHEDULER("[G %d] Finish", gid); assert((gidMap.find(gid) != gidMap.end())); ThreadInfo* th = gidMap[gid]; futex_lock(&gidMapLock); @@ -272,7 +275,7 @@ class Scheduler : public GlobAlloc, public Callee { freeList.push_back(ctx); //no need to try to schedule anything; this context was already being considered while in outQueue //assert(runQueue.empty()); need not be the case with masks - //info("[G %d] Removed from outQueue and descheduled", gid); + DEBUG_SCHEDULER("[G %d] Removed from outQueue and descheduled", gid); } //At this point noone holds pointer to th, it's out from all queues, and either on OUT or BLOCKED means it's not pending a handoff delete th; @@ -407,7 +410,7 @@ class Scheduler : public GlobAlloc, public Callee { schedule(dst, ctx); wakeup(dst, false /*no join needed*/); handoffEvents.inc(); - //info("%d starting handoff cid %d to gid %d", th->gid, ctx->cid, dst->gid); + DEBUG_SCHEDULER("%d starting handoff cid %d to gid %d", th->gid, ctx->cid, dst->gid); //We're descheduled and have completed the handoff. Now we need to see if we can be scheduled somewhere else. 
ctx = schedThread(th); @@ -573,7 +576,7 @@ class Scheduler : public GlobAlloc, public Callee { ctx->curThread = th; scheduleEvents.inc(); scheduledThreads++; - //info("Scheduled %d <-> %d", th->gid, ctx->cid); + DEBUG_SCHEDULER("Scheduled %d <-> %d", th->gid, ctx->cid); zinfo->cores[ctx->cid]->contextSwitch(th->gid); } @@ -592,26 +595,26 @@ class Scheduler : public GlobAlloc, public Callee { //TODO: we may need more callbacks in the cores, e.g. in schedule(). Revise interface as needed... zinfo->cores[ctx->cid]->contextSwitch(-1); zinfo->processStats->notifyDeschedule(ctx->cid, getPid(th->gid)); - //info("Descheduled %d <-> %d", th->gid, ctx->cid); + DEBUG_SCHEDULER("Descheduled %d <-> %d", th->gid, ctx->cid); } void waitForContext(ThreadInfo* th) { th->futexWord = 1; waitEvents.inc(); - //info("%d waiting to be scheduled", th->gid); + DEBUG_SCHEDULER("%d waiting to be scheduled", th->gid); //printState(); futex_unlock(&schedLock); while (true) { int futex_res = syscall(SYS_futex, &th->futexWord, FUTEX_WAIT, 1 /*a racing thread waking us up will change value to 0, and we won't block*/, nullptr, nullptr, 0); if (futex_res == 0 || th->futexWord != 1) break; } - //info("%d out of sched wait, got cid = %d, needsJoin = %d", th->gid, th->cid, th->needsJoin); + DEBUG_SCHEDULER("%d out of sched wait, got cid = %d, needsJoin = %d", th->gid, th->cid, th->needsJoin); if (th->needsJoin) { futex_lock(&schedLock); assert(th->needsJoin); //re-check after the lock zinfo->cores[th->cid]->join(); bar.join(th->cid, &schedLock); - //info("%d join done", th->gid); + DEBUG_SCHEDULER("%d join done", th->gid); } } From 33d76447495ae4060137a1c1d57d742d43307ef7 Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Tue, 31 Jan 2017 16:34:12 +0000 Subject: [PATCH 07/10] [scheduler] Descheduling finishing thread on the condition that it has been scheduled. 
--- src/scheduler.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/scheduler.h b/src/scheduler.h index 1aeeddb68..5885186c5 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -271,11 +271,16 @@ class Scheduler : public GlobAlloc, public Callee { assert(th->owner == &outQueue); outQueue.remove(th); ContextInfo* ctx = &contexts[th->cid]; - deschedule(th, ctx, BLOCKED); - freeList.push_back(ctx); - //no need to try to schedule anything; this context was already being considered while in outQueue - //assert(runQueue.empty()); need not be the case with masks - DEBUG_SCHEDULER("[G %d] Removed from outQueue and descheduled", gid); + // descheduling finishing thread on the condition that it has been scheduled + if (ctx->curThread == th) { + deschedule(th, ctx, BLOCKED); + freeList.push_back(ctx); + //no need to try to schedule anything; this context was already being considered while in outQueue + //assert(runQueue.empty()); need not be the case with masks + DEBUG_SCHEDULER("[G %d] Removed from outQueue and descheduled", gid); + } else { + DEBUG_SCHEDULER("[G %d] Removed from outQueue", gid); + } } //At this point noone holds pointer to th, it's out from all queues, and either on OUT or BLOCKED means it's not pending a handoff delete th; From c764416815398a59042568e75c013f0fc160ba4f Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Mon, 13 Feb 2017 11:20:52 +0000 Subject: [PATCH 08/10] [ooo_core] No store forwarding for false-predicated loads. 
--- src/ooo_core.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/ooo_core.cpp b/src/ooo_core.cpp index 4a2305eb7..44e33edc8 100644 --- a/src/ooo_core.cpp +++ b/src/ooo_core.cpp @@ -270,19 +270,19 @@ inline void OOOCore::bbl(Address bblAddr, BblInfo* bblInfo) { if (addr != ((Address)-1L)) { reqSatisfiedCycle = l1d->load(addr, dispatchCycle) + L1D_LAT; cRec.record(curCycle, dispatchCycle, reqSatisfiedCycle); - } - // Enforce st-ld forwarding - uint32_t fwdIdx = (addr>>2) & (FWD_ENTRIES-1); - if (fwdArray[fwdIdx].addr == addr) { - // info("0x%lx FWD %ld %ld", addr, reqSatisfiedCycle, fwdArray[fwdIdx].storeCycle); - /* Take the MAX (see FilterCache's code) Our fwdArray - * imposes more stringent timing constraints than the - * l1d, b/c FilterCache does not change the line's - * availCycle on a store. This allows FilterCache to - * track per-line, not per-word availCycles. - */ - reqSatisfiedCycle = MAX(reqSatisfiedCycle, fwdArray[fwdIdx].storeCycle); + // Enforce st-ld forwarding + uint32_t fwdIdx = (addr>>2) & (FWD_ENTRIES-1); + if (fwdArray[fwdIdx].addr == addr) { + // info("0x%lx FWD %ld %ld", addr, reqSatisfiedCycle, fwdArray[fwdIdx].storeCycle); + /* Take the MAX (see FilterCache's code) Our fwdArray + * imposes more stringent timing constraints than the + * l1d, b/c FilterCache does not change the line's + * availCycle on a store. This allows FilterCache to + * track per-line, not per-word availCycles. + */ + reqSatisfiedCycle = MAX(reqSatisfiedCycle, fwdArray[fwdIdx].storeCycle); + } } commitCycle = reqSatisfiedCycle; From 242e997a0ae3c99ccfab9ace4a1a5f44d61efecf Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Fri, 3 Mar 2017 16:34:23 +0000 Subject: [PATCH 09/10] [build] Fixed syscalls identification on 64-bit Ubuntu. If /usr/include/asm/unistd.h does not exist, processing /usr/include/x86_64-linux-gnu/asm/unistd.h instead. 
--- misc/list_syscalls.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/misc/list_syscalls.py b/misc/list_syscalls.py index dab0524f8..fc6b97ce0 100755 --- a/misc/list_syscalls.py +++ b/misc/list_syscalls.py @@ -2,6 +2,10 @@ # Produces a list of syscalls in the current system import os, re syscallCmd = "gcc -E -dD /usr/include/asm/unistd.h | grep __NR" +if (os.path.exists("/usr/include/asm")): + syscallCmd = "gcc -E -dD /usr/include/asm/unistd.h | grep __NR" +else: + syscallCmd = "gcc -E -dD /usr/include/x86_64-linux-gnu/asm/unistd.h | grep __NR" syscallDefs = os.popen(syscallCmd).read() sysList = [(int(numStr), name) for (name, numStr) in re.findall("#define __NR_(.*?) (\d+)", syscallDefs)] denseList = ["INVALID"]*(max([num for (num, name) in sysList]) + 1) From 8adb92571968b40d796d5172b8b9239e2def64c2 Mon Sep 17 00:00:00 2001 From: Andrey Rodchenko Date: Sat, 4 Mar 2017 11:48:17 +0000 Subject: [PATCH 10/10] [scheduler] Supported finishing of a sleeping thread. --- src/scheduler.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/scheduler.h b/src/scheduler.h index 5885186c5..3144f6a28 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -263,23 +263,25 @@ class Scheduler : public GlobAlloc, public Callee { futex_lock(&schedLock); } - assert_msg(th->state == STARTED /*might be started but in fastFwd*/ ||th->state == OUT || th->state == BLOCKED || th->state == QUEUED, "gid %d finish with state %d", gid, th->state); + assert_msg(th->state == STARTED /*might be started but in fastFwd*/ ||th->state == OUT || th->state == BLOCKED || th->state == SLEEPING || th->state == QUEUED, "gid %d finish with state %d", gid, th->state); if (th->state == QUEUED) { assert(th->owner == &runQueue); runQueue.remove(th); } else if (th->owner) { - assert(th->owner == &outQueue); - outQueue.remove(th); ContextInfo* ctx = &contexts[th->cid]; - // descheduling finishing thread on the condition that it has been scheduled if (ctx->curThread == th) { + 
// descheduling finishing thread on the condition that it has been scheduled + assert(th->owner == &outQueue); + outQueue.remove(th); deschedule(th, ctx, BLOCKED); freeList.push_back(ctx); //no need to try to schedule anything; this context was already being considered while in outQueue //assert(runQueue.empty()); need not be the case with masks DEBUG_SCHEDULER("[G %d] Removed from outQueue and descheduled", gid); } else { - DEBUG_SCHEDULER("[G %d] Removed from outQueue", gid); + assert(th->owner == &sleepQueue); + sleepQueue.remove(th); + DEBUG_SCHEDULER("[G %d] Removed from sleepQueue", gid); } } //At this point noone holds pointer to th, it's out from all queues, and either on OUT or BLOCKED means it's not pending a handoff