最近在分析一个watchdog问题,起因是某个线程处于D状态(不可中断睡眠),但是dump里没有打印出native/kernel的调用栈,无法确定这个线程具体block在哪里,问题不太好分析。调用栈如下:
"Binder:1176_17" prio=5 tid=120 Native
| group="main" sCount=1 dsCount=0 obj=0x140c3100 self=0x7f74a52a00
| sysTid=2774 nice=0 cgrp=default sched=0/0 handle=0x7f71f00440
| state=D schedstat=( 71108648427 27941348162 142494 ) utm=4190 stm=2920 core=4 HZ=100
| stack=0x7f71e04000-0x7f71e06000 stackSize=1013KB
| held mutexes=
at com.android.server.power.PowerManagerService.nativeReleaseSuspendBlocker(Native method)
at com.android.server.power.PowerManagerService.access$3900(PowerManagerService.java:101)
at com.android.server.power.PowerManagerService$SuspendBlockerImpl.release(PowerManagerService.java:3523)
- locked <0x01889afd> (a com.android.server.power.PowerManagerService$SuspendBlockerImpl)
at com.android.server.power.PowerManagerService.updateSuspendBlockerLocked(PowerManagerService.java:2570)
at com.android.server.power.PowerManagerService.updatePowerStateLocked(PowerManagerService.java:1643)
at com.android.server.power.PowerManagerService.removeWakeLockLocked(PowerManagerService.java:1106)
at com.android.server.power.PowerManagerService.releaseWakeLockInternal(PowerManagerService.java:1080)
- locked <0x09ee3aa7> (a java.lang.Object)
at com.android.server.power.PowerManagerService.access$4400(PowerManagerService.java:101)
at com.android.server.power.PowerManagerService$BinderService.releaseWakeLock(PowerManagerService.java:3604)
at android.os.IPowerManager$Stub.onTransact(IPowerManager.java:94)
at android.os.Binder.execTransact(Binder.java:460)
所以花了一些时间调查dumpStackTraces为什么没有打印出kernel和native调用栈,大概梳理了一下调用流程:
void Thread::Dump(std::ostream& os, BacktraceMap* backtrace_map) const {
DumpState(os);
DumpStack(os, backtrace_map);
}
继续看看DumpStack方法:
void Thread::DumpStack(std::ostream& os, BacktraceMap* backtrace_map) const {
// TODO: we call this code when dying but may not have suspended the thread ourself. The
// IsSuspended check is therefore racy with the use for dumping (normally we inhibit
// the race with the thread_suspend_count_lock_).
bool dump_for_abort = (gAborting > 0);
bool safe_to_dump = (this == Thread::Current() || IsSuspended());
if (!kIsDebugBuild) {
// We always want to dump the stack for an abort, however, there is no point dumping another
// thread's stack in debug builds where we'll hit the not suspended check in the stack walk.
safe_to_dump = (safe_to_dump || dump_for_abort);
}
if (safe_to_dump) {
// If we're currently in native code, dump that stack before dumping the managed stack.
if (dump_for_abort || ShouldShowNativeStack(this)) {
DumpKernelStack(os, GetTid(), " kernel: ", false);
ArtMethod* method = GetCurrentMethod(nullptr, !dump_for_abort);
DumpNativeStack(os, GetTid(), backtrace_map, " native: ", method);
}
DumpJavaStack(os);
} else {
os << "Not able to dump stack of thread that isn't suspended";
}
}
这里会调用DumpKernelStack/DumpNativeStack/DumpJavaStack方法,但是在调用DumpKernelStack和DumpNativeStack之前有一个判断(DumpJavaStack不受这个判断影响):ShouldShowNativeStack
// Decides whether the kernel/native stack of |thread| is worth dumping.
//
// Returns true for threads in a state strictly between kWaiting and kStarting, for threads with
// no managed frames, and for threads whose current method is native. Returns false for threads in
// kTimedWaiting, kSleeping or kWaiting. When MTK_ART_DONT_DUMP_NATIVE_STACK_FOR_LARGE_PROCESS is
// defined, threads whose id is below 8 or above 20 always get false.
static bool ShouldShowNativeStack(const Thread* thread)
    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
  const ThreadState thread_state = thread->GetState();

#ifdef MTK_ART_DONT_DUMP_NATIVE_STACK_FOR_LARGE_PROCESS
  // Vendor switch: only thread ids in the range [8, 20] may have their native stack shown.
  const auto thread_id = thread->GetThreadId();
  if (thread_id < 8 || thread_id > 20) {
    return false;
  }
#endif

  // Somewhere in VM native code (one of the kWaitingFor* states): interesting.
  if (thread_state > kWaiting && thread_state < kStarting) {
    return true;
  }

  // Parked in an Object.wait variant or Thread.sleep: not interesting.
  switch (thread_state) {
    case kTimedWaiting:
    case kSleeping:
    case kWaiting:
      return false;
    default:
      break;
  }

  // A thread without any managed stack frames should be shown.
  const ManagedStack* frames = thread->GetManagedStack();
  if (frames == nullptr) {
    return true;
  }
  if (frames->GetTopQuickFrame() == nullptr && frames->GetTopShadowFrame() == nullptr) {
    return true;
  }

  // Otherwise show it only if it is executing some other native method. Checking kNative alone is
  // not enough: a native method is in kSuspended while calling back into the VM, in kBlocked while
  // blocked on a monitor, or in a thread-startup state early in its life (http://b/7432159).
  ArtMethod* method = thread->GetCurrentMethod(nullptr);
  if (method == nullptr) {
    return false;
  }
  return method->IsNative();
}
// Excerpt repeated from ShouldShowNativeStack() above: when
// MTK_ART_DONT_DUMP_NATIVE_STACK_FOR_LARGE_PROCESS is defined, any thread whose GetThreadId() is
// below 8 or above 20 gets `false`, so DumpStack() skips its kernel and native stacks unless an
// abort is in progress.
// NOTE(review): presumably this is why the "tid=120" thread in the dump has no kernel/native
// frames -- confirm that GetThreadId() is the value printed as "tid=".
#ifdef MTK_ART_DONT_DUMP_NATIVE_STACK_FOR_LARGE_PROCESS
if ((thread->GetThreadId()) < 8 || (thread->GetThreadId() > 20)) {
return false;
}
#endif