diff options
| author | Hans Boehm <hboehm@google.com> | 2021-09-04 13:35:40 +0000 |
|---|---|---|
| committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-09-04 13:35:40 +0000 |
| commit | 9382d321536ea96ba4ed20d79d85e8dae3e0998d (patch) | |
| tree | 2660c9c7b13b6f9829c0aa41a54815a20709c635 | |
| parent | e3b02a4c074cf1575fd6c983cff8a8089fdfb4b1 (diff) | |
| parent | 1363c1bf72f25f088b1f83250e91a47fc8cc7173 (diff) | |
Improve suspension timeout diagnostic and fix race am: 1363c1bf72
Original change: https://googleplex-android-review.googlesource.com/c/platform/art/+/15754304
Change-Id: I19863c5ecf99a45bc58e36103da91ba61e8eef2a
| -rw-r--r-- | openjdkjvm/OpenjdkJvm.cc | 1 | ||||
| -rw-r--r-- | openjdkjvmti/ti_thread.cc | 1 | ||||
| -rw-r--r-- | runtime/native/dalvik_system_VMStack.cc | 1 | ||||
| -rw-r--r-- | runtime/native/java_lang_Thread.cc | 1 | ||||
| -rw-r--r-- | runtime/thread.cc | 6 | ||||
| -rw-r--r-- | runtime/thread.h | 5 | ||||
| -rw-r--r-- | runtime/thread_list.cc | 30 | ||||
| -rw-r--r-- | runtime/thread_list.h | 5 | ||||
| -rw-r--r-- | test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc | 2 |
9 files changed, 29 insertions, 23 deletions
diff --git a/openjdkjvm/OpenjdkJvm.cc b/openjdkjvm/OpenjdkJvm.cc index d64086d43c..9b514af452 100644 --- a/openjdkjvm/OpenjdkJvm.cc +++ b/openjdkjvm/OpenjdkJvm.cc @@ -423,7 +423,6 @@ JNIEXPORT void JVM_SetNativeThreadName(JNIEnv* env, jobject jthread, jstring jav art::Thread* thread; { thread = thread_list->SuspendThreadByPeer(jthread, - true, art::SuspendReason::kInternal, &timed_out); } diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc index bb8fa3b694..a9a6ee8895 100644 --- a/openjdkjvmti/ti_thread.cc +++ b/openjdkjvmti/ti_thread.cc @@ -898,7 +898,6 @@ jvmtiError ThreadUtil::SuspendOther(art::Thread* self, bool timeout = true; art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer( target_jthread, - /* request_suspension= */ true, art::SuspendReason::kForUserCode, &timeout); if (ret_target == nullptr && !timeout) { diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc index 9d2dfac069..e88516e248 100644 --- a/runtime/native/dalvik_system_VMStack.cc +++ b/runtime/native/dalvik_system_VMStack.cc @@ -59,7 +59,6 @@ static ResultT GetThreadStack(const ScopedFastNativeObjectAccess& soa, ThreadList* thread_list = Runtime::Current()->GetThreadList(); bool timed_out; Thread* thread = thread_list->SuspendThreadByPeer(peer, - /* request_suspension= */ true, SuspendReason::kInternal, &timed_out); if (thread != nullptr) { diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc index 37b3fe642e..c3b4fe09de 100644 --- a/runtime/native/java_lang_Thread.cc +++ b/runtime/native/java_lang_Thread.cc @@ -148,7 +148,6 @@ static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) { bool timed_out; // Take suspend thread lock to avoid races with threads trying to suspend this one. Thread* thread = thread_list->SuspendThreadByPeer(peer, - /* request_suspension= */ true, SuspendReason::kInternal, &timed_out); if (thread != nullptr) { diff --git a/runtime/thread.cc b/runtime/thread.cc index 46aa2b59ad..16a5f93be4 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -4468,6 +4468,12 @@ bool Thread::IsSystemDaemon() const { WellKnownClasses::java_lang_Thread_systemDaemon)->GetBoolean(GetPeer()); } +std::string Thread::StateAndFlagsAsHexString() const { + std::stringstream result_stream; + result_stream << std::hex << tls32_.state_and_flags.as_atomic_int.load(); + return result_stream.str(); +} + ScopedExceptionStorage::ScopedExceptionStorage(art::Thread* self) : self_(self), hs_(self_), excp_(hs_.NewHandle<art::mirror::Throwable>(self_->GetException())) { self_->ClearException(); diff --git a/runtime/thread.h b/runtime/thread.h index 7a408021c1..676bfd81de 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -254,7 +254,7 @@ class Thread { bool IsSuspended() const { union StateAndFlags state_and_flags; - state_and_flags.as_int = tls32_.state_and_flags.as_int; + state_and_flags.as_int = tls32_.state_and_flags.as_atomic_int.load(std::memory_order_relaxed); return state_and_flags.as_struct.state != kRunnable && (state_and_flags.as_struct.flags & kSuspendRequest) != 0; } @@ -1517,6 +1517,9 @@ class Thread { }; static_assert(sizeof(StateAndFlags) == sizeof(int32_t), "Weird state_and_flags size"); + // Format state and flags as a hex string. For diagnostic output. + std::string StateAndFlagsAsHexString() const; + static void ThreadExitCallback(void* arg); // Maximum number of suspend barriers. diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index f8e99e8f62..84b7384c46 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -874,10 +874,11 @@ static void ThreadSuspendByPeerWarning(Thread* self, } Thread* ThreadList::SuspendThreadByPeer(jobject peer, - bool request_suspension, SuspendReason reason, bool* timed_out) { + bool request_suspension = true; const uint64_t start_time = NanoTime(); + int self_suspend_count = 0; useconds_t sleep_us = kThreadSuspendInitialSleepUs; *timed_out = false; Thread* const self = Thread::Current(); @@ -926,6 +927,7 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, // We hold the suspend count lock but another thread is trying to suspend us. Its not // safe to try to suspend another thread in case we get a cycle. Start the loop again // which will allow this thread to be suspended. + ++self_suspend_count; continue; } CHECK(suspended_thread == nullptr); @@ -957,20 +959,22 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, } const uint64_t total_delay = NanoTime() - start_time; if (total_delay >= thread_suspend_timeout_ns_) { - ThreadSuspendByPeerWarning(self, - ::android::base::FATAL, - "Thread suspension timed out", - peer); - if (suspended_thread != nullptr) { + if (suspended_thread == nullptr) { + ThreadSuspendByPeerWarning(self, + ::android::base::FATAL, + "Failed to issue suspend request", + peer); + } else { CHECK_EQ(suspended_thread, thread); - bool updated = suspended_thread->ModifySuspendCount(soa.Self(), - -1, - nullptr, - reason); - DCHECK(updated); + LOG(WARNING) << "Suspended thread state_and_flags: " + << suspended_thread->StateAndFlagsAsHexString() + << ", self_suspend_count = " << self_suspend_count; + ThreadSuspendByPeerWarning(self, + ::android::base::FATAL, + "Thread suspension timed out", + peer); } - *timed_out = true; - return nullptr; + UNREACHABLE(); } else if (sleep_us == 0 && total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) { // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent diff --git a/runtime/thread_list.h b/runtime/thread_list.h index 87a4c8dc61..f5b58a0c54 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -81,11 +81,8 @@ class ThreadList { // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success, // else null. The peer is used to identify the thread to avoid races with the thread terminating. - // If the thread should be suspended then value of request_suspension should be true otherwise - // the routine will wait for a previous suspend request. If the suspension times out then *timeout - // is set to true. + // If the suspension times out then *timeout is set to true. Thread* SuspendThreadByPeer(jobject peer, - bool request_suspension, SuspendReason reason, bool* timed_out) REQUIRES(!Locks::mutator_lock_, diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc index a185446ca5..a10fe2e905 100644 --- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc +++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc @@ -81,7 +81,7 @@ extern "C" JNIEXPORT void JNICALL Java_Main_waitAndDeopt(JNIEnv*, jobject, jobje } bool timed_out = false; Thread* other = Runtime::Current()->GetThreadList()->SuspendThreadByPeer( - target, true, SuspendReason::kInternal, &timed_out); + target, SuspendReason::kInternal, &timed_out); CHECK(!timed_out); CHECK(other != nullptr); ScopedSuspendAll ssa(__FUNCTION__); |
