// Path: blob/master/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampler.cpp
// 41155 views
/*1* Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include "precompiled.hpp"25#include "jfr/jfrEvents.hpp"26#include "jfr/recorder/jfrRecorder.hpp"27#include "jfr/periodic/sampling/jfrCallTrace.hpp"28#include "jfr/periodic/sampling/jfrThreadSampler.hpp"29#include "jfr/recorder/service/jfrOptionSet.hpp"30#include "jfr/recorder/stacktrace/jfrStackTraceRepository.hpp"31#include "jfr/support/jfrThreadId.hpp"32#include "jfr/support/jfrThreadLocal.hpp"33#include "jfr/utilities/jfrTime.hpp"34#include "jfrfiles/jfrEventClasses.hpp"35#include "logging/log.hpp"36#include "runtime/frame.inline.hpp"37#include "runtime/os.hpp"38#include "runtime/semaphore.hpp"39#include "runtime/thread.inline.hpp"40#include "runtime/threadSMR.hpp"4142enum JfrSampleType {43NO_SAMPLE = 0,44JAVA_SAMPLE = 1,45NATIVE_SAMPLE = 246};4748static bool thread_state_in_java(JavaThread* thread) {49assert(thread != NULL, "invariant");50switch(thread->thread_state()) {51case _thread_new:52case _thread_uninitialized:53case 
_thread_new_trans:54case _thread_in_vm_trans:55case _thread_blocked_trans:56case _thread_in_native_trans:57case _thread_blocked:58case _thread_in_vm:59case _thread_in_native:60case _thread_in_Java_trans:61break;62case _thread_in_Java:63return true;64default:65ShouldNotReachHere();66break;67}68return false;69}7071static bool thread_state_in_native(JavaThread* thread) {72assert(thread != NULL, "invariant");73switch(thread->thread_state()) {74case _thread_new:75case _thread_uninitialized:76case _thread_new_trans:77case _thread_blocked_trans:78case _thread_blocked:79case _thread_in_vm:80case _thread_in_vm_trans:81case _thread_in_Java_trans:82case _thread_in_Java:83case _thread_in_native_trans:84break;85case _thread_in_native:86return true;87default:88ShouldNotReachHere();89break;90}91return false;92}9394class JfrThreadSampleClosure {95public:96JfrThreadSampleClosure(EventExecutionSample* events, EventNativeMethodSample* events_native);97~JfrThreadSampleClosure() {}98EventExecutionSample* next_event() { return &_events[_added_java++]; }99EventNativeMethodSample* next_event_native() { return &_events_native[_added_native++]; }100void commit_events(JfrSampleType type);101bool do_sample_thread(JavaThread* thread, JfrStackFrame* frames, u4 max_frames, JfrSampleType type);102uint java_entries() { return _added_java; }103uint native_entries() { return _added_native; }104105private:106bool sample_thread_in_java(JavaThread* thread, JfrStackFrame* frames, u4 max_frames);107bool sample_thread_in_native(JavaThread* thread, JfrStackFrame* frames, u4 max_frames);108EventExecutionSample* _events;109EventNativeMethodSample* _events_native;110Thread* _self;111uint _added_java;112uint _added_native;113};114115class OSThreadSampler : public os::SuspendedThreadTask {116public:117OSThreadSampler(JavaThread* thread,118JfrThreadSampleClosure& closure,119JfrStackFrame *frames,120u4 max_frames) : 
os::SuspendedThreadTask((Thread*)thread),121_success(false),122_thread_oop(thread->threadObj()),123_stacktrace(frames, max_frames),124_closure(closure),125_suspend_time() {}126127void take_sample();128void do_task(const os::SuspendedThreadTaskContext& context);129void protected_task(const os::SuspendedThreadTaskContext& context);130bool success() const { return _success; }131const JfrStackTrace& stacktrace() const { return _stacktrace; }132133private:134bool _success;135oop _thread_oop;136JfrStackTrace _stacktrace;137JfrThreadSampleClosure& _closure;138JfrTicks _suspend_time;139};140141class OSThreadSamplerCallback : public os::CrashProtectionCallback {142public:143OSThreadSamplerCallback(OSThreadSampler& sampler, const os::SuspendedThreadTaskContext &context) :144_sampler(sampler), _context(context) {145}146virtual void call() {147_sampler.protected_task(_context);148}149private:150OSThreadSampler& _sampler;151const os::SuspendedThreadTaskContext& _context;152};153154void OSThreadSampler::do_task(const os::SuspendedThreadTaskContext& context) {155#ifndef ASSERT156guarantee(JfrOptionSet::sample_protection(), "Sample Protection should be on in product builds");157#endif158assert(_suspend_time.value() == 0, "already timestamped!");159_suspend_time = JfrTicks::now();160161if (JfrOptionSet::sample_protection()) {162OSThreadSamplerCallback cb(*this, context);163os::ThreadCrashProtection crash_protection;164if (!crash_protection.call(cb)) {165log_error(jfr)("Thread method sampler crashed");166}167} else {168protected_task(context);169}170}171172/*173* From this method and down the call tree we attempt to protect against crashes174* using a signal handler / __try block. Don't take locks, rely on destructors or175* leave memory (in case of signal / exception) in an inconsistent state. 
*/176void OSThreadSampler::protected_task(const os::SuspendedThreadTaskContext& context) {177JavaThread* jth = context.thread()->as_Java_thread();178// Skip sample if we signaled a thread that moved to other state179if (!thread_state_in_java(jth)) {180return;181}182JfrGetCallTrace trace(true, jth);183frame topframe;184if (trace.get_topframe(context.ucontext(), topframe)) {185if (_stacktrace.record_thread(*jth, topframe)) {186/* If we managed to get a topframe and a stacktrace, create an event187* and put it into our array. We can't call Jfr::_stacktraces.add()188* here since it would allocate memory using malloc. Doing so while189* the stopped thread is inside malloc would deadlock. */190_success = true;191EventExecutionSample *ev = _closure.next_event();192ev->set_starttime(_suspend_time);193ev->set_endtime(_suspend_time); // fake to not take an end time194ev->set_sampledThread(JFR_THREAD_ID(jth));195ev->set_state(static_cast<u8>(java_lang_Thread::get_thread_status(_thread_oop)));196}197}198}199200void OSThreadSampler::take_sample() {201run();202}203204class JfrNativeSamplerCallback : public os::CrashProtectionCallback {205public:206JfrNativeSamplerCallback(JfrThreadSampleClosure& closure, JavaThread* jt, JfrStackFrame* frames, u4 max_frames) :207_closure(closure), _jt(jt), _thread_oop(jt->threadObj()), _stacktrace(frames, max_frames), _success(false) {208}209virtual void call();210bool success() { return _success; }211JfrStackTrace& stacktrace() { return _stacktrace; }212213private:214JfrThreadSampleClosure& _closure;215JavaThread* _jt;216oop _thread_oop;217JfrStackTrace _stacktrace;218bool _success;219};220221static void write_native_event(JfrThreadSampleClosure& closure, JavaThread* jt, oop thread_oop) {222EventNativeMethodSample *ev = closure.next_event_native();223ev->set_starttime(JfrTicks::now());224ev->set_sampledThread(JFR_THREAD_ID(jt));225ev->set_state(static_cast<u8>(java_lang_Thread::get_thread_status(thread_oop)));226}227228void 
JfrNativeSamplerCallback::call() {229// When a thread is only attach it will be native without a last java frame230if (!_jt->has_last_Java_frame()) {231return;232}233234frame topframe = _jt->last_frame();235frame first_java_frame;236Method* method = NULL;237JfrGetCallTrace gct(false, _jt);238if (!gct.find_top_frame(topframe, &method, first_java_frame)) {239return;240}241if (method == NULL) {242return;243}244topframe = first_java_frame;245_success = _stacktrace.record_thread(*_jt, topframe);246if (_success) {247write_native_event(_closure, _jt, _thread_oop);248}249}250251bool JfrThreadSampleClosure::sample_thread_in_java(JavaThread* thread, JfrStackFrame* frames, u4 max_frames) {252OSThreadSampler sampler(thread, *this, frames, max_frames);253sampler.take_sample();254/* We don't want to allocate any memory using malloc/etc while the thread255* is stopped, so everything is stored in stack allocated memory until this256* point where the thread has been resumed again, if the sampling was a success257* we need to store the stacktrace in the stacktrace repository and update258* the event with the id that was returned. 
*/259if (!sampler.success()) {260return false;261}262EventExecutionSample *event = &_events[_added_java - 1];263traceid id = JfrStackTraceRepository::add(sampler.stacktrace());264assert(id != 0, "Stacktrace id should not be 0");265event->set_stackTrace(id);266return true;267}268269bool JfrThreadSampleClosure::sample_thread_in_native(JavaThread* thread, JfrStackFrame* frames, u4 max_frames) {270JfrNativeSamplerCallback cb(*this, thread, frames, max_frames);271if (JfrOptionSet::sample_protection()) {272os::ThreadCrashProtection crash_protection;273if (!crash_protection.call(cb)) {274log_error(jfr)("Thread method sampler crashed for native");275}276} else {277cb.call();278}279if (!cb.success()) {280return false;281}282EventNativeMethodSample *event = &_events_native[_added_native - 1];283traceid id = JfrStackTraceRepository::add(cb.stacktrace());284assert(id != 0, "Stacktrace id should not be 0");285event->set_stackTrace(id);286return true;287}288289static const uint MAX_NR_OF_JAVA_SAMPLES = 5;290static const uint MAX_NR_OF_NATIVE_SAMPLES = 1;291292void JfrThreadSampleClosure::commit_events(JfrSampleType type) {293if (JAVA_SAMPLE == type) {294assert(_added_java > 0 && _added_java <= MAX_NR_OF_JAVA_SAMPLES, "invariant");295for (uint i = 0; i < _added_java; ++i) {296_events[i].commit();297}298} else {299assert(NATIVE_SAMPLE == type, "invariant");300assert(_added_native > 0 && _added_native <= MAX_NR_OF_NATIVE_SAMPLES, "invariant");301for (uint i = 0; i < _added_native; ++i) {302_events_native[i].commit();303}304}305}306307JfrThreadSampleClosure::JfrThreadSampleClosure(EventExecutionSample* events, EventNativeMethodSample* events_native) :308_events(events),309_events_native(events_native),310_self(Thread::current()),311_added_java(0),312_added_native(0) {313}314315class JfrThreadSampler : public NonJavaThread {316friend class JfrThreadSampling;317private:318Semaphore _sample;319Thread* _sampler_thread;320JfrStackFrame* const _frames;321JavaThread* 
_last_thread_java;322JavaThread* _last_thread_native;323size_t _interval_java;324size_t _interval_native;325int _cur_index;326const u4 _max_frames;327volatile bool _disenrolled;328329JavaThread* next_thread(ThreadsList* t_list, JavaThread* first_sampled, JavaThread* current);330void task_stacktrace(JfrSampleType type, JavaThread** last_thread);331JfrThreadSampler(size_t interval_java, size_t interval_native, u4 max_frames);332~JfrThreadSampler();333334void start_thread();335336void enroll();337void disenroll();338void set_java_interval(size_t interval) { _interval_java = interval; };339void set_native_interval(size_t interval) { _interval_native = interval; };340size_t get_java_interval() { return _interval_java; };341size_t get_native_interval() { return _interval_native; };342protected:343virtual void post_run();344public:345virtual char* name() const { return (char*)"JFR Thread Sampler"; }346bool is_JfrSampler_thread() const { return true; }347void run();348static Monitor* transition_block() { return JfrThreadSampler_lock; }349static void on_javathread_suspend(JavaThread* thread);350};351352static void clear_transition_block(JavaThread* jt) {353jt->clear_trace_flag();354JfrThreadLocal* const tl = jt->jfr_thread_local();355if (tl->is_trace_block()) {356MutexLocker ml(JfrThreadSampler::transition_block(), Mutex::_no_safepoint_check_flag);357JfrThreadSampler::transition_block()->notify_all();358}359}360361static bool is_excluded(JavaThread* thread) {362assert(thread != NULL, "invariant");363return thread->is_hidden_from_external_view() || thread->in_deopt_handler() || thread->jfr_thread_local()->is_excluded();364}365366bool JfrThreadSampleClosure::do_sample_thread(JavaThread* thread, JfrStackFrame* frames, u4 max_frames, JfrSampleType type) {367assert(Threads_lock->owned_by_self(), "Holding the thread table lock.");368if (is_excluded(thread)) {369return false;370}371372bool ret = false;373thread->set_trace_flag(); // Provides StoreLoad, needed to keep read of 
thread state from floating up.374if (JAVA_SAMPLE == type) {375if (thread_state_in_java(thread)) {376ret = sample_thread_in_java(thread, frames, max_frames);377}378} else {379assert(NATIVE_SAMPLE == type, "invariant");380if (thread_state_in_native(thread)) {381ret = sample_thread_in_native(thread, frames, max_frames);382}383}384clear_transition_block(thread);385return ret;386}387388JfrThreadSampler::JfrThreadSampler(size_t interval_java, size_t interval_native, u4 max_frames) :389_sample(),390_sampler_thread(NULL),391_frames(JfrCHeapObj::new_array<JfrStackFrame>(max_frames)),392_last_thread_java(NULL),393_last_thread_native(NULL),394_interval_java(interval_java),395_interval_native(interval_native),396_cur_index(-1),397_max_frames(max_frames),398_disenrolled(true) {399}400401JfrThreadSampler::~JfrThreadSampler() {402JfrCHeapObj::free(_frames, sizeof(JfrStackFrame) * _max_frames);403}404405void JfrThreadSampler::on_javathread_suspend(JavaThread* thread) {406JfrThreadLocal* const tl = thread->jfr_thread_local();407tl->set_trace_block();408{409MonitorLocker ml(transition_block(), Mutex::_no_safepoint_check_flag);410while (thread->is_trace_suspend()) {411ml.wait();412}413tl->clear_trace_block();414}415}416417JavaThread* JfrThreadSampler::next_thread(ThreadsList* t_list, JavaThread* first_sampled, JavaThread* current) {418assert(t_list != NULL, "invariant");419assert(Threads_lock->owned_by_self(), "Holding the thread table lock.");420assert(_cur_index >= -1 && (uint)_cur_index + 1 <= t_list->length(), "invariant");421assert((current == NULL && -1 == _cur_index) || (t_list->find_index_of_JavaThread(current) == _cur_index), "invariant");422if ((uint)_cur_index + 1 == t_list->length()) {423// wrap424_cur_index = 0;425} else {426_cur_index++;427}428assert(_cur_index >= 0 && (uint)_cur_index < t_list->length(), "invariant");429JavaThread* const next = t_list->thread_at(_cur_index);430return next != first_sampled ? 
next : NULL;431}432433void JfrThreadSampler::start_thread() {434if (os::create_thread(this, os::os_thread)) {435os::start_thread(this);436} else {437log_error(jfr)("Failed to create thread for thread sampling");438}439}440441void JfrThreadSampler::enroll() {442if (_disenrolled) {443log_trace(jfr)("Enrolling thread sampler");444_sample.signal();445_disenrolled = false;446}447}448449void JfrThreadSampler::disenroll() {450if (!_disenrolled) {451_sample.wait();452_disenrolled = true;453log_trace(jfr)("Disenrolling thread sampler");454}455}456457static jlong get_monotonic_ms() {458return os::javaTimeNanos() / 1000000;459}460461void JfrThreadSampler::run() {462assert(_sampler_thread == NULL, "invariant");463464_sampler_thread = this;465466jlong last_java_ms = get_monotonic_ms();467jlong last_native_ms = last_java_ms;468while (true) {469if (!_sample.trywait()) {470// disenrolled471_sample.wait();472last_java_ms = get_monotonic_ms();473last_native_ms = last_java_ms;474}475_sample.signal();476jlong java_interval = _interval_java == 0 ? max_jlong : MAX2<jlong>(_interval_java, 1);477jlong native_interval = _interval_native == 0 ? max_jlong : MAX2<jlong>(_interval_native, 1);478479jlong now_ms = get_monotonic_ms();480481/*482* Let I be java_interval or native_interval.483* Let L be last_java_ms or last_native_ms.484* Let N be now_ms.485*486* Interval, I, might be max_jlong so the addition487* could potentially overflow without parenthesis (UB). Also note that488* L - N < 0. 
Avoid UB, by adding parenthesis.489*/490jlong next_j = java_interval + (last_java_ms - now_ms);491jlong next_n = native_interval + (last_native_ms - now_ms);492493jlong sleep_to_next = MIN2<jlong>(next_j, next_n);494495if (sleep_to_next > 0) {496os::naked_short_sleep(sleep_to_next);497}498499if ((next_j - sleep_to_next) <= 0) {500task_stacktrace(JAVA_SAMPLE, &_last_thread_java);501last_java_ms = get_monotonic_ms();502}503if ((next_n - sleep_to_next) <= 0) {504task_stacktrace(NATIVE_SAMPLE, &_last_thread_native);505last_native_ms = get_monotonic_ms();506}507}508}509510void JfrThreadSampler::post_run() {511this->NonJavaThread::post_run();512delete this;513}514515516void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thread) {517ResourceMark rm;518EventExecutionSample samples[MAX_NR_OF_JAVA_SAMPLES];519EventNativeMethodSample samples_native[MAX_NR_OF_NATIVE_SAMPLES];520JfrThreadSampleClosure sample_task(samples, samples_native);521522const uint sample_limit = JAVA_SAMPLE == type ? MAX_NR_OF_JAVA_SAMPLES : MAX_NR_OF_NATIVE_SAMPLES;523uint num_samples = 0;524JavaThread* start = NULL;525526{527elapsedTimer sample_time;528sample_time.start();529{530MutexLocker tlock(Threads_lock);531ThreadsListHandle tlh;532// Resolve a sample session relative start position index into the thread list array.533// In cases where the last sampled thread is NULL or not-NULL but stale, find_index() returns -1.534_cur_index = tlh.list()->find_index_of_JavaThread(*last_thread);535JavaThread* current = _cur_index != -1 ? 
*last_thread : NULL;536537while (num_samples < sample_limit) {538current = next_thread(tlh.list(), start, current);539if (current == NULL) {540break;541}542if (start == NULL) {543start = current; // remember the thread where we started to attempt sampling544}545if (current->is_Compiler_thread()) {546continue;547}548if (sample_task.do_sample_thread(current, _frames, _max_frames, type)) {549num_samples++;550}551}552*last_thread = current; // remember the thread we last attempted to sample553}554sample_time.stop();555log_trace(jfr)("JFR thread sampling done in %3.7f secs with %d java %d native samples",556sample_time.seconds(), sample_task.java_entries(), sample_task.native_entries());557}558if (num_samples > 0) {559sample_task.commit_events(type);560}561}562563static JfrThreadSampling* _instance = NULL;564565JfrThreadSampling& JfrThreadSampling::instance() {566return *_instance;567}568569JfrThreadSampling* JfrThreadSampling::create() {570assert(_instance == NULL, "invariant");571_instance = new JfrThreadSampling();572return _instance;573}574575void JfrThreadSampling::destroy() {576if (_instance != NULL) {577delete _instance;578_instance = NULL;579}580}581582JfrThreadSampling::JfrThreadSampling() : _sampler(NULL) {}583584JfrThreadSampling::~JfrThreadSampling() {585if (_sampler != NULL) {586_sampler->disenroll();587}588}589590static void log(size_t interval_java, size_t interval_native) {591log_trace(jfr)("Updated thread sampler for java: " SIZE_FORMAT " ms, native " SIZE_FORMAT " ms", interval_java, interval_native);592}593594void JfrThreadSampling::start_sampler(size_t interval_java, size_t interval_native) {595assert(_sampler == NULL, "invariant");596log_trace(jfr)("Enrolling thread sampler");597_sampler = new JfrThreadSampler(interval_java, interval_native, JfrOptionSet::stackdepth());598_sampler->start_thread();599_sampler->enroll();600}601602void JfrThreadSampling::set_sampling_interval(bool java_interval, size_t period) {603size_t interval_java = 0;604size_t 
interval_native = 0;605if (_sampler != NULL) {606interval_java = _sampler->get_java_interval();607interval_native = _sampler->get_native_interval();608}609if (java_interval) {610interval_java = period;611} else {612interval_native = period;613}614if (interval_java > 0 || interval_native > 0) {615if (_sampler == NULL) {616log_trace(jfr)("Creating thread sampler for java:%zu ms, native %zu ms", interval_java, interval_native);617start_sampler(interval_java, interval_native);618} else {619_sampler->set_java_interval(interval_java);620_sampler->set_native_interval(interval_native);621_sampler->enroll();622}623assert(_sampler != NULL, "invariant");624log(interval_java, interval_native);625} else if (_sampler != NULL) {626_sampler->disenroll();627}628}629630void JfrThreadSampling::set_java_sample_interval(size_t period) {631if (_instance == NULL && 0 == period) {632return;633}634instance().set_sampling_interval(true, period);635}636637void JfrThreadSampling::set_native_sample_interval(size_t period) {638if (_instance == NULL && 0 == period) {639return;640}641instance().set_sampling_interval(false, period);642}643644void JfrThreadSampling::on_javathread_suspend(JavaThread* thread) {645JfrThreadSampler::on_javathread_suspend(thread);646}647648649