Path: blob/master/src/hotspot/share/jfr/support/jfrAdaptiveSampler.cpp
41149 views
/*1* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.2* Copyright (c) 2020, Datadog, Inc. All rights reserved.3* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.4*5* This code is free software; you can redistribute it and/or modify it6* under the terms of the GNU General Public License version 2 only, as7* published by the Free Software Foundation.8*9* This code is distributed in the hope that it will be useful, but WITHOUT10* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or11* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License12* version 2 for more details (a copy is included in the LICENSE file that13* accompanied this code).14*15* You should have received a copy of the GNU General Public License version16* 2 along with this work; if not, write to the Free Software Foundation,17* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.18*19* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA20* or visit www.oracle.com if you need additional information or have any21* questions.22*23*/2425#include "precompiled.hpp"26#include "jfr/support/jfrAdaptiveSampler.hpp"27#include "jfr/utilities/jfrRandom.inline.hpp"28#include "jfr/utilities/jfrSpinlockHelper.hpp"29#include "jfr/utilities/jfrTime.hpp"30#include "jfr/utilities/jfrTimeConverter.hpp"31#include "jfr/utilities/jfrTryLock.hpp"32#include "logging/log.hpp"33#include "runtime/atomic.hpp"34#include "utilities/globalDefinitions.hpp"35#include <cmath>3637JfrSamplerWindow::JfrSamplerWindow() :38_params(),39_end_ticks(0),40_sampling_interval(1),41_projected_population_size(0),42_measured_population_size(0) {}4344JfrAdaptiveSampler::JfrAdaptiveSampler() :45_prng(this),46_window_0(NULL),47_window_1(NULL),48_active_window(NULL),49_avg_population_size(0),50_ewma_population_size_alpha(0),51_acc_debt_carry_limit(0),52_acc_debt_carry_count(0),53_lock(0) {}5455JfrAdaptiveSampler::~JfrAdaptiveSampler() {56delete _window_0;57delete _window_1;58}5960bool JfrAdaptiveSampler::initialize() {61assert(_window_0 == NULL, "invariant");62_window_0 = new JfrSamplerWindow();63if (_window_0 == NULL) {64return false;65}66assert(_window_1 == NULL, "invariant");67_window_1 = new JfrSamplerWindow();68if (_window_1 == NULL) {69return false;70}71_active_window = _window_0;72return true;73}7475/*76* The entry point to the sampler.77*/78bool JfrAdaptiveSampler::sample(int64_t timestamp) {79bool expired_window;80const bool result = active_window()->sample(timestamp, &expired_window);81if (expired_window) {82JfrTryLock mutex(&_lock);83if (mutex.acquired()) {84rotate_window(timestamp);85}86}87return result;88}8990inline const JfrSamplerWindow* JfrAdaptiveSampler::active_window() const {91return Atomic::load_acquire(&_active_window);92}9394inline int64_t now() {95return JfrTicks::now().value();96}9798inline bool JfrSamplerWindow::is_expired(int64_t timestamp) const {99const int64_t end_ticks = Atomic::load(&_end_ticks);100return timestamp == 0 ? now() >= end_ticks : timestamp >= end_ticks;101}102103bool JfrSamplerWindow::sample(int64_t timestamp, bool* expired_window) const {104assert(expired_window != NULL, "invariant");105*expired_window = is_expired(timestamp);106return *expired_window ? false : sample();107}108109inline bool JfrSamplerWindow::sample() const {110const size_t ordinal = Atomic::add(&_measured_population_size, static_cast<size_t>(1));111return ordinal <= _projected_population_size && ordinal % _sampling_interval == 0;112}113114// Called exclusively by the holder of the lock when a window is determined to have expired.115void JfrAdaptiveSampler::rotate_window(int64_t timestamp) {116assert(_lock, "invariant");117const JfrSamplerWindow* const current = active_window();118assert(current != NULL, "invariant");119if (!current->is_expired(timestamp)) {120// Someone took care of it.121return;122}123rotate(current);124}125126// Subclasses can call this to immediately trigger a reconfiguration of the sampler.127// There is no need to await the expiration of the current active window.128void JfrAdaptiveSampler::reconfigure() {129assert(_lock, "invariant");130rotate(active_window());131}132133// Call next_window_param() to report the expired window and to retreive params for the next window.134void JfrAdaptiveSampler::rotate(const JfrSamplerWindow* expired) {135assert(expired == active_window(), "invariant");136install(configure(next_window_params(expired), expired));137}138139inline void JfrAdaptiveSampler::install(const JfrSamplerWindow* next) {140assert(next != active_window(), "invariant");141Atomic::release_store(&_active_window, next);142}143144const JfrSamplerWindow* JfrAdaptiveSampler::configure(const JfrSamplerParams& params, const JfrSamplerWindow* expired) {145assert(_lock, "invariant");146if (params.reconfigure) {147// Store updated params once to both windows.148const_cast<JfrSamplerWindow*>(expired)->_params = params;149next_window(expired)->_params = params;150configure(params);151}152JfrSamplerWindow* const next = set_rate(params, expired);153next->initialize(params);154return next;155}156157/*158* Exponentially Weighted Moving Average (EWMA):159*160* Y is a datapoint (at time t)161* S is the current EMWA (at time t-1)162* alpha represents the degree of weighting decrease, a constant smoothing factor between 0 and 1.163*164* A higher alpha discounts older observations faster.165* Returns the new EWMA for S166*/167168inline double exponentially_weighted_moving_average(double Y, double alpha, double S) {169return alpha * Y + (1 - alpha) * S;170}171172inline double compute_ewma_alpha_coefficient(size_t lookback_count) {173return lookback_count <= 1 ? 1 : static_cast<double>(1) / static_cast<double>(lookback_count);174}175176inline size_t compute_accumulated_debt_carry_limit(const JfrSamplerParams& params) {177if (params.window_duration_ms == 0 || params.window_duration_ms >= MILLIUNITS) {178return 1;179}180return MILLIUNITS / params.window_duration_ms;181}182183void JfrAdaptiveSampler::configure(const JfrSamplerParams& params) {184assert(params.reconfigure, "invariant");185_avg_population_size = 0;186_ewma_population_size_alpha = compute_ewma_alpha_coefficient(params.window_lookback_count);187_acc_debt_carry_limit = compute_accumulated_debt_carry_limit(params);188_acc_debt_carry_count = _acc_debt_carry_limit;189params.reconfigure = false;190}191192inline int64_t millis_to_countertime(int64_t millis) {193return JfrTimeConverter::nanos_to_countertime(millis * NANOSECS_PER_MILLISEC);194}195196void JfrSamplerWindow::initialize(const JfrSamplerParams& params) {197assert(_sampling_interval >= 1, "invariant");198if (params.window_duration_ms == 0) {199Atomic::store(&_end_ticks, static_cast<int64_t>(0));200return;201}202Atomic::store(&_measured_population_size, static_cast<size_t>(0));203const int64_t end_ticks = now() + millis_to_countertime(params.window_duration_ms);204Atomic::store(&_end_ticks, end_ticks);205}206207/*208* Based on what it has learned from the past, the sampler creates a future 'projection',209* a speculation, or model, of what the situation will be like during the next window.210* This projection / model is used to derive values for the parameters, which are estimates for211* collecting a sample set that, should the model hold, is as close as possible to the target,212* i.e. the set point, which is a function of the number of sample_points_per_window + amortization.213* The model is a geometric distribution over the number of trials / selections required until success.214* For each window, the sampling interval is a random variable from this geometric distribution.215*/216JfrSamplerWindow* JfrAdaptiveSampler::set_rate(const JfrSamplerParams& params, const JfrSamplerWindow* expired) {217JfrSamplerWindow* const next = next_window(expired);218assert(next != expired, "invariant");219const size_t sample_size = project_sample_size(params, expired);220if (sample_size == 0) {221next->_projected_population_size = 0;222return next;223}224next->_sampling_interval = derive_sampling_interval(sample_size, expired);225assert(next->_sampling_interval >= 1, "invariant");226next->_projected_population_size = sample_size * next->_sampling_interval;227return next;228}229230inline JfrSamplerWindow* JfrAdaptiveSampler::next_window(const JfrSamplerWindow* expired) const {231assert(expired != NULL, "invariant");232return expired == _window_0 ? _window_1 : _window_0;233}234235size_t JfrAdaptiveSampler::project_sample_size(const JfrSamplerParams& params, const JfrSamplerWindow* expired) {236return params.sample_points_per_window + amortize_debt(expired);237}238239/*240* When the sampler is configured to maintain a rate, is employs the concepts241* of 'debt' and 'accumulated debt'. 'Accumulated debt' can be thought of as242* a cumulative error term, and is indicative for how much the sampler is243* deviating from a set point, i.e. the ideal target rate. Debt accumulates naturally244* as a function of undersampled windows, caused by system fluctuations,245* i.e. too small populations.246*247* A specified rate is implicitly a _maximal_ rate, so the sampler must ensure248* to respect this 'limit'. Rates are normalized as per-second ratios, hence the249* limit to respect is on a per second basis. During this second, the sampler250* has freedom to dynamically re-adjust, and it does so by 'amortizing'251* accumulated debt over a certain number of windows that fall within the second.252*253* Intuitively, accumulated debt 'carry over' from the predecessor to the successor254* window if within the allowable time frame (determined in # of 'windows' given by255* _acc_debt_carry_limit). The successor window will sample more points to make amends,256* or 'amortize' debt accumulated by its predecessor(s).257*/258size_t JfrAdaptiveSampler::amortize_debt(const JfrSamplerWindow* expired) {259assert(expired != NULL, "invariant");260const intptr_t accumulated_debt = expired->accumulated_debt();261assert(accumulated_debt <= 0, "invariant");262if (_acc_debt_carry_count == _acc_debt_carry_limit) {263_acc_debt_carry_count = 1;264return 0;265}266++_acc_debt_carry_count;267return -accumulated_debt; // negation268}269270inline size_t JfrSamplerWindow::max_sample_size() const {271return _projected_population_size / _sampling_interval;272}273274// The sample size is derived from the measured population size.275size_t JfrSamplerWindow::sample_size() const {276const size_t size = population_size();277return size > _projected_population_size ? max_sample_size() : size / _sampling_interval;278}279280size_t JfrSamplerWindow::population_size() const {281return Atomic::load(&_measured_population_size);282}283284intptr_t JfrSamplerWindow::accumulated_debt() const {285return _projected_population_size == 0 ? 0 : static_cast<intptr_t>(_params.sample_points_per_window - max_sample_size()) + debt();286}287288intptr_t JfrSamplerWindow::debt() const {289return _projected_population_size == 0 ? 0 : static_cast<intptr_t>(sample_size() - _params.sample_points_per_window);290}291292/*293* Inverse transform sampling from a uniform to a geometric distribution.294*295* PMF: f(x) = P(X=x) = ((1-p)^x-1)p296*297* CDF: F(x) = P(X<=x) = 1 - (1-p)^x298*299* Inv300* CDF: F'(u) = ceil( ln(1-u) / ln(1-p) ) // u = random uniform, 0.0 < u < 1.0301*302*/303inline size_t next_geometric(double p, double u) {304assert(u >= 0.0, "invariant");305assert(u <= 1.0, "invariant");306if (u == 0.0) {307u = 0.01;308} else if (u == 1.0) {309u = 0.99;310}311// Inverse CDF for the geometric distribution.312return ceil(log(1.0 - u) / log(1.0 - p));313}314315size_t JfrAdaptiveSampler::derive_sampling_interval(double sample_size, const JfrSamplerWindow* expired) {316assert(sample_size > 0, "invariant");317const size_t population_size = project_population_size(expired);318if (population_size <= sample_size) {319return 1;320}321assert(population_size > 0, "invariant");322const double projected_probability = sample_size / population_size;323return next_geometric(projected_probability, _prng.next_uniform());324}325326// The projected population size is an exponentially weighted moving average, a function of the window_lookback_count.327inline size_t JfrAdaptiveSampler::project_population_size(const JfrSamplerWindow* expired) {328assert(expired != NULL, "invariant");329_avg_population_size = exponentially_weighted_moving_average(expired->population_size(), _ewma_population_size_alpha, _avg_population_size);330return _avg_population_size;331}332333/* GTEST support */334JfrGTestFixedRateSampler::JfrGTestFixedRateSampler(size_t sample_points_per_window, size_t window_duration_ms, size_t lookback_count) : JfrAdaptiveSampler(), _params() {335_sample_size_ewma = 0.0;336_params.sample_points_per_window = sample_points_per_window;337_params.window_duration_ms = window_duration_ms;338_params.window_lookback_count = lookback_count;339_params.reconfigure = true;340}341342bool JfrGTestFixedRateSampler::initialize() {343const bool result = JfrAdaptiveSampler::initialize();344JfrSpinlockHelper mutex(&_lock);345reconfigure();346return result;347}348349/*350* To start debugging the sampler: -Xlog:jfr+system+throttle=debug351* It will log details of each expired window together with an average sample size.352*353* Excerpt:354*355* "JfrGTestFixedRateSampler: avg.sample size: 19.8377, window set point: 20 ..."356*357* Monitoring the relation of average sample size to the window set point, i.e the target,358* is a good indicator of how the sampler is performing over time.359*360*/361static void log(const JfrSamplerWindow* expired, double* sample_size_ewma) {362assert(sample_size_ewma != NULL, "invariant");363if (log_is_enabled(Debug, jfr, system, throttle)) {364*sample_size_ewma = exponentially_weighted_moving_average(expired->sample_size(), compute_ewma_alpha_coefficient(expired->params().window_lookback_count), *sample_size_ewma);365log_debug(jfr, system, throttle)("JfrGTestFixedRateSampler: avg.sample size: %0.4f, window set point: %zu, sample size: %zu, population size: %zu, ratio: %.4f, window duration: %zu ms\n",366*sample_size_ewma, expired->params().sample_points_per_window, expired->sample_size(), expired->population_size(),367expired->population_size() == 0 ? 0 : (double)expired->sample_size() / (double)expired->population_size(),368expired->params().window_duration_ms);369}370}371372/*373* This is the feedback control loop.374*375* The JfrAdaptiveSampler engine calls this when a sampler window has expired, providing376* us with an opportunity to perform some analysis.To reciprocate, we returns a set of377* parameters, possibly updated, for the engine to apply to the next window.378*/379const JfrSamplerParams& JfrGTestFixedRateSampler::next_window_params(const JfrSamplerWindow* expired) {380assert(expired != NULL, "invariant");381assert(_lock, "invariant");382log(expired, &_sample_size_ewma);383return _params;384}385386387