Path: blob/main/cyberbattle/agents/baseline/plotting.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Plotting helpers for agent benchmarking"""

import matplotlib.pyplot as plt  # type: ignore
import numpy as np

import matplotlib  # type: ignore

matplotlib.use("Agg")


def new_plot(title):
    """Prepare a new plot of cumulative rewards"""
    plt.figure(figsize=(10, 8))
    plt.ylabel("cumulative reward", fontsize=20)
    plt.xlabel("step", fontsize=20)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.title(title, fontsize=12)


def pad(array, length):
    """Pad an array with 0s to make it of the desired length"""
    padding = np.zeros((length,))
    padding[: len(array)] = array
    return padding


def plot_episodes_rewards_averaged(results):
    """Plot cumulative rewards for a given set of specified episodes"""
    max_iteration_count = np.max([len(r) for r in results["all_episodes_rewards"]])

    all_episodes_rewards_padded = [pad(rewards, max_iteration_count) for rewards in results["all_episodes_rewards"]]
    cumrewards = np.cumsum(all_episodes_rewards_padded, axis=1)
    avg = np.average(cumrewards, axis=0)
    std = np.std(cumrewards, axis=0)
    x = list(range(len(std)))
    plt.plot(x, avg, label=results["title"])
    plt.fill_between(x, avg - std, avg + std, alpha=0.5)


def fill_with_latest_value(array, length):
    """Pad an array with its last value to make it of the desired length"""
    pad_width = length - len(array)
    if pad_width > 0:
        return np.pad(array, (0, pad_width), mode="edge")
    else:
        return array


def plot_episodes_availability_averaged(results):
    """Plot availability for a given set of specified episodes"""
    data = results["all_episodes_availability"]
    longest_episode_length = np.max([len(r) for r in data])

    all_episodes_padded = [fill_with_latest_value(av, longest_episode_length) for av in data]
    avg = np.average(all_episodes_padded, axis=0)
    std = np.std(all_episodes_padded, axis=0)
    x = list(range(len(std)))
    plt.plot(x, avg, label=results["title"])
    plt.fill_between(x, avg - std, avg + std, alpha=0.5)


def plot_episodes_length(learning_results):
    """Plot the length of every episode"""
    plt.figure(figsize=(10, 8))
    plt.ylabel("#iterations", fontsize=20)
    plt.xlabel("episode", fontsize=20)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.title("Length of each episode", fontsize=12)

    for results in learning_results:
        iterations = [len(e) for e in results["all_episodes_rewards"]]
        episode = list(range(len(results["all_episodes_rewards"])))
        plt.plot(episode, iterations, label=f"{results['title']}")

    plt.legend(loc="upper right")
    plt.show()


def plot_each_episode(results):
    """Plot cumulative rewards for each episode"""
    for i, episode in enumerate(results["all_episodes_rewards"]):
        cumrewards = np.cumsum(episode)
        x = list(range(len(cumrewards)))
        plt.plot(x, cumrewards, label=f"Episode {i}")


def plot_all_episodes(r):
    """Plot cumulative rewards for every episode"""
    new_plot(r["title"])
    plot_each_episode(r)
    plt.legend(loc="lower right")
    plt.show()


def plot_averaged_cummulative_rewards(title, all_runs, show=True, save_at=None):
    """Plot averaged cumulative rewards"""
    new_plot(title)
    for r in all_runs:
        plot_episodes_rewards_averaged(r)
    plt.legend(loc="lower right")
    if save_at:
        plt.savefig(save_at)
    if show:
        plt.show()


def plot_averaged_availability(title, all_runs, show=False):
    """Plot averaged network availability"""
    plt.figure(figsize=(10, 8))
    plt.ylabel("network availability", fontsize=20)
    plt.xlabel("step", fontsize=20)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.title(title, fontsize=12)
    for r in all_runs:
        plot_episodes_availability_averaged(r)
    plt.legend(loc="lower right")
    if show:
        plt.show()


def new_plot_loss():
    """Plot MSE loss averaged over all episodes"""
    plt.figure(figsize=(10, 8))
    plt.ylabel("loss", fontsize=20)
    plt.xlabel("episodes", fontsize=20)
    plt.xticks(size=12)
    plt.yticks(size=20)
    plt.title("Loss", fontsize=12)


def plot_all_episodes_loss(all_episodes_losses, name, label):
    """Plot the loss recorded for each episode of a learning run"""
    x = list(range(len(all_episodes_losses)))
    plt.plot(x, all_episodes_losses, label=f"{name} {label}")


def running_mean(x: np.ndarray, size):
    """Return the moving average of x over a window of length 'size'"""
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return np.subtract(cumsum[size:], cumsum[:-size]) / float(size)


class PlotTraining:
    """Plot training-related stats"""

    def __init__(self, title, render_each_episode):
        self.episode_durations = []
        self.title = title
        self.render_each_episode = render_each_episode

    def plot_durations(self, average_window=5):
        plt.figure()
        durations_t = np.array(self.episode_durations, dtype=np.float32)
        plt.xlabel("Episode")
        plt.ylabel("Duration")
        plt.title(self.title, fontsize=12)

        episodes = [i + 1 for i in range(len(self.episode_durations))]
        plt.plot(episodes, durations_t)
        # plot the running average of episode durations,
        # left-padded with zeros so it aligns with the raw curve
        if len(durations_t) >= average_window:
            means = running_mean(durations_t, average_window)
            means = np.concatenate((np.zeros(average_window - 1), means))
            plt.plot(episodes, means)

        plt.show()

    def episode_done(self, length):
        self.episode_durations.append(length)
        if self.render_each_episode:
            self.plot_durations()

    def plot_end(self):
        self.plot_durations()
        plt.ioff()  # type: ignore


def length_of_all_episodes(run):
    """Get the length of every episode"""
    return [len(e) for e in run["all_episodes_rewards"]]


def reduce(x, desired_width):
    """Downsample x to the desired width by averaging equal-sized chunks"""
    return [np.average(c) for c in np.array_split(x, desired_width)]


def episodes_rewards_averaged(run):
    """Compute the cumulative reward averaged over all episodes of a run"""
    max_iteration_count = np.max([len(r) for r in run["all_episodes_rewards"]])
    all_episodes_rewards_padded = [pad(rewards, max_iteration_count) for rewards in run["all_episodes_rewards"]]
    cumrewards = np.cumsum(all_episodes_rewards_padded, axis=1)
    avg = np.average(cumrewards, axis=0)
    return list(avg)


def episodes_lengths_for_all_runs(all_runs):
    """Get the episode lengths of every run"""
    return [length_of_all_episodes(run) for run in all_runs]


def averaged_cummulative_rewards(all_runs, width):
    """Get the averaged cumulative rewards of every run, downsampled to the given width"""
    return [reduce(episodes_rewards_averaged(run), width) for run in all_runs]
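
# ---------------------------------------------------------------------------
# Minimal usage sketch with synthetic data, assuming runs are dicts carrying a
# "title", an "all_episodes_rewards" list (one list of per-step rewards per
# episode) and an "all_episodes_availability" list of the same shape, as the
# helpers above expect. `demo_run` and `fake_episode` are made up purely for
# illustration; real results would come from an agent's training loop.
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    def fake_episode(low=20, high=40):
        # a made-up episode: a random-length list of per-step rewards
        return list(rng.integers(0, 10, size=int(rng.integers(low, high))))

    demo_run = {
        "title": "demo agent",
        "all_episodes_rewards": [fake_episode() for _ in range(5)],
        "all_episodes_availability": [list(rng.uniform(0.5, 1.0, size=int(rng.integers(20, 40)))) for _ in range(5)],
    }

    # Averaged cumulative reward with a +/- one-standard-deviation band; with
    # the "Agg" backend selected above, save to file rather than showing.
    plot_averaged_cummulative_rewards("Demo: cumulative reward", [demo_run], show=False, save_at="demo_rewards.png")

    # Numeric summaries: per-episode lengths, and per-run cumulative rewards
    # downsampled to 10 points each.
    print(episodes_lengths_for_all_runs([demo_run]))
    print(averaged_cummulative_rewards([demo_run], width=10))

    # PlotTraining lifecycle: record each episode's length as it completes,
    # then render the duration curve with its 5-episode running average
    # (plt.show() is a no-op under the non-interactive Agg backend).
    tracker = PlotTraining(title="demo training", render_each_episode=False)
    for rewards in demo_run["all_episodes_rewards"]:
        tracker.episode_done(len(rewards))
    tracker.plot_end()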