Path: blob/main/notebooks/notebook_randlookups.py
597 views
# ---
# jupyter:
#   jupytext:
#     cell_metadata_filter: -all
#     formats: py:percent,ipynb
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.16.4
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# %%
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Random exploration with credential lookup exploitation (notebook)

This notebook can be run directly from VSCode. To generate a
traditional Jupyter Notebook to open in your browser,
run the VSCode command `Export Current Python File As Jupyter Notebook`.
"""

# pylint: disable=invalid-name

# %%
import logging
import os
import sys

import gymnasium as gym

import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.learner as learner
import cyberbattle.agents.baseline.plotting as p
from cyberbattle._env.cyberbattle_env import AttackerGoal, CyberBattleEnv
from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter
from cyberbattle.agents.baseline.agent_wrapper import Verbosity

# %%
# %matplotlib inline

# %%
# Only report errors, and send them to stdout so they show up in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")


# %%
# Chain environment of size 10; the attacker goal is to own 100% of the nodes.
cyberbattlechain_10 = gym.make(
    "CyberBattleChain-v0",
    size=10,
    attacker_goal=AttackerGoal(own_atleast_percent=1.0),
).unwrapped
# `.unwrapped` is not statically typed as CyberBattleEnv: narrow it (and fail
# fast if the registered environment is not the expected class).
assert isinstance(cyberbattlechain_10, CyberBattleEnv)

# %%
# Environment bounds handed to the learner, built from the environment's identifiers.
ep = w.EnvironmentBounds.of_identifiers(
    maximum_total_credentials=12,
    maximum_node_count=12,
    identifiers=cyberbattlechain_10.identifiers,
)

# %% {"tags": ["parameters"]}
# Cell tagged `parameters`: the tunable settings of this notebook.
iteration_count = 9000
training_episode_count = 50
eval_episode_count = 5
plots_dir = "plots"

# %%
# Make sure the output directory for the saved plot exists.
os.makedirs(plots_dir, exist_ok=True)

# Epsilon-greedy run exploiting the credential cache: epsilon starts at 0.90
# and is multiplied by 0.75 as it decays, down to a floor of 0.01.
credexplot = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    learner=CredentialCacheExploiter(),
    environment_properties=ep,
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_multdecay=0.75,  # 0.999,
    epsilon_minimum=0.01,
    verbosity=Verbosity.Quiet,
    title="Random+CredLookup",
)

# %%
# Baseline run: same learner object type, but epsilon is pinned to 1.0.
randomlearning_results = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    environment_properties=ep,
    learner=CredentialCacheExploiter(),
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=1.0,  # purely random
    render=False,
    verbosity=Verbosity.Quiet,
    title="Random search",
)


# %%
p.plot_episodes_length([credexplot])

p.plot_all_episodes(credexplot)

all_runs = [credexplot, randomlearning_results]
# NOTE(review): the title reports `eval_episode_count`, but `credexplot` above
# was run with `training_episode_count` episodes -- confirm which count the
# title is meant to show. The title also ends with a dangling ",\n".
p.plot_averaged_cummulative_rewards(
    title=f"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n",
    all_runs=all_runs,
    save_at=os.path.join(plots_dir, "randlookups-cumreward.png"),
)