Path: blob/main/notebooks/notebook_withdefender.py
597 views
# ---
# jupyter:
#   jupytext:
#     cell_metadata_filter: -all
#     formats: py:percent,ipynb
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.16.4
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# %%
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Attacker agent benchmark comparison in presence of a basic defender

This notebook can be run directly from VSCode. To generate a
traditional Jupyter Notebook to open in your browser,
run the VSCode command `Export Current Python File As Jupyter Notebook`.
"""

# %%
import sys
import os
import logging
import gymnasium as gym
import importlib
import cyberbattle.agents.baseline.learner as learner
import cyberbattle.agents.baseline.plotting as p
import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.agent_dql as dqla
import cyberbattle.agents.baseline.agent_randomcredlookup as rca
from cyberbattle.agents.baseline.agent_wrapper import Verbosity
from cyberbattle._env.defender import ScanAndReimageCompromisedMachines
from cyberbattle._env.cyberbattle_env import AttackerGoal, DefenderConstraint, CyberBattleEnv

# Re-import the modules under active development so that edits to them are
# picked up when this cell is re-run without restarting the kernel.
importlib.reload(learner)
importlib.reload(p)

# Only errors are logged, and they go to stdout so they appear in cell output.
logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")
# %matplotlib inline


# %% {"tags": ["parameters"]}
# Notebook parameters (this cell carries the "parameters" tag).
iteration_count = 600  # passed as `iteration_count` to every run below
training_episode_count = 10  # passed as `episode_count` to every run below
plots_dir = "output/plots"  # directory where the reward plot is saved

# %%
# Chain environment of size 10 with a defender agent that scans and re-images
# compromised machines, and a defender constraint on the SLA to maintain.
gym_env = gym.make(
    "CyberBattleChain-v0",
    size=10,
    attacker_goal=AttackerGoal(own_atleast=0, own_atleast_percent=1.0),
    defender_constraint=DefenderConstraint(maintain_sla=0.80),
    defender_agent=ScanAndReimageCompromisedMachines(probability=0.6, scan_capacity=2, scan_frequency=5),
).unwrapped

# `gym_env` was already unwrapped above; the assert narrows the type for the
# attribute accesses (`identifiers`, `name`) made further down.
cyberbattlechain_defender = gym_env.unwrapped
assert isinstance(cyberbattlechain_defender, CyberBattleEnv)


# Environment bounds shared by every agent run in this notebook.
ep = w.EnvironmentBounds.of_identifiers(
    maximum_total_credentials=22,
    maximum_node_count=22,
    identifiers=cyberbattlechain_defender.identifiers,
)

# %%
# Train a Deep Q-learning attacker against the defender, with epsilon
# starting at 0.90 and a configured minimum of 0.10.
dqn_with_defender = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_defender,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.15,
        replay_memory_size=10000,
        target_update=5,
        batch_size=256,
        learning_rate=0.01,
    ),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="DQL",
)


# %%
# Re-run the learner obtained from the training run above with epsilon set to 0.
dql_exploit_run = learner.epsilon_greedy_search(
    cyberbattlechain_defender,
    ep,
    learner=dqn_with_defender["learner"],
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.0,  # 0.35,
    render=False,
    # render_last_episode_rewards_to='images/chain10',
    verbosity=Verbosity.Quiet,
    title="Exploiting DQL",
)

# %%
# Baseline run using the credential-cache exploiter agent.
credlookup_run = learner.epsilon_greedy_search(
    cyberbattlechain_defender,
    ep,
    learner=rca.CredentialCacheExploiter(),
    # Use the shared notebook parameter (default 10) rather than a literal so
    # this run matches the other runs and the episode count shown in the plot
    # titles when the parameter is overridden.
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=10000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="Credential lookups (ϵ-greedy)",
)


# %%
# Plots
all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]

# Make sure the output directory exists before the reward plot is saved there.
os.makedirs(plots_dir, exist_ok=True)

p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f"Attacker agents vs Basic Defender -- rewards\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}",
    save_at=os.path.join(plots_dir, "withdefender-cumreward.png"),
)
# p.plot_episodes_length(all_runs)
p.plot_averaged_availability(
    title=f"Attacker agents vs Basic Defender -- availability\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}",
    all_runs=all_runs,
    show=False,
)