Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/CyberBattleSim
Path: blob/main/notebooks/notebook_withdefender.py
597 views
1
# ---
2
# jupyter:
3
# jupytext:
4
# cell_metadata_filter: -all
5
# formats: py:percent,ipynb
6
# text_representation:
7
# extension: .py
8
# format_name: percent
9
# format_version: '1.3'
10
# jupytext_version: 1.16.4
11
# kernelspec:
12
# display_name: Python 3 (ipykernel)
13
# language: python
14
# name: python3
15
# ---
16
17
# %%
18
# Copyright (c) Microsoft Corporation.
19
# Licensed under the MIT License.
20
21
"""Attacker agent benchmark comparison in presence of a basic defender
22
23
This notebook can be run directly from VSCode. To generate a
24
traditional Jupyter Notebook to open in your browser
25
you can run the VSCode command `Export Current Python File As Jupyter Notebook`.
26
"""
27
28
# %%
29
import sys
30
import os
31
import logging
32
import gymnasium as gym
33
import importlib
34
import cyberbattle.agents.baseline.learner as learner
35
import cyberbattle.agents.baseline.plotting as p
36
import cyberbattle.agents.baseline.agent_wrapper as w
37
import cyberbattle.agents.baseline.agent_dql as dqla
38
import cyberbattle.agents.baseline.agent_randomcredlookup as rca
39
from cyberbattle.agents.baseline.agent_wrapper import Verbosity
40
from cyberbattle._env.defender import ScanAndReimageCompromisedMachines
41
from cyberbattle._env.cyberbattle_env import AttackerGoal, DefenderConstraint, CyberBattleEnv
42
43
# Re-import the baseline learner and plotting modules so that re-running this
# cell in a live kernel picks up their current source. Each module is reloaded
# exactly once (the plotting module was previously reloaded twice in a row).
importlib.reload(learner)
importlib.reload(p)
46
47
# Route log output to stdout and emit only records at ERROR level or above,
# formatted as "<LEVEL>: <message>".
logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")
48
# %matplotlib inline
49
50
51
# %% {"tags": ["parameters"]}
52
# Notebook parameters (this cell carries the "parameters" tag).
# `iteration_count` is forwarded to every agent run below.
iteration_count: int = 600
# Episode count for the DQL training and exploitation runs; also shown in the plot titles.
training_episode_count: int = 10
# Directory under which the cumulative-reward figure is saved.
plots_dir: str = "output/plots"
55
56
# %%
57
# Create the "CyberBattleChain-v0" environment (size=10) with a defender agent
# attached, and keep the unwrapped environment object.
gym_env = gym.make(
    "CyberBattleChain-v0",
    size=10,
    # presumably own_atleast_percent=1.0 means the attacker must own every node -- TODO confirm
    attacker_goal=AttackerGoal(own_atleast=0, own_atleast_percent=1.0),
    defender_constraint=DefenderConstraint(maintain_sla=0.80),
    defender_agent=ScanAndReimageCompromisedMachines(probability=0.6, scan_capacity=2, scan_frequency=5),
).unwrapped

# NOTE(review): gym_env is already the unwrapped environment, so this second
# `.unwrapped` looks redundant -- confirm before simplifying.
cyberbattlechain_defender = gym_env.unwrapped
# Narrow the type for static checkers and fail fast if a different env type was registered.
assert isinstance(cyberbattlechain_defender, CyberBattleEnv)
67
68
69
# Environment bounds shared by all agent runs below, derived from the
# identifiers exposed by the defender-enabled environment.
ep = w.EnvironmentBounds.of_identifiers(
    identifiers=cyberbattlechain_defender.identifiers,
    maximum_node_count=22,
    maximum_total_credentials=22,
)
70
71
# %%
72
# Train a Deep Q-Learning attacker against the defender-enabled environment.
# The returned run is indexed with "learner" further down to reuse the trained policy.
dqn_with_defender = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_defender,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.15,
        replay_memory_size=10000,
        target_update=5,
        batch_size=256,
        learning_rate=0.01,
    ),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    # Exploration rate starts at 0.90 with a floor of 0.10.
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="DQL",
)
85
86
87
# %%
88
# Re-run the policy trained above with epsilon=0.0 (no epsilon-driven random
# exploration). Arguments are passed by keyword, matching the training call.
dql_exploit_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_defender,
    environment_properties=ep,
    learner=dqn_with_defender["learner"],
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.0,  # 0.35,
    render=False,
    # render_last_episode_rewards_to='images/chain10',
    verbosity=Verbosity.Quiet,
    title="Exploiting DQL",
)
100
101
# %%
102
# Baseline run using the credential-cache exploiter with epsilon-greedy exploration.
# The episode count follows the notebook parameter (it was previously hard-coded
# to 10) so that it stays in step with the DQL runs and with the
# "episodes=..." label in the plot titles when the parameter is overridden.
credlookup_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_defender,
    environment_properties=ep,
    learner=rca.CredentialCacheExploiter(),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=10000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="Credential lookups (ϵ-greedy)",
)
115
116
117
# %%
118
# Plots
119
# Runs to compare, in the order they are passed to the plotting helpers.
all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]

# Ensure the output directory exists before a figure is saved into it; the
# plotting helper is not known to create it -- harmless if it already exists.
os.makedirs(plots_dir, exist_ok=True)
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f"Attacker agents vs Basic Defender -- rewards\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}",
    save_at=os.path.join(plots_dir, "withdefender-cumreward.png"),
)
# p.plot_episodes_length(all_runs)
p.plot_averaged_availability(
    title=f"Attacker agents vs Basic Defender -- availability\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}",
    all_runs=all_runs,
    show=False,
)
123
124