GitHub Repository: microsoft/CyberBattleSim
Path: blob/main/notebooks/dql_active_directory.py
# ---
# jupyter:
#   jupytext:
#     formats: py:percent,ipynb
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#     jupytext_version: 1.16.4
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# %% [markdown]
# # DQL agent running on the Active Directory sample environment
#
# Trains a Deep Q-learning (DQL) agent on a sequence of `ActiveDirectory` sample
# gyms, transferring the learned policy from one environment to the next, then
# evaluates the resulting agent on a held-out environment.

# %%
import logging
import sys

import gymnasium as gym

import cyberbattle.agents.baseline.agent_dql as dqla
import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.learner as learner
from cyberbattle.agents.baseline.agent_wrapper import ActionTrackingStateAugmentation, AgentWrapper, Verbosity

logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")
# %matplotlib inline

# %% tags=["parameters"]
# Train on ActiveDirectory-v0 through ActiveDirectory-v{ngyms - 1};
# ActiveDirectory-v{ngyms} is held out for evaluation at the end.
ngyms = 9
iteration_count = 1000

# %%
gymids = [f"ActiveDirectory-v{i}" for i in range(ngyms)]

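# %% [markdown]
# Optional sanity check: importing `cyberbattle` registers the sample gyms, so every
# identifier above should appear in the gymnasium registry. This is a minimal sketch
# assuming the standard `gymnasium.registry` mapping from environment id to spec.

# %%
registered = sorted(k for k in gym.registry if k.startswith("ActiveDirectory"))
print(registered)
assert all(gymid in gym.registry for gymid in gymids), "some ActiveDirectory gyms are not registered"
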
# %%
from typing import cast

from cyberbattle._env.cyberbattle_env import CyberBattleEnv

envs = [cast(CyberBattleEnv, gym.make(gymid).unwrapped) for gymid in gymids]
# seed each environment for reproducibility (`map` is lazy in Python 3, so an
# explicit loop is needed for the resets to actually run)
for env in envs:
    env.reset(seed=1)
ep = w.EnvironmentBounds.of_identifiers(maximum_node_count=30, maximum_total_credentials=50, identifiers=envs[0].identifiers)

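# %% [markdown]
# `EnvironmentBounds` fixes the shapes of the observation and action spaces across
# all gyms, which is what allows a single policy to transfer between environments of
# different sizes. The quick look below assumes the bounds object exposes fields
# named after the arguments passed to `of_identifiers` above:

# %%
print(f"max nodes: {ep.maximum_node_count}, max credentials: {ep.maximum_total_credentials}")
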
# %%
# Train the Deep Q-learning agent on each environment in turn, reusing the same
# policy object throughout so that learning transfers from one gym to the next
_l = dqla.DeepQLearnerPolicy(
    ep=ep,
    gamma=0.015,
    replay_memory_size=10000,
    target_update=5,
    batch_size=512,
    learning_rate=0.01,  # torch default learning rate is 1e-2
)
for i, env in enumerate(envs):
    # with the default ngyms=9, epsilon decays from 1.0 on the first gym to 0.2 on the last
    epsilon = (10 - i) / 10
    # training episode count decays from 10 on the first gym down to 2 on the last
    training_episode_count = 1 + (9 - i)
    dqn_learning_run = learner.epsilon_greedy_search(
        cyberbattle_gym_env=env,
        environment_properties=ep,
        learner=_l,
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=epsilon,
        epsilon_exponential_decay=50000,
        epsilon_minimum=0.1,
        verbosity=Verbosity.Quiet,
        render=False,
        plot_episodes_length=False,
        title=f"DQL {i}",
    )
    # carry the trained policy over to the next environment (transfer learning)
    _l = dqn_learning_run["learner"]

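# %% [markdown]
# Optionally persist the trained weights so the evaluation below can be rerun
# without retraining. This is a minimal sketch that assumes `DeepQLearnerPolicy`
# keeps its underlying torch module in a `policy_net` attribute; that attribute name
# is an assumption about the agent's internals, not a documented API, hence the guard.

# %%
import torch

policy_net = getattr(_l, "policy_net", None)  # assumed attribute name, not a documented API
if policy_net is not None:
    torch.save(policy_net.state_dict(), "dql_active_directory.pt")
else:
    print("No policy_net attribute found; skipping checkpoint")
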
# %%
# Evaluate the trained agent on the held-out environment
tiny = cast(CyberBattleEnv, gym.make(f"ActiveDirectory-v{ngyms}").unwrapped)
current_o, _ = tiny.reset(seed=1)
wrapped_env = AgentWrapper(tiny, ActionTrackingStateAugmentation(ep, current_o))
# Use the trained agent to run the steps one by one
max_steps = 1000
# h = []
for i in range(max_steps):
    # run the next action suggested by the DQL agent
    _, next_action, _ = _l.exploit(wrapped_env, current_o)
    # h.append((tiny.get_explored_network_node_properties_bitmap_as_numpy(current_o), next_action))
    if next_action is None:
        print("No more learned moves")
        break
    current_o, _, is_done, is_truncated, _ = wrapped_env.step(next_action)
    if is_done or is_truncated:
        print("Finished simulation")
        break
tiny.render()
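
# %% [markdown]
# Cleanup: close the environments to release any resources held by gymnasium.

# %%
for env in envs:
    env.close()
tiny.close()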