Path: blob/main/notebooks/dql_active_directory.py
# ---
# jupyter:
#   jupytext:
#     formats: py:percent,ipynb
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.16.4
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# %% [markdown]
# # DQL agent running on the Active Directory sample environment

# %%
import logging
import sys
from typing import cast

import gymnasium as gym

import cyberbattle.agents.baseline.learner as learner
import cyberbattle.agents.baseline.agent_wrapper as w
import cyberbattle.agents.baseline.agent_dql as dqla
from cyberbattle._env.cyberbattle_env import CyberBattleEnv
from cyberbattle.agents.baseline.agent_wrapper import ActionTrackingStateAugmentation, AgentWrapper, Verbosity

logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format="%(levelname)s: %(message)s")
# %matplotlib inline

# %% tags=["parameters"]
ngyms = 9
iteration_count = 1000

# %%
gymids = [f"ActiveDirectory-v{i}" for i in range(ngyms)]

# %%
envs = [cast(CyberBattleEnv, gym.make(gymid).unwrapped) for gymid in gymids]
# `map` is lazy, so iterate explicitly to make sure every environment is actually reset
for env in envs:
    env.reset(seed=1)
ep = w.EnvironmentBounds.of_identifiers(maximum_node_count=30, maximum_total_credentials=50, identifiers=envs[0].identifiers)

# %%
# Train the Deep Q-learning agent on each environment in turn,
# transferring the learned policy from one environment to the next
_l = dqla.DeepQLearnerPolicy(
    ep=ep,
    gamma=0.015,
    replay_memory_size=10000,
    target_update=5,
    batch_size=512,
    learning_rate=0.01,  # torch default learning rate is 1e-2
)
for i, env in enumerate(envs):
    # anneal the starting exploration rate from 1.0 down to 0.2 across the envs
    epsilon = (10 - i) / 10
    # shrink the episode budget from 10 for the first env down to 2 for the last
    # (the schedule is printed by the sketch cell at the end of this notebook)
    training_episode_count = 1 + (9 - i)
    dqn_learning_run = learner.epsilon_greedy_search(
        cyberbattle_gym_env=env,
        environment_properties=ep,
        learner=_l,
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=epsilon,
        epsilon_exponential_decay=50000,
        epsilon_minimum=0.1,
        verbosity=Verbosity.Quiet,
        render=False,
        plot_episodes_length=False,
        title=f"DQL {i}",
    )
    # carry the trained policy over to the next environment
    _l = dqn_learning_run["learner"]

# %%
# Evaluate the trained agent on a held-out environment it was not trained on
tiny = cast(CyberBattleEnv, gym.make(f"ActiveDirectory-v{ngyms}").unwrapped)
# a single seeded reset, keeping the resulting observation
current_o, _ = tiny.reset(seed=1)
wrapped_env = AgentWrapper(tiny, ActionTrackingStateAugmentation(ep, current_o))
# Use the trained agent to run the steps one by one
max_steps = 1000
# h = []
for i in range(max_steps):
    # next action suggested by the DQL agent
    _, next_action, _ = _l.exploit(wrapped_env, current_o)
    # h.append((tiny.get_explored_network_node_properties_bitmap_as_numpy(current_o), next_action))
    if next_action is None:
        print("No more learned moves")
        break
    # run the suggested action
    current_o, _, is_done, _, _ = wrapped_env.step(next_action)
    if is_done:
        print("Finished simulation")
        break
tiny.render()
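# %% [markdown]
# The training loop above anneals two quantities at once: the starting
# exploration rate `epsilon` and the per-environment episode budget
# `training_episode_count`. The next cell is a small illustrative sketch
# (not part of training) that prints the resulting schedule, reusing only
# `ngyms` from the parameters cell.

# %%
for i in range(ngyms):
    # same arithmetic as the training loop: epsilon 1.0 -> 0.2, episodes 10 -> 2
    print(f"env {i}: start epsilon = {(10 - i) / 10:.1f}, training episodes = {1 + (9 - i)}")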
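# %% [markdown]
# Within each call to `learner.epsilon_greedy_search`, `epsilon` also decays
# towards `epsilon_minimum` as iterations accumulate. The exact schedule is
# internal to the learner; the cell below sketches the common DQN-style
# exponential form (an assumption, not the library's documented behavior)
# to give a feel for what `epsilon_exponential_decay=50000` means.

# %%
import math


def epsilon_at(step: int, start: float = 1.0, minimum: float = 0.1, decay: float = 50000.0) -> float:
    """Assumed DQN-style schedule: decay from `start` towards `minimum` with time constant `decay`."""
    return minimum + (start - minimum) * math.exp(-step / decay)


for step in (0, 10_000, 50_000, 100_000, 250_000):
    print(f"step {step:>7}: epsilon ~ {epsilon_at(step):.3f}")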
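# %% [markdown]
# As a variation on the step-by-step run above, the sketch below replays the
# trained policy from a fresh seeded reset and accumulates the reward, using
# only calls already seen in this notebook (`reset`, `AgentWrapper`,
# `exploit`, `step`). The second element of the `step` tuple is the step
# reward, per the gymnasium step signature.

# %%
obs, _ = tiny.reset(seed=1)
replay_env = AgentWrapper(tiny, ActionTrackingStateAugmentation(ep, obs))
total_reward = 0.0
steps_taken = 0
for _ in range(max_steps):
    # ask the trained agent for its next exploit action
    _, action, _ = _l.exploit(replay_env, obs)
    if action is None:
        break  # the agent has no further learned move to suggest
    obs, reward, done, _, _ = replay_env.step(action)
    total_reward += reward
    steps_taken += 1
    if done:
        break
print(f"replay ended after {steps_taken} steps with cumulative reward {total_reward}")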