# ---
# jupyter:
#   jupytext:
#     formats: py:percent,ipynb
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.16.4
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# %%
# %% [markdown] magic_args="[markdown]"
# Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License.
#
# # Random agent playing the Capture The Flag toy environment

# %%
import sys
import logging
import gymnasium as gym

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s: %(message)s")
# %matplotlib inline

# %% [markdown]

# ### CyberBattle simulation
# - **Environment**: a network of nodes with assigned vulnerabilities/functionalities, value, and firewall configuration
# - **Action space**: local attack | remote attack | authenticated connection
# - **Observation**: effects of action on environment

# %%
from typing import cast
from cyberbattle._env.cyberbattle_env import CyberBattleEnv

_gym_env = gym.make("CyberBattleToyCtf-v0")

# gym.make() is typed as returning a generic environment; the cast only
# informs static type checkers that CyberBattleEnv members are used below.
gym_env = cast(CyberBattleEnv, _gym_env)

# %%
gym_env.environment

# %%
gym_env.action_space

# %%
gym_env.action_space.sample()

# %% [markdown]
# ## A random agent

# %%
# Play one episode, picking every action with sample_valid_action().
for i_episode in range(1):
    observation, _ = gym_env.reset()

    total_reward = 0

    # Hard cap of 5600 steps so the cell terminates even if `done` is never set.
    for t in range(5600):
        action = gym_env.sample_valid_action()

        # The fourth element returned by step() is discarded here.
        # NOTE(review): presumably gymnasium's `truncated` flag -- confirm.
        observation, reward, done, _, info = gym_env.step(action)

        total_reward += reward

        # Only report (and render) the steps that actually earned a reward.
        if reward > 0:
            # f-string so the sampled action is interpolated into the message.
            print(f"####### rewarded action: {action}")
            print(f"total_reward={total_reward} reward={reward}")
            gym_env.render()

        if done:
            print(f"Episode finished after {t + 1} timesteps")
            break

    # Render once more after the step loop has ended.
    gym_env.render()

gym_env.close()
print("simulation ended")

# %% [markdown]
# ### End of simulation

# %%