Coverage for o2/agents/ppo_agent_random.py: 83%
23 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-05-16 11:18 +0000
1import os
3from typing_extensions import override
5from o2.agents.ppo_agent import PPOAgent
6from o2.ppo_utils.ppo_env_random import PPOEnvRandom
8os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
10import gymnasium as gym
11import numpy as np
12from gymnasium import spaces
14from o2.ppo_utils.ppo_input import PPOInput
15from o2.store import Store
class PPOAgentRandom(PPOAgent):
    """PPO agent variant whose observation carries no real information.

    The observation space is a single random scalar in [0, 1], so the policy
    effectively picks actions at random; useful as a baseline against the
    regular :class:`PPOAgent`.
    """

    def __init__(self, store: Store) -> None:
        """Initialize the agent for *store* and disable action-validity checks."""
        super().__init__(store)
        # With random action selection, pre-validating actions adds no value.
        self.store.settings.disable_action_validity_check = True

    @override
    def get_env(self) -> gym.Env:
        """Get the environment for the PPO agent."""
        step_limit = self.store.settings.ppo_steps_per_iteration
        return PPOEnvRandom(self.store, max_steps=step_limit)

    @override
    def update_state(self) -> None:
        """Update the state of the agent."""
        current_actions = PPOInput.get_actions_from_store(self.store)
        self.actions = current_actions
        self.action_space = PPOInput.get_action_space_from_actions(current_actions)
        # Observation is a single uniform random scalar — intentionally uninformative.
        random_box = spaces.Box(low=0, high=1, shape=(1, 1))
        self.observation_space = spaces.Dict({"random": random_box})
        self.state = {"random": np.array([[np.random.random()]])}