Coverage for o2/ppo_utils/ppo_env_random.py: 67%
27 statements
from typing import Optional

import numpy as np
from gymnasium import Env, spaces

from o2.ppo_utils.ppo_input import PPOInput
from o2.store import Store

StateType = dict[str, float]


class PPOEnvRandom(Env[StateType, np.int64]):
    """The Environment for the PPO algorithm.

    Centered around the Store object, which contains the current state of the
    optimization problem.
    """

    def __init__(self, initial_store: Store, max_steps: float = float("inf")) -> None:
        super().__init__()

        self.store = initial_store
        self.max_steps = max_steps

        self.actions = PPOInput.get_actions_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = spaces.Dict(
            {
                "random": spaces.Box(low=0, high=1, shape=(1, 1)),
            }
        )
        self.state = {"random": np.random.random()}
        self.stepCount = 0
        self.iteration = 0
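        # NOTE: the "random" entry is a dummy placeholder observation; the
        # surrounding custom PPO implementation appears to drive action
        # selection from the Store rather than from this state (inferred from
        # the step() and action_masks() docstrings below).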

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> tuple[StateType, dict]:
        """Reset the environment to its initial state.

        Increments the iteration counter and reinitializes the store and state
        for random action selection.
        """
        super().reset(seed=seed)
        self.stepCount = 0
        self.iteration += 1
        self.store = Store(self.store.base_solution, self.store.constraints)
        self.actions = PPOInput.get_actions_from_store(self.store)
        self.state = {"random": np.random.random()}
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = spaces.Dict(
            {
                "random": spaces.Box(low=0, high=1, shape=(1, 1)),
            }
        )
        return self.state, {}

    def step(self, action: np.int64) -> tuple[StateType, float, bool, bool, dict]:
        """Take an action in the environment.

        Not implemented in this random environment, as we use a custom PPO
        implementation.
        """
        # Because PPO is partially reimplemented outside the environment, step()
        # is never called; raise to guard against accidental use.
        raise NotImplementedError("PPOEnvRandom does not support step")

    def action_masks(self) -> np.ndarray:
        """Get the action mask for the current set of actions."""
        # Like step(), action_masks() is unused by the custom PPO implementation;
        # raise to guard against accidental use.
        raise NotImplementedError("PPOEnvRandom does not support action_masks")

    def render(self, mode: str = "human") -> None:
        """Render the current state of the environment.

        Not implemented for this random environment.
        """
        pass
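
A minimal usage sketch (not part of the covered module, shown for context): it assumes a Store has already been constructed elsewhere, since the listing above only rebuilds one via Store(base_solution, constraints) inside reset(). The helper name run_random_rollout is hypothetical.

from o2.ppo_utils.ppo_env_random import PPOEnvRandom
from o2.store import Store


def run_random_rollout(store: Store, episodes: int = 3) -> None:
    """Reset the environment a few times and sample random actions from its space."""
    env = PPOEnvRandom(store)
    for _ in range(episodes):
        state, _info = env.reset()
        # step() and action_masks() raise by design; the custom PPO loop applies
        # actions to the Store itself, so here we only sample from the space.
        action = env.action_space.sample()
        print(env.iteration, state["random"], action)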