Coverage for o2/ppo_utils/ppo_env.py: 100% (27 statements)
from typing import Optional

import numpy as np
from gymnasium import Env, Space

from o2.ppo_utils.ppo_input import PPOInput
from o2.store import Store

StateType = dict[str, Space]

class PPOEnv(Env[StateType, np.int64]):
    """The environment for the PPO algorithm.

    Centered around the Store object, which contains the current state of the
    optimization problem.
    """

    def __init__(self, initial_store: Store, max_steps: float = float("inf")) -> None:
        super().__init__()

        self.store = initial_store
        self.max_steps = max_steps

        self.actions = PPOInput.get_actions_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = PPOInput.get_observation_space(self.store)
        self.state = PPOInput.get_state_from_store(self.store)
        self.stepCount = 0
        self.iteration = 0
    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> tuple[StateType, dict]:
        """Reset the environment to its initial state.

        Increments the iteration counter and reinitializes the store and state.
        """
        super().reset(seed=seed)
        self.stepCount = 0
        self.iteration += 1
        self.store = Store(self.store.base_solution, self.store.constraints)
        self.actions = PPOInput.get_actions_from_store(self.store)
        self.state = PPOInput.get_state_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = PPOInput.get_observation_space(self.store)
        return self.state, {}
    def step(self, action: np.int64) -> tuple[StateType, float, bool, bool, dict]:
        """Take an action in the environment.

        Not implemented, as we use a custom PPO implementation that bypasses it.
        """
        # Because parts of PPO are reimplemented elsewhere, step() is never called;
        # raise instead of failing silently.
        raise NotImplementedError("PPOEnv does not support step")
    def action_masks(self) -> np.ndarray:
        """Get the action mask for the current set of actions."""
        # Because parts of PPO are reimplemented elsewhere, action_masks() is never
        # called; raise instead of failing silently.
        raise NotImplementedError("PPOEnv does not support action_masks")
    def render(self, mode: str = "human") -> None:
        """Render the current state of the environment.

        Not implemented for this environment.
        """
        pass
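
A minimal usage sketch follows, assuming a Store has already been built elsewhere in the o2 pipeline (the Store constructor arguments mirror how reset() rebuilds the store above). The helper name build_and_reset is hypothetical and not part of the module; it only illustrates how the environment is constructed and reset before the custom PPO loop takes over.

from o2.ppo_utils.ppo_env import PPOEnv, StateType
from o2.store import Store


def build_and_reset(store: Store) -> tuple[PPOEnv, StateType]:
    """Wrap an existing Store in a PPOEnv and return the env with its first state."""
    env = PPOEnv(initial_store=store, max_steps=1_000)
    state, _info = env.reset()  # rebuilds the Store and increments the iteration counter
    # env.actions, env.action_space, and env.observation_space are now populated via PPOInput.
    # step() and action_masks() raise on purpose: the custom PPO loop drives the
    # optimization directly instead of going through the Gymnasium step API.
    return env, state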