Coverage for o2/ppo_utils/ppo_env.py: 100% (27 statements)
from typing import Optional

import numpy as np
from gymnasium import Env, Space

from o2.ppo_utils.ppo_input import PPOInput
from o2.store import Store

StateType = dict[str, Space]

class PPOEnv(Env[StateType, np.int64]):
    """The environment for the PPO algorithm.

    Centered around the Store object, which contains the current state of the
    optimization problem.
    """

    def __init__(self, initial_store: Store, max_steps: float = float("inf")) -> None:
        super().__init__()

        self.store = initial_store
        self.max_steps = max_steps

        self.actions = PPOInput.get_actions_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = PPOInput.get_observation_space(self.store)
        self.state = PPOInput.get_state_from_store(self.store)
        self.stepCount = 0
        self.iteration = 0
    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> tuple[StateType, dict]:
        """Reset the environment to its initial state.

        Increments the iteration counter and reinitializes the store and state.
        """
        super().reset(seed=seed)
        self.stepCount = 0
        self.iteration += 1
        self.store = Store(self.store.base_solution, self.store.constraints)
        self.actions = PPOInput.get_actions_from_store(self.store)
        self.state = PPOInput.get_state_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = PPOInput.get_observation_space(self.store)
        return self.state, {}
    def step(self, action: np.int64) -> tuple[StateType, float, bool, bool, dict]:
        """Take an action in the environment.

        Not implemented, as we use a custom PPO implementation that bypasses it.
        """
        # Because parts of PPO are reimplemented elsewhere, step() is never called;
        # raise instead of failing silently.
        raise NotImplementedError("PPOEnv does not support step")
    def action_masks(self) -> np.ndarray:
        """Get the action mask for the current set of actions."""
        # Because parts of PPO are reimplemented elsewhere, action_masks() is never
        # called; raise instead of failing silently.
        raise NotImplementedError("PPOEnv does not support action_masks")
    def render(self, mode: str = "human") -> None:
        """Render the current state of the environment.

        Not implemented for this environment.
        """
        pass
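
A minimal usage sketch follows, assuming a Store has already been built elsewhere in the o2 pipeline (the Store constructor arguments mirror how reset() rebuilds the store above). The helper name build_and_reset is hypothetical and not part of the module; it only illustrates how the environment is constructed and reset before the custom PPO loop takes over.

from o2.ppo_utils.ppo_env import PPOEnv, StateType
from o2.store import Store


def build_and_reset(store: Store) -> tuple[PPOEnv, StateType]:
    """Wrap an existing Store in a PPOEnv and return the env with its first state."""
    env = PPOEnv(initial_store=store, max_steps=1_000)
    state, _info = env.reset()  # rebuilds the Store and increments the iteration counter
    # env.actions, env.action_space, and env.observation_space are now populated via PPOInput.
    # step() and action_masks() raise on purpose: the custom PPO loop drives the
    # optimization directly instead of going through the Gymnasium step API.
    return env, state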