Coverage for o2/ppo_utils/ppo_env_random.py: 67%
27 statements
from typing import Optional

import numpy as np
from gymnasium import Env, spaces

from o2.ppo_utils.ppo_input import PPOInput
from o2.store import Store

StateType = dict[str, float]


class PPOEnvRandom(Env[StateType, np.int64]):
    """The Environment for the PPO algorithm.

    Centered around the Store object, which contains the current state of the
    optimization problem.
    """

    def __init__(self, initial_store: Store, max_steps: float = float("inf")) -> None:
        super().__init__()

        self.store = initial_store
        self.max_steps = max_steps

        self.actions = PPOInput.get_actions_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = spaces.Dict(
            {
                "random": spaces.Box(low=0, high=1, shape=(1, 1)),
            }
        )
        self.state = {"random": np.random.random()}
        self.stepCount = 0
        self.iteration = 0
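        # NOTE: the "random" entry is a dummy placeholder observation; the
        # surrounding custom PPO implementation appears to drive action
        # selection from the Store rather than from this state (inferred from
        # the step() and action_masks() docstrings below).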

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> tuple[StateType, dict]:
        """Reset the environment to its initial state.

        Increments the iteration counter and reinitializes the store and state
        for random action selection.
        """
        super().reset(seed=seed)
        self.stepCount = 0
        self.iteration += 1
        self.store = Store(self.store.base_solution, self.store.constraints)
        self.actions = PPOInput.get_actions_from_store(self.store)
        self.state = {"random": np.random.random()}
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = spaces.Dict(
            {
                "random": spaces.Box(low=0, high=1, shape=(1, 1)),
            }
        )
        return self.state, {}

    def step(self, action: np.int64) -> tuple[StateType, float, bool, bool, dict]:
        """Take an action in the environment.

        Not implemented in this random environment, as we use a custom PPO
        implementation.
        """
        # Because PPO is partially reimplemented outside the environment, step()
        # is never called; raise to guard against accidental use.
        raise NotImplementedError("PPOEnvRandom does not support step")

    def action_masks(self) -> np.ndarray:
        """Get the action mask for the current set of actions."""
        # Like step(), action_masks() is unused by the custom PPO implementation;
        # raise to guard against accidental use.
        raise NotImplementedError("PPOEnvRandom does not support action_masks")

    def render(self, mode: str = "human") -> None:
        """Render the current state of the environment.

        Not implemented for this random environment.
        """
        pass
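
A minimal usage sketch (not part of the covered module, shown for context): it assumes a Store has already been constructed elsewhere, since the listing above only rebuilds one via Store(base_solution, constraints) inside reset(). The helper name run_random_rollout is hypothetical.

from o2.ppo_utils.ppo_env_random import PPOEnvRandom
from o2.store import Store


def run_random_rollout(store: Store, episodes: int = 3) -> None:
    """Reset the environment a few times and sample random actions from its space."""
    env = PPOEnvRandom(store)
    for _ in range(episodes):
        state, _info = env.reset()
        # step() and action_masks() raise by design; the custom PPO loop applies
        # actions to the Store itself, so here we only sample from the space.
        action = env.action_space.sample()
        print(env.iteration, state["random"], action)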