Coverage for o2/ppo_utils/ppo_env.py: 100% (27 statements)

from typing import Optional

import numpy as np
from gymnasium import Env, Space

from o2.ppo_utils.ppo_input import PPOInput
from o2.store import Store

StateType = dict[str, Space]


class PPOEnv(Env[StateType, np.int64]):
    """The Environment for the PPO algorithm.

    Centered around the Store object, which contains the current state of the
    optimization problem.
    """

    def __init__(self, initial_store: Store, max_steps: float = float("inf")) -> None:
        super().__init__()

        self.store = initial_store
        self.max_steps = max_steps

        self.actions = PPOInput.get_actions_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = PPOInput.get_observation_space(self.store)
        self.state = PPOInput.get_state_from_store(self.store)
        self.stepCount = 0
        self.iteration = 0

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> tuple[StateType, dict]:
        """Reset the environment to its initial state.

        Increments the iteration counter and reinitializes the store and state.
        """
        super().reset(seed=seed)
        self.stepCount = 0
        self.iteration += 1
        # Rebuild the store from the base solution so each iteration starts fresh.
        self.store = Store(self.store.base_solution, self.store.constraints)
        self.actions = PPOInput.get_actions_from_store(self.store)
        self.state = PPOInput.get_state_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = PPOInput.get_observation_space(self.store)
        return self.state, {}

    def step(self, action: np.int64) -> tuple[StateType, float, bool, bool, dict]:
        """Take an action in the environment.

        Not implemented in this environment, as we use a custom PPO implementation.
        """
        # Because we reimplement parts of PPO ourselves, step() is never called;
        # raise so any accidental use fails loudly.
        raise NotImplementedError("PPOEnv does not support step")

    def action_masks(self) -> np.ndarray:
        """Get the action mask for the current set of actions.

        Not implemented in this environment, as we use a custom PPO implementation.
        """
        # Likewise, action masking happens in the custom PPO code, not here.
        raise NotImplementedError("PPOEnv does not support action_masks")

    def render(self, mode: str = "human") -> None:
        """Render the current state of the environment.

        Not implemented for this environment.
        """
        pass
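
For context, a minimal usage sketch follows. It is not part of the covered module: base_solution and constraints are hypothetical placeholders (only the Store(base_solution, constraints) call shape is taken from reset() above), and it assumes the action space built by PPOInput is a standard Gymnasium space.

# Minimal usage sketch. base_solution and constraints are hypothetical
# placeholders; obtain them however the surrounding project does.
from o2.ppo_utils.ppo_env import PPOEnv
from o2.store import Store

store = Store(base_solution, constraints)  # same call shape as in reset() above
env = PPOEnv(store, max_steps=100)

# reset() rebuilds the store from its base solution and returns the state
# dict plus an empty info dict, per the Gymnasium API.
state, info = env.reset(seed=42)

# Assuming a standard Gymnasium space, actions can be sampled directly.
action = env.action_space.sample()

# step() and action_masks() intentionally raise NotImplementedError: the
# project drives its own PPO loop from env.actions, env.state, and the
# spaces rather than through the Gymnasium step interface.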