Coverage for o2/ppo_utils/ppo_env_random.py: 67%

27 statements  

coverage.py v7.6.12, created at 2025-05-16 11:18 +0000

from typing import Optional

import numpy as np
from gymnasium import Env, spaces

from o2.ppo_utils.ppo_input import PPOInput
from o2.store import Store

StateType = dict[str, float]


class PPOEnvRandom(Env[StateType, np.int64]):
    """The environment for the PPO algorithm.

    Centered around the Store object, which contains the current state of the
    optimization problem.
    """

    def __init__(self, initial_store: Store, max_steps: float = float("inf")) -> None:
        super().__init__()

        self.store = initial_store
        self.max_steps = max_steps

        self.actions = PPOInput.get_actions_from_store(self.store)
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = spaces.Dict(
            {
                "random": spaces.Box(low=0, high=1, shape=(1, 1)),
            }
        )
        self.state = {"random": np.random.random()}
        self.step_count = 0
        self.iteration = 0

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> tuple[StateType, dict]:
        """Reset the environment to its initial state.

        Increments the iteration counter and reinitializes the store and state
        for random action selection.
        """
        super().reset(seed=seed)
        self.step_count = 0
        self.iteration += 1
        self.store = Store(self.store.base_solution, self.store.constraints)
        self.actions = PPOInput.get_actions_from_store(self.store)
        self.state = {"random": np.random.random()}
        self.action_space = PPOInput.get_action_space_from_actions(self.actions)
        self.observation_space = spaces.Dict(
            {
                "random": spaces.Box(low=0, high=1, shape=(1, 1)),
            }
        )
        return self.state, {}

    def step(self, action: np.int64) -> tuple[StateType, float, bool, bool, dict]:
        """Take an action in the environment.

        Not implemented in this random environment, as a custom PPO
        implementation is used instead.
        """
        # Because PPO is partially reimplemented outside this environment,
        # step() is never needed; raise to prevent accidental use.
        raise NotImplementedError("PPOEnvRandom does not support step")

    def action_masks(self) -> np.ndarray:
        """Get the action mask for the current set of actions."""
        # Because PPO is partially reimplemented outside this environment,
        # action_masks() is never needed; raise to prevent accidental use.
        raise NotImplementedError("PPOEnvRandom does not support action_masks")

    def render(self, mode: str = "human") -> None:
        """Render the current state of the environment.

        Not implemented for this random environment.
        """
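
A minimal usage sketch, assuming an already-built Store instance named store (constructing one requires project-specific base_solution and constraints values not shown here). Only the spaces and the random observation are meant to be consumed; step and action_masks deliberately raise:

# Hedged sketch -- `store` is an assumed, pre-existing Store instance.
env = PPOEnvRandom(store, max_steps=100)

state, info = env.reset()
assert 0.0 <= state["random"] < 1.0  # reset() returns a fresh random observation

try:
    env.step(np.int64(0))
except NotImplementedError:
    pass  # expected: the custom PPO loop bypasses step() entirely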