rootcodelabs · munsif135 · May 16, 2024 · May 20, 2024 · May 22, 2024 · May 22, 2024
diff --git a/docs/conf.py b/docs/conf.py
@@ -49,9 +49,9 @@
 
     # 'details',
     #'exception_hierarchy',
-
     # for pdf
-    # 'rst2pdf.pdfbuilder'
+    # 'rst2pdf.pdfbuilder',
+    'sphinx.ext.mathjax'
 ]
 # Add any paths that contain templates here, relative to this directory.
 templates_path = [] #'_templates']

diff --git a/docs/reward.rst b/docs/reward.rst
@@ -86,6 +86,7 @@ At time of writing the available reward functions is :
 - :class:`LinesReconnectedReward`
 - :class:`N1Reward`
 - :class:`RedispReward`
+- :class:`ShapedReward`
 
 In the provided reward you have also some convenience functions to combine different reward. These are:
 

diff --git a/grid2op/Reward/__init__.py b/grid2op/Reward/__init__.py
@@ -19,13 +19,14 @@
     "AlarmReward",
     "N1Reward",
     # TODO it would be better to have a specific package for this, but in the mean time i put it here
+    "ShapedReward",
     "L2RPNSandBoxScore",
     "L2RPNWCCI2022ScoreFun",
     "AlertReward",
     "_AlarmScore",
     "_NewRenewableSourcesUsageScore",
     "_AlertCostScore",
-    "_AlertTrustScore"
+    "_AlertTrustScore",
 ]
 
 from grid2op.Reward.constantReward import ConstantReward
@@ -49,6 +50,7 @@
 from grid2op.Reward.alarmReward import AlarmReward
 from grid2op.Reward._alarmScore import _AlarmScore
 from grid2op.Reward.n1Reward import N1Reward
+from grid2op.Reward.shapedReward import ShapedReward
 from grid2op.Reward.l2rpn_wcci2022_scorefun import L2RPNWCCI2022ScoreFun
 from grid2op.Reward.alertReward import AlertReward
 from grid2op.Reward._newRenewableSourcesUsageScore import _NewRenewableSourcesUsageScore

diff --git a/grid2op/Reward/shapedReward.py b/grid2op/Reward/shapedReward.py
@@ -0,0 +1,87 @@
+import numpy as np
+from grid2op.Reward.baseReward import BaseReward
+from grid2op.dtypes import dt_float
+import math
+
+
+class ShapedReward(BaseReward):
+    """
+    This reward is based on the cumulative sum of all overflowing line loads, which the agent aims to minimize.
+
+    This rewards is computed as followed:  
+
+    - We first calculate the coefficient u, which summarizes the (overflowing) line loads.
+
+    - If rho_max < 1,i.e, there is currently no overflow, and line loads of all lines are within the allowed bounds, u is
+    calculated as:
+
+          u = max(rho_max - 0.5, 0) ; where rho_max is the maximum overflow value in the grid
+
+      (If rho_max - 0.5 is positive or zero, it will return :math:rho_max - 0.5`, else it will return 0.)
+
+    - If  rho_max > 1, u is calculated as:
+
+          u =  Σ (rho_i - 0.5) for each i in [1, n] and rho_i > 1 ; where  `n` is the number of power lines in the grid and Σ denotes summation.
+
+    Then, utilizing u calculated above, we take into account offline lines and apply exponential decay to obtain the shaped reward r as:
+
+        r = exp(-u - 0.5*n_offline) ; 
+        where n_offline is the number of lines which are currently offline as a result of an overflow or agent’s actions (i.e.,
+    we do not consider lines that are offline because of maintenance or opponent attacks).
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import ShapedReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "l2rpn_case14_sandbox"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT,reward_class=ShapedReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the ShapedReward class
+
+    """
+    def __init__(self, logger=None):
+        BaseReward.__init__(self, logger=logger)
+
+
+    def initialize(self, env):
+        self.reward_min = dt_float(0.0)
+
+
+    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
+
+        if not is_done and not has_error:
+            res = self.line_overflowing_sum(env)
+
+        else:
+            res = self.reward_min
+
+        return res
+
+
+    @staticmethod
+    def line_overflowing_sum(env):
+        obs = env.current_obs
+
+        if obs.rho.max()<=1:
+            u = max([obs.rho.max()- 0.5, 0])
+
+        else:
+            u = sum([rho-0.5 for rho in obs.rho if rho>1])
+
+        lines_disconnected = np.sum(obs.line_status == False)
+        lines_in_maintenance = np.sum(obs.time_next_maintenance==0)
+        lines_under_attack = np.sum(obs.time_since_last_attack>=0)
+        n_offline = lines_disconnected - lines_in_maintenance - lines_under_attack
+
+        reward = math.exp(-u-0.5*n_offline)
+
+        return reward
+
diff --git a/grid2op/tests/test_Reward.py b/grid2op/tests/test_Reward.py
@@ -18,7 +18,7 @@
 from grid2op.Parameters import Parameters
 from grid2op.Runner import Runner
 from grid2op.Agent import BaseAgent
-
+from grid2op.Reward.shapedReward import ShapedReward
 import warnings
 
 
@@ -88,6 +88,10 @@ def _reward_type(self):
 class TestLoadingLinesCapacityReward(TestLoadingReward, unittest.TestCase):
     def _reward_type(self):
         return LinesCapacityReward
+
+class TestLoadingShapedReward(TestLoadingReward, unittest.TestCase):
+    def _reward_type(self):
+        return ShapedReward
 
 
 class TestDistanceReward(TestLoadingReward, unittest.TestCase):