Skip to content
Open
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@

# 'details',
#'exception_hierarchy',

# for pdf
# 'rst2pdf.pdfbuilder'
# 'rst2pdf.pdfbuilder',
'sphinx.ext.mathjax'
]
# Add any paths that contain templates here, relative to this directory.
# The list is currently empty: the '_templates' entry is commented out below.
templates_path = [] #'_templates']
Expand Down
1 change: 1 addition & 0 deletions docs/reward.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ At time of writing the available reward functions is :
- :class:`LinesReconnectedReward`
- :class:`N1Reward`
- :class:`RedispReward`
- :class:`ShapedReward`

The provided rewards also include some convenience functions to combine different rewards. These are:

Expand Down
4 changes: 3 additions & 1 deletion grid2op/Reward/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
"AlarmReward",
"N1Reward",
# TODO: it would be better to have a dedicated package for this, but in the meantime it is placed here
"ShapedReward",
"L2RPNSandBoxScore",
"L2RPNWCCI2022ScoreFun",
"AlertReward",
"_AlarmScore",
"_NewRenewableSourcesUsageScore",
"_AlertCostScore",
"_AlertTrustScore"
"_AlertTrustScore",
]

from grid2op.Reward.constantReward import ConstantReward
Expand All @@ -49,6 +50,7 @@
from grid2op.Reward.alarmReward import AlarmReward
from grid2op.Reward._alarmScore import _AlarmScore
from grid2op.Reward.n1Reward import N1Reward
from grid2op.Reward.shapedReward import ShapedReward
from grid2op.Reward.l2rpn_wcci2022_scorefun import L2RPNWCCI2022ScoreFun
from grid2op.Reward.alertReward import AlertReward
from grid2op.Reward._newRenewableSourcesUsageScore import _NewRenewableSourcesUsageScore
Expand Down
87 changes: 87 additions & 0 deletions grid2op/Reward/shapedReward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import numpy as np
from grid2op.Reward.baseReward import BaseReward
from grid2op.dtypes import dt_float
import math


class ShapedReward(BaseReward):
    """
    This reward is based on the cumulative sum of all overflowing line loads, which the agent aims to minimize.

    This reward is computed as follows:

    - We first calculate the coefficient ``u``, which summarizes the (overflowing) line loads.

      - If ``rho_max <= 1``, i.e., there is currently no overflow and the loads of all lines are within the
        allowed bounds, ``u`` is calculated as:

        ``u = max(rho_max - 0.5, 0)`` ; where ``rho_max`` is the maximum line load (``rho``) in the grid.

        (If ``rho_max - 0.5`` is positive or zero, this is ``rho_max - 0.5``, otherwise it is 0.)

      - If ``rho_max > 1``, ``u`` is calculated as:

        ``u = Σ (rho_i - 0.5)`` for each ``i`` in ``[1, n]`` with ``rho_i > 1`` ; where ``n`` is the number of
        powerlines in the grid and Σ denotes summation.

    - Then, utilizing ``u`` calculated above, we take into account offline lines and apply an exponential decay
      to obtain the shaped reward ``r`` as:

      ``r = exp(-u - 0.5 * n_offline)`` ;

      where ``n_offline`` is the number of lines which are currently offline as a result of an overflow or of the
      agent's actions (i.e., we do not consider lines that are offline because of maintenance or opponent attacks).
      ``n_offline`` is never taken to be negative.

    As ``u >= 0`` and ``n_offline >= 0``, the reward lies in ``(0, 1]`` while the episode is running. It is
    ``reward_min`` (0) when the episode is over or when an error occurred.

    Examples
    ---------
    You can use this reward in any environment with:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import ShapedReward

        # then you create your environment with it:
        NAME_OF_THE_ENVIRONMENT = "l2rpn_case14_sandbox"
        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=ShapedReward)
        # and do a step with a "do nothing" action
        obs = env.reset()
        obs, reward, done, info = env.step(env.action_space())
        # the reward is computed with the ShapedReward class

    """

    def __init__(self, logger=None):
        """Build the reward, forwarding the optional ``logger`` to :class:`BaseReward`."""
        BaseReward.__init__(self, logger=logger)

    def initialize(self, env):
        """
        Set the range of the reward.

        Parameters
        ----------
        env:
            The environment this reward is attached to (not read here).
        """
        self.reward_min = dt_float(0.0)
        # exp(0) == 1 is reached when there is no load above 0.5 and no offline line
        self.reward_max = dt_float(1.0)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Compute the reward for the current step.

        Parameters
        ----------
        action:
            The action performed by the agent (not used).
        env:
            The environment, whose ``current_obs`` is read to compute the reward.
        has_error: ``bool``
            Whether an error occurred during the step.
        is_done: ``bool``
            Whether the episode is over.
        is_illegal: ``bool``
            Whether the action was illegal (not used).
        is_ambiguous: ``bool``
            Whether the action was ambiguous (not used).

        Returns
        -------
        res:
            ``reward_min`` if the episode is over or an error occurred, otherwise the shaped reward,
            converted to ``dt_float`` so that both paths return the same type.
        """
        if is_done or has_error:
            return self.reward_min
        return dt_float(self.line_overflowing_sum(env))

    @staticmethod
    def line_overflowing_sum(env):
        """
        Compute ``exp(-u - 0.5 * n_offline)`` from the current observation of ``env``.

        Parameters
        ----------
        env:
            The environment; only ``env.current_obs`` is read.

        Returns
        -------
        reward: ``float``
            The shaped reward, as a plain python float.
        """
        obs = env.current_obs
        rho = obs.rho
        rho_max = rho.max()

        if rho_max <= 1:
            # no overflow: only the most loaded line matters, and only above 50% of its capacity
            u = max(rho_max - 0.5, 0)
        else:
            # at least one overflow: accumulate the excess of every overflowing line (vectorized)
            u = float(np.sum(rho[rho > 1] - 0.5))

        lines_disconnected = np.sum(np.logical_not(obs.line_status))
        lines_in_maintenance = np.sum(obs.time_next_maintenance == 0)
        # NOTE(review): `time_since_last_attack >= 0` looks like it also counts lines attacked earlier
        # (and possibly reconnected since), not only lines currently attacked -- confirm against the
        # observation documentation.
        lines_under_attack = np.sum(obs.time_since_last_attack >= 0)

        # The subtraction can fall below zero (line counted both as attacked and in maintenance, or
        # attacked line that is connected), which would push the reward above 1: clamp it.
        n_offline = max(lines_disconnected - lines_in_maintenance - lines_under_attack, 0)

        return math.exp(-u - 0.5 * n_offline)

6 changes: 5 additions & 1 deletion grid2op/tests/test_Reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from grid2op.Parameters import Parameters
from grid2op.Runner import Runner
from grid2op.Agent import BaseAgent

from grid2op.Reward.shapedReward import ShapedReward
import warnings


Expand Down Expand Up @@ -88,6 +88,10 @@ def _reward_type(self):
class TestLoadingLinesCapacityReward(TestLoadingReward, unittest.TestCase):
    """Concrete test case pairing ``TestLoadingReward`` with ``LinesCapacityReward``."""

    def _reward_type(self):
        """Return the reward class this test case is parameterized with."""
        reward_cls = LinesCapacityReward
        return reward_cls

class TestLoadingShapedReward(TestLoadingReward, unittest.TestCase):
    """Concrete test case pairing ``TestLoadingReward`` with ``ShapedReward``."""

    def _reward_type(self):
        """Return the reward class this test case is parameterized with."""
        reward_cls = ShapedReward
        return reward_cls


class TestDistanceReward(TestLoadingReward, unittest.TestCase):
Expand Down