diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 032e1f4c..6c35392d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,4 +1,4 @@
-name: Build rlenvscpp
+name: Build bitrl
on:
push:
@@ -21,7 +21,39 @@ jobs:
- uses: actions/checkout@v4
- name: Install dependencies
run: |
- sudo apt-get install -y g++ cmake libboost-all-dev libgtest-dev libeigen3-dev libblas-dev
+ sudo apt-get install -y \
+ build-essential \
+ cmake \
+ git \
+ libboost-all-dev \
+ libgtest-dev \
+ libeigen3-dev \
+ libblas-dev \
+ libopencv-dev
+ # g++ cmake libboost-all-dev libgtest-dev libeigen3-dev libblas-dev
+ - name: Build and install Paho MQTT C
+ run: |
+ git clone https://github.com/eclipse/paho.mqtt.c.git
+ cd paho.mqtt.c
+ cmake -Bbuild -H. \
+ -DPAHO_WITH_SSL=ON \
+ -DPAHO_BUILD_SHARED=ON \
+ -DPAHO_BUILD_STATIC=OFF
+ cmake --build build
+ sudo cmake --install build
+ sudo ldconfig
+ - name: Build and install Paho MQTT C++
+ run: |
+ git clone https://github.com/eclipse/paho.mqtt.cpp.git
+ cd paho.mqtt.cpp
+ cmake -Bbuild -H. \
+ -DPAHO_WITH_SSL=ON \
+ -DPAHO_BUILD_SHARED=ON \
+ -DPAHO_BUILD_STATIC=OFF
+ cmake --build build
+ sudo cmake --install build
+ sudo ldconfig
+
- name: Configure CMake
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 01e81d26..b40517ce 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ MESSAGE(STATUS "Using CMake ${CMAKE_VERSION}")
SET(BITRL_VERSION_MAJOR 1)
-SET(BITRL_VERSION_MINOR 2)
+SET(BITRL_VERSION_MINOR 8)
SET(BITRL_VERSION_PATCH 0)
SET(BITRL_VERSION "${BITRL_VERSION_MAJOR}.${BITRL_VERSION_MINOR}.${BITRL_VERSION_PATCH}")
diff --git a/README.md b/README.md
index d39b5e4e..ca0e47b0 100644
--- a/README.md
+++ b/README.md
@@ -2,15 +2,8 @@
# bitrl
```bitrl``` is an effort to provide implementations and wrappers of environments suitable for training reinforcement learning agents
-using C++.
+using C++. The documentation for the library can be found here.
-Furthermore, there is some minimal support for working with Arduino UNO boards over USB or WiFi.
-See also Working with Webots
-for how to integrate ```bitrl``` with Webots.
-
-Various RL algorithms using the environments can be found at cuberl.
-
-The documentation for the library can be found here.
The following is an example of how to use the
``FrozenLake`` environment from Gymnasium.
@@ -18,7 +11,7 @@ The following is an example how to use the
#include "bitrl/bitrl_types.h"
#include "bitrl/envs/gymnasium/toy_text/frozen_lake_env.h"
-#include "bitrl/envs/api_server/apiserver.h"
+#include "bitrl/network/rest_rl_env_client.h"
#include
#include
@@ -32,7 +25,6 @@ const std::string SERVER_URL = "http://0.0.0.0:8001/api";
using bitrl::envs::gymnasium::FrozenLake;
using bitrl::envs::RESTApiServerWrapper;
-
void test_frozen_lake(const RESTApiServerWrapper& server){
FrozenLake<4> env(server);
@@ -40,21 +32,26 @@ void test_frozen_lake(const RESTApiServerWrapper& server){
std::cout<<"Environame URL: "< options;
- options.insert({"is_slippery", false});
- env.make("v1", options);
+ std::unordered_map make_ops;
+ make_ops.insert({"is_slippery", false});
+
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+ env.make("v1", make_ops, reset_ops);
std::cout<<"Is environment created? "<());
+ auto time_step = env.reset();
std::cout<<"Reward on reset: "<bitrl-rest-api
+Various RL algorithms using the environments can be found at cuberl.
+
+Furthermore, there is some minimal support for working with Arduino UNO boards over USB or WiFi.
+See also Working with Webots
+for how to integrate ```bitrl``` with Webots.
+
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 5c840212..0b2894f6 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -25,8 +25,8 @@ IF(BITRL_WEBOTS)
ENDIF()
ADD_SUBDIRECTORY(example_1)
-ADD_SUBDIRECTORY(example_2)
-ADD_SUBDIRECTORY(example_3)
+#ADD_SUBDIRECTORY(example_2)
+#ADD_SUBDIRECTORY(example_3)
#ADD_SUBDIRECTORY(example_4)
ADD_SUBDIRECTORY(example_5)
ADD_SUBDIRECTORY(example_6)
diff --git a/examples/box2d/box2d_example.cpp b/examples/box2d/box2d_example.cpp
index a90aadc4..48587126 100644
--- a/examples/box2d/box2d_example.cpp
+++ b/examples/box2d/box2d_example.cpp
@@ -3,17 +3,19 @@
//
#include "bitrl/envs/gymnasium/box2d/lunar_lander_env.h"
-#include "bitrl/envs/api_server/apiserver.h"
+#include "bitrl/network/rest_rl_env_client.h"
#include
#include
#include
+#include "../../src/bitrl/sensors/ekf_sensor_fusion.h"
+
namespace box2d_example
{
+ using namespace bitrl;
const std::string SERVER_URL = "http://0.0.0.0:8001/api";
using bitrl::real_t;
- using bitrl::envs::RESTApiServerWrapper;
using bitrl::envs::gymnasium::LunarLanderDiscreteEnv;
using bitrl::envs::gymnasium::LunarLanderContinuousEnv;
}
@@ -22,7 +24,7 @@ int main()
{
using namespace box2d_example;
- RESTApiServerWrapper server(SERVER_URL, true);
+ bitrl::network::RESTRLEnvClient server(SERVER_URL, true);
std::unordered_map options;
options["wind_power"] = std::any(static_cast(10.0));
@@ -57,7 +59,9 @@ int main()
std::cout<<"Working with LunarLanderContinuousEnv..."< reset_options;
+ env.make("v3", options, reset_options);
std::cout<<"Is environment created? "< action = {0.8, 0.9};
time_step = env.step(action);
std::cout<<"Time step: "<
#include
@@ -11,6 +11,7 @@
#include
namespace example_1{
+ using namespace bitrl;
const std::string SERVER_URL = "http://0.0.0.0:8001/api";
@@ -18,31 +19,36 @@ using bitrl::envs::gymnasium::FrozenLake;
using bitrl::envs::gymnasium::Taxi;
using bitrl::envs::gymnasium::BlackJack;
using bitrl::envs::gymnasium::CliffWorld;
-using bitrl::envs::RESTApiServerWrapper;
+using bitrl::network::RESTRLEnvClient;
-void test_frozen_lake(const RESTApiServerWrapper& server){
+void test_frozen_lake(RESTRLEnvClient& server){
FrozenLake<4> env(server);
std::cout<<"Environame URL: "< options;
- options.insert({"is_slippery", false});
- env.make("v1", options);
+ std::unordered_map make_ops;
+ make_ops.insert({"is_slippery", false});
+
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+ env.make("v1", make_ops, reset_ops);
std::cout<<"Is environment created? "<());
+ auto time_step = env.reset();
std::cout<<"Reward on reset: "<());
-
- auto copy_env = env.make_copy(1);
- copy_env.reset();
-
- std::cout<<"Org env cidx: "< options;
- env.make("v3", options);
+ std::unordered_map make_ops;
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+ env.make("v3", make_ops, reset_ops);
std::cout<<"Is environment created? "<());
+ auto time_step = env.reset();
std::cout<<"Reward on reset: "<(item)<(item)< options;
options["natural"] = true;
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+
std::cout<<"Environment created..."<());
-
- auto copy_env = env.make_copy(1);
- copy_env.reset();
-
- std::cout<<"Org env cidx: "< options;
options["max_episode_steps"] = std::any(static_cast(10));
- env.make("v0", options);
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+ env.make("v0", options, reset_ops);
std::cout<<"Is environment created? "<());
+ auto time_step = env.reset();
std::cout<<"Reward on reset: "<(item)<());
-
- auto copy_env = env.make_copy(1);
- copy_env.reset();
-
- std::cout<<"Org env cidx: "<
-#include
-
-int main(){
-
- const std::string SERVER_URL = "http://0.0.0.0:8001/api";
-
- bitrl::envs::RESTApiServerWrapper server_wrapper(SERVER_URL);
-
- auto has_gym = server_wrapper.has_gymnasium();
- std::cout<<"Has environment server Gymnasium? "<
-#endif
-
-#include
-#include
-#include
-
-
-namespace example{
-
-using bitrl::uint_t;
-using bitrl::envs::RESTApiServerWrapper;
-const std::string SERVER_URL = "http://0.0.0.0:8001/api";
-const uint_t MAX_TRAJECTORY_SIZE = 10;
-
-typedef bitrl::envs::gymnasium::FrozenLake<4> env_type;
-
-
-auto random_action_selector = [](auto /*state*/){
-
- // randomly select an action
- std::mt19937 gen(42); // mersenne_twister_engine seeded with rd()
- std::uniform_int_distribution<> distrib(0, 3);
- auto action = distrib(gen);
- return action;
-
-};
-
-}
-
-int main(){
-
- using namespace example;
-
- RESTApiServerWrapper server(SERVER_URL, true);
- env_type env(server);
-
- std::cout<<"Environame URL: "< options;
- options.insert({"is_slippery", true});
- env.make("v1", options);
- env.reset(42, std::unordered_map());
-
- std::cout<<"Is environment created? "<
#include
#include
+#include
namespace example_5{
-
+
+ using namespace bitrl;
+
using namespace bitrl::envs::grid_world;
void create_static(){
std::cout<<"Creating STATIC Gridworld..."< env;
+ Gridworld<4> env;
std::unordered_map options;
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
options["mode"] = std::any(GridWorldInitType::STATIC);
- env.make("v0", options);
+ env.make("v0", options, reset_ops);
std::cout<<"Number of actions: "< options;
options["mode"] = std::any(GridWorldInitType::RANDOM);
- env.make("v0", options);
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+
+ env.make("v0", options, reset_ops);
std::cout<<"Number of actions: "< options;
- env.make("v1", options);
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+ env.make("v1", options, reset_ops);
auto time_step = env.reset();
diff --git a/examples/example_7/example_7.cpp b/examples/example_7/example_7.cpp
index 2b4840bc..725dacae 100644
--- a/examples/example_7/example_7.cpp
+++ b/examples/example_7/example_7.cpp
@@ -25,7 +25,7 @@ int main(){
std::unordered_map options;
// make the environment
- env.make("v1", options);
+ env.make("v1", options, options);
std::cout<<"Is active? "<
@@ -15,11 +15,11 @@ int main(){
using namespace bitrl::envs::gymnasium;
using bitrl::uint_t;
- using bitrl::envs::RESTApiServerWrapper;
+ using bitrl::network::RESTRLEnvClient;
const std::string SERVER_URL = "http://0.0.0.0:8001/api";
- RESTApiServerWrapper server(SERVER_URL, true);
+ RESTRLEnvClient server(SERVER_URL, true);
// Acrobot vector environment
AcrobotV env(server);
@@ -29,9 +29,12 @@ int main(){
std::unordered_map options;
options["num_envs"] = std::any(static_cast(3));
-
+
+ std::unordered_map reset_ops;
+ reset_ops.insert({"seed", static_cast(42)});
+
// make the environment
- env.make("v1", options);
+ env.make("v1", options, reset_ops);
std::cout<<"Reseting the environment... "<
@@ -10,179 +11,169 @@
namespace bitrl{
-namespace envs{
-namespace connect2{
-
-const std::string Connect2::name = "Connect2";
-
-Connect2::Connect2()
-:
-EnvBase>,
- DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(0, "Connect2"),
-discount_(1.0),
-board_()
-{}
-
-Connect2::Connect2(uint_t cidx)
-:
-EnvBase>,
- DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(cidx, "Connect2"),
-discount_(1.0),
-board_()
-{}
-
-Connect2::Connect2(const Connect2& other)
-:
-EnvBase>,
- DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(other),
-discount_(1.0),
-board_(other.board_),
-is_finished_(other.is_finished_)
-{}
-
-void
-Connect2::make(const std::string& /*version*/,
- const std::unordered_map& /*options*/){
-
- board_.resize(4, 0);
- this -> set_version_("v1");
- this -> make_created_();
+namespace envs::connect2
+{
-}
+ const std::string Connect2::name = "Connect2";
-Connect2::time_step_type
-Connect2::step(const action_type& action){
- return move(player_id_1_, action);
-
-}
+ Connect2::Connect2()
+ :
+ EnvBase>,
+ DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(bitrl::consts::INVALID_STR, "Connect2"), // EnvBase takes (idx, name); idx is assigned later in make()
+ discount_(1.0),
+ board_()
+ {}
-Connect2::time_step_type
-Connect2::reset(uint_t /*seed*/,
- const std::unordered_map& /*options*/){
- board_ = std::vector(4, 0);
- is_finished_ = false;
- this -> get_current_time_step_() = Connect2::time_step_type(TimeStepTp::FIRST, 0.0, board_, discount_);
- return this -> get_current_time_step_();
-}
-bool
-Connect2::is_win(uint_t player)const noexcept{
-
- auto player_sum = 0;
- std::for_each(board_.begin(),
- board_.end(),
- [&player_sum, player](auto val){
-
- if(val == player)
- player_sum += 1;
- });
-
- return player_sum == win_val_;
-}
-std::vector
-Connect2::get_valid_moves()const{
-
- std::vector val_moves_;
- val_moves_.reserve(4);
-
- for(uint_t i=0; i>,
+ DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(other),
+ discount_(1.0),
+ board_(other.board_),
+ is_finished_(other.is_finished_)
+ {}
+
+ void
+ Connect2::make(const std::string& /*version*/,
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options){
+
+ board_.resize(4, 0);
+ this -> set_version_("v1");
+ this -> make_created_();
+
+ auto idx = utils::uuid4();
+ this -> set_idx_(idx);
+ this -> base_type::make("v1", options, reset_options);
+ this -> make_created_();
-bool
-Connect2::has_legal_moves()const noexcept{
-
- for(auto idx : board_){
- if(idx == 0){
- return true;
- }
}
-
- return false;
-}
+ Connect2::time_step_type
+ Connect2::step(const action_type& action){
+ return move(player_id_1_, action);
-Connect2::time_step_type
-Connect2::move(const uint_t pid, const action_type& action){
-
-
- if(pid != 1 && pid != 2){
- throw std::logic_error("Invalid player id: " + std::to_string(pid));
}
-
- if(action >= board_.size()){
- throw std::logic_error("Invalid action id: " + std::to_string(action));
+
+ Connect2::time_step_type
+ Connect2::reset(){
+ board_ = std::vector(4, 0);
+ is_finished_ = false;
+ this -> get_current_time_step_() = Connect2::time_step_type(TimeStepTp::FIRST, 0.0, board_, discount_);
+ return this -> get_current_time_step_();
}
-
- if(is_finished_){
- return reset();
+
+ bool
+ Connect2::is_win(uint_t player)const noexcept{
+
+ auto player_sum = 0;
+ std::for_each(board_.begin(),
+ board_.end(),
+ [&player_sum, player](auto val){
+
+ if(val == player)
+ player_sum += 1;
+ });
+
+ return player_sum == win_val_;
}
-
- auto valid_move = true;
- if(board_[action] != 0){
- valid_move = false;
+
+ std::vector
+ Connect2::get_valid_moves()const{
+
+ std::vector val_moves_;
+ val_moves_.reserve(4);
+
+ for(uint_t i=0; i= board_.size()){
+ throw std::logic_error("Invalid action id: " + std::to_string(action));
}
-
- auto val_moves = get_valid_moves();
-
- std::unordered_map extra;
- extra["valid_moves"] = std::any(val_moves);
- return Connect2::time_step_type(step_type, reward,
- board_, discount_,
- std::move(extra));
+
+ if(is_finished_){
+ return reset();
+ }
+
+ auto valid_move = true;
+ if(board_[action] != 0){
+ valid_move = false;
+ }
+
+ if(valid_move){
+ // this position on the board
+ // is occupied by the given player
+ board_[action] = pid;
+
+ bool won = is_win(pid);
+ bool has_moves = has_legal_moves();
+
+ // there may be more moves to make in the game
+ // but the player may have won. That's why we look
+ // at the won variable first
+ auto step_type = TimeStepTp::INVALID_TYPE;
+ auto reward = 0.0;
+ if(won){
+ step_type = TimeStepTp::LAST;
+ is_finished_ = true;
+ reward = 1.0;
+ }
+ else if(has_moves){
+ // the player has not won the game
+ // and there may be more moves
+ step_type = TimeStepTp::MID;
+ reward = 0.0;
+
+ }
+ else{
+ // the player lost the game
+ step_type = TimeStepTp::LAST;
+ is_finished_ = true;
+ reward = -1.0;
+ }
+
+ auto val_moves = get_valid_moves();
+
+ std::unordered_map extra;
+ extra["valid_moves"] = std::any(val_moves);
+ return Connect2::time_step_type(step_type, reward,
+ board_, discount_,
+ std::move(extra));
+ }
+
+ throw std::logic_error("Move: " + std::to_string(action) + " is invalid");
+
}
-
- throw std::logic_error("Move: " + std::to_string(action) + " is invalid");
-
-}
-Connect2
-Connect2::make_copy(uint_t cidx)const{
- Connect2 copy(cidx);
- std::unordered_map ops;
- auto ver = this -> version();
- copy.make(ver, ops);
- return copy;
-}
-
-}
+
}
}
\ No newline at end of file
diff --git a/src/bitrl/envs/connect2/connect2_env.h b/src/bitrl/envs/connect2/connect2_env.h
index 11cd5a5c..03193faf 100644
--- a/src/bitrl/envs/connect2/connect2_env.h
+++ b/src/bitrl/envs/connect2/connect2_env.h
@@ -81,12 +81,7 @@ class Connect2 final: public EnvBase>,
/// \brief Constructor
///
Connect2();
-
- ///
- /// \brief Constructor
- ///
- explicit Connect2(uint_t cidx);
-
+
///
///
///
@@ -97,7 +92,8 @@ class Connect2 final: public EnvBase>,
/// environment will be slippery
///
virtual void make(const std::string& version,
- const std::unordered_map& options) override final;
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options) override final;
///
/// \brief step. Move in the environment with the given action
@@ -115,8 +111,7 @@ class Connect2 final: public EnvBase>,
///
/// \brief Reset the environment
///
- virtual time_step_type reset(uint_t /*seed*/,
- const std::unordered_map& /*options*/)override final;
+ virtual time_step_type reset()override final;
///
/// \brief Create a new copy of the environment with the given
@@ -155,9 +150,7 @@ class Connect2 final: public EnvBase>,
std::vector get_valid_moves()const;
private:
-
-
-
+
///
/// \brief The discount factor
///
diff --git a/src/bitrl/envs/env_base.h b/src/bitrl/envs/env_base.h
index 7e57fe57..f26cd8a7 100644
--- a/src/bitrl/envs/env_base.h
+++ b/src/bitrl/envs/env_base.h
@@ -17,208 +17,189 @@
namespace bitrl{
namespace envs{
-
-///
-/// \class EnvBase
-/// \brief Base class for environments.
-///
-/// The EnvBase class establishes the minimum contract
-/// that an RL environment should expose.
-///
+/**
+ * @brief Base class interface for Reinforcement Learning environments.
+ *
+ * This class defines the minimum API contract that any RL environment
+ * must implement. It exposes functionality for creating, resetting,
+ * and stepping through an environment, while tracking configuration
+ * such as version, options, and current state.
+ *
+ * @tparam TimeStepType Type returned after each environment step
+ * @tparam SpaceType Environment's space interface type providing
+ * state and action space definitions
+ */
template
class EnvBase: public SpaceType, public synchronized_env_mixin
{
public:
- static_assert(std::is_default_constructible::value && "TimeStepType should be default constructible");
- static_assert(std::is_default_constructible::value && "SpaceType should be default constructible");
+ static_assert(std::is_default_constructible::value &&
+ "TimeStepType should be default constructible");
+ static_assert(std::is_default_constructible::value &&
+ "SpaceType should be default constructible");
- ///
- /// \brief Default seed to use
- ///
+ /** @brief Default seed used in reset() if none provided */
static const uint_t DEFAULT_ENV_SEED = 42;
- ///
- /// \brief The time step type we return every time a step in the
- /// environment is performed
- ///
+ /** @brief Alias for the type returned when stepping the environment */
typedef TimeStepType time_step_type;
- ///
- /// \brief The type describing the state space for the environment
- ///
+ /** @brief Type describing the environment state space */
typedef typename SpaceType::state_space state_space_type;
- ///
- /// \brief The type of the state
- ///
+ /** @brief Type describing an individual state */
typedef typename SpaceType::state_type state_type;
- ///
- /// \brief The type of the action space for the environment
- ///
+ /** @brief Type describing the environment action space */
typedef typename SpaceType::action_space action_space_type;
- ///
- /// \brief The type of the action to be undertaken in the environment
- ///
+ /** @brief Type representing an individual action */
typedef typename SpaceType::action_type action_type;
- ///
- /// \brief Destructor
- ///
+ /** @brief Virtual destructor */
virtual ~EnvBase()=default;
- ///
- /// \brief make. Builds the environment.
- /// \param version. the version of the environment to build
- /// \param options. Options to use for building the environment.
- /// Concrete classes may choose to hold a copy
- ///
+ /**
+ * @brief Construct the environment instance.
+ *
+ * @param version Version string used to control environment variant
+ * @param make_options Key-value configuration options for environment creation.
+ * @param reset_options Key-value options controlling how the environment is reset
+ *
+ * @note Derived classes should use set_version_() and set_make_options_()
+ * internally. They may store selected options for later use.
+ */
virtual void make(const std::string& version,
- const std::unordered_map& options) = 0;
+ const std::unordered_map& make_options,
+ const std::unordered_map& reset_options) = 0;
- ///
- /// \brief close the environment
- ///
+ /** @brief Close and release any acquired environment resources */
virtual void close()=0;
- ///
- /// \brief Reset the environment
- /// \param seed. The seed to use for resetting the environment
- /// \param options. Options to use for resetting the environment.
- ///
- virtual time_step_type reset(uint_t seed,
- const std::unordered_map& options)=0;
-
- ///
- /// \brief Reset the environment always using the same seed
- ///
- time_step_type reset(){
- return reset(DEFAULT_ENV_SEED, std::unordered_map());}
-
- ///
- /// \brief Reset the environment always using the provided seed
- ///
- time_step_type reset(uint_t seed){
- return reset(seed, std::unordered_map());}
-
- ///
- /// \brief step in the environment by performing the given action
- /// \param action. The action to execute in the environment
- /// \return An instance of time_step_type
+ /**
+ * @brief Reset the environment to an initial state using the reset
+ * options specified during make.
+ *
+ * @return Initial time step after reset
+ */
+ virtual time_step_type reset()=0;
+
+ /**
+ * @brief Perform one step in the environment using an action.
+ *
+ * @param action Action applied to the environment
+ * @return New time step after executing the action
+ */
virtual time_step_type step(const action_type& action)=0;
-
- ///
- /// \brief is_created Returns true is make has been called successfully
- ///
- bool is_created()const noexcept{return is_created_;}
-
- ///
- /// \brief Returns the version of the environment
- ///
- std::string version()const noexcept{return version_;}
-
- ///
- /// \brief Returns the name of the environment
- ///
- std::string env_name()const noexcept{return name_;}
-
- ///
- /// \brief Returns a read reference to the options passed when calling make
- ///
+
+ /**
+ * @brief Access the configuration options provided to make().
+ * @return Map of option keys and values
+ */
const std::unordered_map& make_options()const noexcept{return make_options_;}
-
- ///
- /// \brief Read the option with the given name
- ///
+
+ /**
+ * @brief Access the reset options provided to make().
+ * @return Map of reset option keys and values
+ */
+ const std::unordered_map& reset_options()const noexcept{return reset_options_;}
+
+ /**
+ * @brief Read a specific make() option and cast it to the requested type.
+ *
+ * @tparam T Expected data type
+ * @param op_name Key of the option to read
+ * @return Requested value if present
+ * @throws std::bad_any_cast If stored type does not match T
+ */
template
T read_option(const std::string& op_name)const;
- ///
- /// \brief Returns the index of the environment that is active within
- /// a simulation
- ///
- uint_t cidx()const noexcept{return cidx_;}
+ /**
+ * @brief Get the id identifying this environment within a simulation batch.
+ * The id is valid only if make() has been called.
+ * @return Environment instance id
+ */
+ std::string idx()const noexcept{return idx_;}
+
+ /**
+ * @brief Check if make() has successfully initialized the environment.
+ * @return True if environment is ready, false otherwise
+ */
+ bool is_created()const noexcept{return is_created_;}
+
+ /**
+ * @brief Get the name of this environment instance.
+ * @return Environment name
+ */
+ std::string env_name()const noexcept{return name_;}
+
+ /**
+ * @brief Get the environment version set during make().
+ * @return Version string
+ */
+ std::string version()const noexcept{return version_;}
protected:
- ///
- /// \brief Constructor
- ///
- explicit EnvBase(const uint_t cidx=0,
+ /**
+ * @brief Constructor (protected — for subclassing only).
+ * @param idx Id of the environment instance
+ * @param name Name of the environment instance
+ */
+ explicit EnvBase(const std::string& idx=bitrl::consts::INVALID_STR,
const std::string& name=bitrl::consts::INVALID_STR);
- ///
- /// \brief Copy constructor
- ///
+ /** @brief Copy constructor */
EnvBase(const EnvBase&);
- ///
- /// \brief Helper function to set the version.
- /// To be called only when the make is called
- ///
+ /**
+ * @brief Set internal version string.
+ * @note Should be called only inside make()
+ */
void set_version_(const std::string& version )noexcept{version_ = version;}
+
+ /**
+ * @brief Set the id of the environment
+ * @param idx
+ */
+ void set_idx_(const std::string& idx)noexcept{idx_ = idx;}
- ///
- /// \brief Set the make options
- ///
+ /** @brief Store make() options for future access */
void set_make_options_(const std::unordered_map& options) noexcept{make_options_ = options;}
- ///
- /// \brief
- ///
+ /** @brief Mark environment as not created */
void invalidate_is_created_flag_()noexcept{is_created_ = false;}
- ///
- /// \brief mark the environment as created
- ///
+ /** @brief Mark environment creation as successful */
void make_created_()noexcept{is_created_= true;}
-
+
+ /** @brief Mutable access to the current time step */
time_step_type& get_current_time_step_()noexcept{return current_state_;}
+
+ /** @brief Read-only access to the current time step */
const time_step_type& get_current_time_step_()const noexcept{return current_state_;}
private:
- ///
- /// \brief Flag indicating if the environment has been created
- ///
- bool is_created_;
-
- ///
- /// The copy idx
- ///
- uint_t cidx_;
-
- ///
- /// \brief Version of the environment
- ///
- std::string version_;
-
- ///
- /// \brief Name of the environment
- ///
- const std::string name_;
-
- ///
- /// \brief Copy of the options upon calling make
- ///
- std::unordered_map make_options_;
-
- ///
- /// \brief current_state
- ///
- time_step_type current_state_;
+ bool is_created_; ///< Indicates that make() has finished successfully
+ std::string idx_; ///< Environment instance id
+ std::string version_; ///< Environment version identifier
+ const std::string name_; ///< Environment name
+ std::unordered_map make_options_; ///< Copied options from make()
+ std::unordered_map reset_options_; ///< Copied options from make()
+ time_step_type current_state_; ///< Latest environment time step
};
template
-EnvBase::EnvBase(const uint_t cidx, const std::string& name)
+EnvBase::EnvBase(const std::string& idx, const std::string& name)
:
SpaceType(),
synchronized_env_mixin(),
is_created_(false),
-cidx_(cidx),
+idx_(idx),
version_(),
name_(name),
current_state_()
@@ -230,7 +211,7 @@ EnvBase::EnvBase(const EnvBase
SpaceType(),
synchronized_env_mixin(),
is_created_(other.is_created_),
-cidx_(other.cidx_),
+idx_(other.idx_),
version_(other.version_),
name_(other.name_),
current_state_()
@@ -255,6 +236,16 @@ EnvBase::read_option(const std::string& op_name)const{
throw std::logic_error("Option: " + op_name + " not found");
}
+template
+void EnvBase::make(const std::string& version,
+ const std::unordered_map& make_options,
+ const std::unordered_map& reset_options)
+{
+ version_ = version;
+ make_options_ = make_options;
+ reset_options_ = reset_options;
+}
+
}
}
diff --git a/src/bitrl/envs/gdrl/gym_walk.h b/src/bitrl/envs/gdrl/gym_walk.h
index 17264104..6fd8e703 100644
--- a/src/bitrl/envs/gdrl/gym_walk.h
+++ b/src/bitrl/envs/gdrl/gym_walk.h
@@ -24,6 +24,8 @@
#include
#include
+#include "../../sensors/ekf_sensor_fusion.h"
+
#ifdef BITRL_DEBUG
#include
#endif
@@ -32,16 +34,15 @@ namespace bitrl{
namespace envs::gdrl
{
-
- ///
-/// \brief class GymWalk. Interface for the GymWalk environment
-///
- template
- class GymWalk final: public EnvBase,
+/**
+ * GymWalk. Interface for the GymWalk environment
+ */
+template
+class GymWalk final: public EnvBase,
ScalarDiscreteEnv
>
- {
- public:
+{
+public:
///
/// \brief name
@@ -91,13 +92,7 @@ namespace envs::gdrl
///
/// \brief Constructor
///
- GymWalk(const RESTApiServerWrapper& api_server);
-
- ///
- /// \brief Constructor
- ///
- GymWalk(const RESTApiServerWrapper& api_server,
- const uint_t cidx);
+ GymWalk(network::RESTApiServerWrapper& api_server); // non-const: matches the out-of-class definition, which stores &api_server in a non-const pointer member
///
/// \brief copy ctor
@@ -109,7 +104,8 @@ namespace envs::gdrl
/// environment will be slippery
///
virtual void make(const std::string& version,
- const std::unordered_map& options) override final;
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options) override final;
///
@@ -132,9 +128,7 @@ namespace envs::gdrl
///
/// \brief Reset the environment
///
- virtual time_step_type reset(uint_t seed,
- const std::unordered_map& options)override final;
-
+ virtual time_step_type reset()override final;
///
/// \brief Create a new copy of the environment with the given
@@ -154,7 +148,7 @@ namespace envs::gdrl
uint_t n_actions()const noexcept{return action_space_type::size;}
- private:
+private:
///
/// \brief build the dynamics from response
@@ -167,7 +161,7 @@ namespace envs::gdrl
time_step_type create_time_step_from_response_(const nlohmann::json& response) const;
- RESTApiServerWrapper api_server_;
+ network::RESTApiServerWrapper* api_server_;
};
template
@@ -176,29 +170,16 @@ namespace envs::gdrl
template
- GymWalk::GymWalk(const RESTApiServerWrapper& api_server)
+ GymWalk::GymWalk(network::RESTApiServerWrapper& api_server)
:
EnvBase,
ScalarDiscreteEnv
- >(0, GymWalk::name),
- api_server_(api_server_)
+ >(bitrl::consts::INVALID_STR, GymWalk::name), // EnvBase takes (idx, name); idx is assigned later in make()
+ api_server_(&api_server) // point at the caller-supplied server, not at the member's own address
{
- api_server_.register_if_not(GymWalk::name, GymWalk::URI);
+ api_server_->register_if_not(GymWalk::name, GymWalk::URI); // api_server_ is a pointer member
}
- template
- GymWalk::GymWalk(const RESTApiServerWrapper& api_server,
- const uint_t cidx)
- :
- EnvBase,
- ScalarDiscreteEnv
- >(cidx, "GymWalk"),
- api_server_(api_server)
- {
- api_server_.register_if_not(GymWalk::name, GymWalk::URI);
- }
-
-
template
GymWalk::GymWalk(const GymWalk& other)
:
@@ -232,19 +213,21 @@ namespace envs::gdrl
template
void
GymWalk::make(const std::string& version,
- const std::unordered_map& options){
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options){
if(this->is_created()){
return;
}
auto response = api_server_.make(this->env_name(),
- this->cidx(),
version,
ops);
- this->set_version_(version);
- this->make_created_();
+ auto idx = response["idx"];
+ this -> set_idx_(idx);
+ this -> base_type::make(version, options, reset_options);
+ this -> make_created_();
}
template
@@ -260,7 +243,7 @@ namespace envs::gdrl
}
const auto response = api_server_.step(this -> env_name(),
- this -> cidx(),
+ this -> idx(),
action);
this->get_current_time_step_() = this->create_time_step_from_response_(response);
@@ -289,8 +272,7 @@ namespace envs::gdrl
template
typename GymWalk::time_step_type
- GymWalk::reset(uint_t seed,
- const std::unordered_map& /*options*/){
+ GymWalk::reset(){
if(!this->is_created()){
#ifdef RLENVSCPP_DEBUG
@@ -300,24 +282,13 @@ namespace envs::gdrl
}
auto response = this -> api_server_.reset(this->env_name(),
- this -> cidx(), seed,
+ this -> idx(), seed,
nlohmann::json());
this->create_time_step_from_response_(response);
return this -> get_current_time_step_();
}
- template
- GymWalk
- GymWalk::make_copy(uint_t cidx)const{
-
- GymWalk copy(api_server_ ,cidx);
- std::unordered_map ops;
- auto version = this -> version();
- copy.make(version, ops);
- return copy;
- }
-
}
}
diff --git a/src/bitrl/envs/grid_world/grid_world_env.h b/src/bitrl/envs/grid_world/grid_world_env.h
index 023ca463..f5f2c8db 100644
--- a/src/bitrl/envs/grid_world/grid_world_env.h
+++ b/src/bitrl/envs/grid_world/grid_world_env.h
@@ -14,6 +14,7 @@
#include "bitrl/envs/time_step.h"
#include "bitrl/envs/env_base.h"
#include "bitrl/envs/space_type.h"
+#include "bitrl/utils/utils.h"
#ifdef BITRL_DEBUG
#include
@@ -191,52 +192,36 @@ namespace envs::grid_world
[[nodiscard]] board_move_type validate_move(board_component_type piece, board_position pos)const;
};
- template
- struct GridWorldEnv
- {
-
- typedef detail::board state_space;
-
- typedef detail::board_state_type state_type;
-
- ///
- /// \brief state space size
- ///
- static constexpr uint_t STATE_SPACE_SIZE = size_size * size_size;
-
- ///
- /// \brief the action space type
- ///
- typedef ScalarDiscreteSpace<0, 4> action_space;
-
- ///
- /// \brief the Action type
- ///
- typedef action_space::space_item_type action_type;
+template
+struct GridWorldEnv
+{
- ///
- /// \brief action space size
- ///
- static constexpr uint_t ACTION_SPACE_SIZE = action_space::size;
- };
- }
+ typedef detail::board state_space;
+ typedef detail::board_state_type state_type;
+ static constexpr uint_t STATE_SPACE_SIZE = size_size * size_size;
+ typedef ScalarDiscreteSpace<0, 4> action_space;
+ typedef action_space::space_item_type action_type;
+ static constexpr uint_t ACTION_SPACE_SIZE = action_space::size;
+};
+}
- ///
- /// The Gridworld class models a square board. There are three ways to initialize the board.
- /// - static
- /// - random
- /// - player
- /// See the GridworldInitType enumeration.
- /// Static initialization means that the objects on the board are initialized at the same predetermined locations.
- /// Player initialization means that the player is initialized at a random position on the board.
- /// Random initialization means that all the objects are placed randomly
- ///
- template
- class Gridworld final: public EnvBase,
+/**
+ * The Gridworld class models a square board. There are three ways to initialize the board.
+ * - static
+ * - random
+ * - player
+ * See the GridWorldInitType enumeration.
+ * Static initialization means that the objects on the board are initialized at the same predetermined locations.
+ * Player initialization means that the player is initialized at a random position on the board.
+ * Random initialization means that all the objects are placed randomly
+ */
+
+template
+class Gridworld final: public EnvBase,
detail::GridWorldEnv>
- {
- public:
+{
+public:
static_assert (side_size_ >= 4,
"The side size should be greater than or equal to 4");
@@ -300,11 +285,6 @@ namespace envs::grid_world
///
/// \brief Gridworld. Constructor
- ///
- explicit Gridworld(const uint_t cidx);
-
- ///
- /// \brief Gridworld. Constructor
///
Gridworld(const Gridworld& other);
@@ -313,19 +293,13 @@ namespace envs::grid_world
/// environment will be slippery
///
void make(const std::string& version,
- const std::unordered_map& options) override final;
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options) override final;
///
/// \brief Reset the environment
///
- time_step_type reset(uint_t /*seed*/,
- const std::unordered_map& /*options*/) override final;
-
- ///
- /// \brief Create a new copy of the environment with the given
- /// copy index
- ///
- Gridworld make_copy(uint_t cidx)const;
+ time_step_type reset() override final;
///
/// \brief step
@@ -379,7 +353,7 @@ namespace envs::grid_world
///
[[nodiscard]] GridWorldInitType init_type()const noexcept{return init_mode_;}
- private:
+private:
///
/// \brief init_mode_
@@ -420,20 +394,7 @@ namespace envs::grid_world
Gridworld::Gridworld()
:
EnvBase,
- detail::GridWorldEnv>(0, "Gridworld"),
- init_mode_(GridWorldInitType::INVALID_TYPE),
- randomize_state_(false),
- seed_(0),
- noise_factor_(0.0),
- board_()
- {
- }
-
- template
- Gridworld::Gridworld(uint_t cidx)
- :
- EnvBase,
- detail::GridWorldEnv>(cidx, "Gridworld"),
+ detail::GridWorldEnv>(Gridworld::name),
init_mode_(GridWorldInitType::INVALID_TYPE),
randomize_state_(false),
seed_(0),
@@ -442,7 +403,6 @@ namespace envs::grid_world
{
}
-
template
Gridworld::Gridworld(const Gridworld& other)
:
@@ -459,7 +419,8 @@ namespace envs::grid_world
template
void
Gridworld::make(const std::string& version,
- const std::unordered_map& options){
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options){
if(this -> is_created()){
return;
@@ -491,22 +452,14 @@ namespace envs::grid_world
// to created
this->set_version_(version);
this->make_created_();
- }
- template
- Gridworld
- Gridworld::make_copy(uint_t cidx)const{
-
- Gridworld copy(cidx);
- std::unordered_map ops;
- ops["randomize_state"] = this -> has_random_state();
- ops["noise_factor"] = this -> noise_factor_;
- ops["seed"] = std::any(static_cast(this -> seed_));
- auto version = this -> version();
- copy.make(version, ops);
- return copy;
+ auto idx = utils::uuid4();
+ this -> set_idx_(idx);
+ this -> base_type::make(version, options, reset_options);
+ this -> make_created_();
}
+
template
typename Gridworld::time_step_type
Gridworld::step(const action_type& action){
@@ -526,8 +479,7 @@ namespace envs::grid_world
template
typename Gridworld::time_step_type
- Gridworld::reset(uint_t /*seed*/,
- const std::unordered_map& /*options*/){
+ Gridworld::reset(){
// reinitialize the board
auto obs = board_.init_board(side_size_, init_mode_);
diff --git a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp
index d6234b13..97cf8a00 100644
--- a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp
+++ b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp
@@ -16,14 +16,9 @@ namespace bitrl
const std::string LunarLanderContinuousEnv::name = "LunarLanderContinuous";
const std::string LunarLanderContinuousEnv::URI = "/gymnasium/lunar-lander-continuous-env";
- LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server)
+ LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(network::RESTRLEnvClient& api_server)
:
- LunarLanderDiscreteEnv::base_type(api_server, 0, LunarLanderDiscreteEnv::name, LunarLanderDiscreteEnv::URI)
- {}
-
- LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server, const uint_t cidx)
- :
- LunarLanderDiscreteEnv::base_type(api_server, cidx, LunarLanderDiscreteEnv::name, LunarLanderDiscreteEnv::URI)
+ LunarLanderDiscreteEnv::base_type(api_server, LunarLanderDiscreteEnv::name, LunarLanderDiscreteEnv::URI)
{}
LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(const LunarLanderDiscreteEnv& other)
@@ -31,42 +26,18 @@ namespace bitrl
LunarLanderDiscreteEnv::base_type(other)
{}
- LunarLanderDiscreteEnv
- LunarLanderDiscreteEnv::make_copy(uint_t cidx)const
- {
- LunarLanderDiscreteEnv copy(this -> get_api_server(), cidx);
- auto version = this -> version();
- copy.make(version, this -> make_options());
- return copy;
- }
-
- LunarLanderContinuousEnv::LunarLanderContinuousEnv(const RESTApiServerWrapper& api_server)
+ LunarLanderContinuousEnv::LunarLanderContinuousEnv(network::RESTRLEnvClient& api_server)
:
- LunarLanderContinuousEnv::base_type(api_server, 0, LunarLanderContinuousEnv::name, LunarLanderContinuousEnv::URI)
+ LunarLanderContinuousEnv::base_type(api_server, LunarLanderContinuousEnv::name, LunarLanderContinuousEnv::URI)
{}
- LunarLanderContinuousEnv::LunarLanderContinuousEnv(const RESTApiServerWrapper& api_server, const uint_t cidx)
- :
- LunarLanderContinuousEnv::base_type(api_server, cidx, LunarLanderContinuousEnv::name, LunarLanderContinuousEnv::URI)
- {}
LunarLanderContinuousEnv::LunarLanderContinuousEnv(const LunarLanderContinuousEnv& other)
:
LunarLanderContinuousEnv::base_type(other)
{}
- LunarLanderContinuousEnv
- LunarLanderContinuousEnv::make_copy(uint_t cidx)const
- {
-
- LunarLanderContinuousEnv copy(this -> get_api_server(), cidx);
- auto version = this -> version();
- copy.make(version, this -> make_options());
- return copy;
-
- }
-
}
}
diff --git a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h
index 7ff0d463..bb82e165 100644
--- a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h
+++ b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h
@@ -8,7 +8,7 @@
#include "bitrl/bitrl_types.h"
#include "bitrl/envs/time_step.h"
#include "bitrl/envs/gymnasium/gymnasium_env_base.h"
-#include "bitrl/envs/api_server/apiserver.h"
+#include "bitrl/network/rest_rl_env_client.h"
#include "bitrl/envs/env_types.h"
#include "bitrl/extern/nlohmann/json/json.hpp"
@@ -59,8 +59,7 @@ namespace bitrl
/// @param name
/// @param uri
///
- _LunarLanderEnv(const RESTApiServerWrapper& api_server, uint_t cidx,
- const std::string& name, const std::string& uri);
+ _LunarLanderEnv(network::RESTRLEnvClient& api_server, const std::string& name, const std::string& uri);
///
/// @param other
@@ -76,7 +75,8 @@ namespace bitrl
/// \brief make. Build the environment
///
virtual void make(const std::string& version,
- const std::unordered_map& /*options*/) override final;
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options) override final;
///
/// \brief step. Step in the environment following the given action
@@ -98,10 +98,10 @@ namespace bitrl
};
template
- _LunarLanderEnv::_LunarLanderEnv(const RESTApiServerWrapper& api_server, uint_t cidx,
+ _LunarLanderEnv::_LunarLanderEnv(network::RESTRLEnvClient& api_server,
const std::string& name, const std::string& uri)
:
- GymnasiumEnvBase(api_server, cidx, name)
+ GymnasiumEnvBase(api_server, name)
{
this -> get_api_server().register_if_not(name, uri);
}
@@ -115,7 +115,8 @@ namespace bitrl
template
void
_LunarLanderEnv::make(const std::string& version,
- const std::unordered_map& options)
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options)
{
if(this->is_created()){
return;
@@ -148,11 +149,11 @@ namespace bitrl
}
auto response = this -> get_api_server().make(this -> env_name(),
- this -> cidx(),
version, ops);
- this -> set_version_(version);
- this -> set_make_options_(options);
+ auto idx = response["idx"];
+ this -> set_idx_(idx);
+ this -> base_type::make(version, options, reset_options);
this -> make_created_();
}
@@ -166,11 +167,11 @@ namespace bitrl
#endif
if(this->get_current_time_step_().last()){
- return this->reset(42, std::unordered_map());
+ return this->reset();
}
const auto response = this -> get_api_server().step(this -> env_name(),
- this -> cidx(),
+ this -> idx(),
action);
this->get_current_time_step_() = this->create_time_step_from_response_(response);
@@ -199,10 +200,10 @@ namespace bitrl
///
/// \brief LunarLanderDiscreteEnv environment with discrete action space
///
- class LunarLanderDiscreteEnv final : public lunar_lander_detail::_LunarLanderEnv>,
+class LunarLanderDiscreteEnv final : public lunar_lander_detail::_LunarLanderEnv>,
ContinuousVectorStateDiscreteActionEnv<8, 4, 0, real_t>>
- {
- public:
+{
+public:
///
/// \brief name
///
@@ -248,12 +249,7 @@ namespace bitrl
/// Constructor
/// @param api_server
///
- LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server);
-
- ///
- /// \brief Constructor
- ///
- LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server, const uint_t cidx);
+ LunarLanderDiscreteEnv(network::RESTRLEnvClient& api_server);
///
/// @param other
@@ -265,21 +261,16 @@ namespace bitrl
///
~LunarLanderDiscreteEnv() override =default;
- ///
- /// \brief Create a new copy of the environment with the given
- /// copy index
- ///
- LunarLanderDiscreteEnv make_copy(uint_t cidx)const;
-
};
- ///
- /// \brief LunarLanderDiscreteEnv environment with discrete action space
- ///
- class LunarLanderContinuousEnv final : public lunar_lander_detail::_LunarLanderEnv>,
+/**
+ *
+ * LunarLanderContinuousEnv environment with continuous action space
+ */
+class LunarLanderContinuousEnv final : public lunar_lander_detail::_LunarLanderEnv>,
ContinuousVectorStateContinuousVectorActionEnv<8, 2, real_t, real_t>>
- {
- public:
+{
+public:
///
/// \brief name
///
@@ -325,12 +316,7 @@ namespace bitrl
/// Constructor
/// @param api_server
///
- LunarLanderContinuousEnv(const RESTApiServerWrapper& api_server);
-
- ///
- /// \brief Constructor
- ///
- LunarLanderContinuousEnv(const RESTApiServerWrapper& api_server, const uint_t cidx);
+ LunarLanderContinuousEnv(network::RESTRLEnvClient& api_server);
///
/// @param other
@@ -342,12 +328,6 @@ namespace bitrl
///
~LunarLanderContinuousEnv() override =default;
- ///
- /// \brief Create a new copy of the environment with the given
- /// copy index
- ///
- LunarLanderContinuousEnv make_copy(uint_t cidx)const;
-
};
}
}
diff --git a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp
index ec5e386c..a49d1d15 100644
--- a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp
+++ b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp
@@ -26,28 +26,16 @@ namespace envs::gymnasium
}
- Acrobot::Acrobot(const RESTApiServerWrapper& api_server)
+ Acrobot::Acrobot(network::RESTRLEnvClient& api_server)
:
GymnasiumEnvBase >,
ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t >
- >(api_server,
- 0,
- Acrobot::name)
+ >(api_server, Acrobot::name)
{
this -> get_api_server().register_if_not(Acrobot::name, Acrobot::URI);
}
- Acrobot::Acrobot(const RESTApiServerWrapper& api_server,
- const uint_t cidx)
- :
- GymnasiumEnvBase >,
- ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t >
- >(api_server,
- cidx,
- Acrobot::name)
- {
- this -> get_api_server().register_if_not(Acrobot::name, Acrobot::URI);
- }
+
Acrobot::Acrobot(const Acrobot& other)
:
@@ -58,18 +46,20 @@ namespace envs::gymnasium
void
Acrobot::make(const std::string& version,
- const std::unordered_map& /*options*/){
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options){
if(this->is_created()){
return;
}
auto response = this -> get_api_server().make(this -> env_name(),
- this -> cidx(),
version, nlohmann::json());
- this->set_version_(version);
- this->make_created_();
+ auto idx = response["idx"];
+ this -> set_idx_(idx);
+ this -> base_type::make(version, options, reset_options);
+ this -> make_created_();
}
@@ -81,29 +71,15 @@ namespace envs::gymnasium
#endif
if(this->get_current_time_step_().last()){
- return this->reset(42, std::unordered_map());
+ return this->reset();
}
const auto response = this -> get_api_server().step(this -> env_name(),
- this -> cidx(),
+ this -> idx(),
action);
this->get_current_time_step_() = this->create_time_step_from_response_(response);
return this->get_current_time_step_();
- }
-
- Acrobot
- Acrobot::make_copy(uint_t cidx)const{
-
- Acrobot copy(this->get_api_server(), cidx);
-
- std::unordered_map ops;
- auto version = this -> version();
- copy.make(version, ops);
- return copy;
- }
-
-
-
+}
}
}
diff --git a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h
index e299335f..52c70bf9 100644
--- a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h
+++ b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h
@@ -122,7 +122,7 @@
#include "bitrl/bitrl_types.h"
#include "bitrl/envs/time_step.h"
#include "bitrl/envs/gymnasium/gymnasium_env_base.h"
-#include "bitrl/envs/api_server/apiserver.h"
+#include "bitrl/network/rest_rl_env_client.h"
#include "bitrl/envs/env_types.h"
#include "bitrl/extern/nlohmann/json/json.hpp"
@@ -132,126 +132,100 @@
#include
namespace bitrl{
-namespace envs{
-namespace gymnasium{
+namespace envs::gymnasium
+{
-///
+ ///
/// \brief class Acrobot. The Acrobot class. Interface for Gymnasium::Acrobot environment.
///
class Acrobot final: public GymnasiumEnvBase >,
- ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t >
- >
+ ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t >
+ >
{
public:
- ///
+ ///
/// \brief The name of the environment
///
- static const std::string name;
-
- ///
+ static const std::string name;
+
+ ///
/// \brief The URI for accessing the environment
///
- static const std::string URI;
-
- ///
+ static const std::string URI;
+
+ ///
/// \brief Base class type
///
- typedef GymnasiumEnvBase >,
- ContinuousVectorStateDiscreteActionEnv< 6, // size of state space
- 2, // end of action space
- 0, // start of action space
- real_t> // type of state
- >::base_type base_type;
- ///
+ typedef GymnasiumEnvBase >,
+ ContinuousVectorStateDiscreteActionEnv< 6, // size of state space
+ 2, // end of action space
+ 0, // start of action space
+ real_t> // type of state
+ >::base_type base_type;
+ ///
/// \brief The time step type we return every time a step in the
/// environment is performed
///
- typedef typename base_type::time_step_type time_step_type;
-
- ///
+ typedef typename base_type::time_step_type time_step_type;
+
+ ///
/// \brief The type describing the state space for the environment
///
- typedef typename base_type::state_space_type state_space_type;
-
- ///
+ typedef typename base_type::state_space_type state_space_type;
+
+ ///
/// \brief The type of the action space for the environment
///
- typedef typename base_type::action_space_type action_space_type;
+ typedef typename base_type::action_space_type action_space_type;
- ///
+ ///
/// \brief The type of the action to be undertaken in the environment
///
- typedef typename base_type::action_type action_type;
-
- ///
+ typedef typename base_type::action_type action_type;
+
+ ///
/// \brief The type of the state
///
- typedef typename base_type::state_type state_type;
-
- ///
- /// \brief Acrobot. Constructor
- /// \param api_server. The RESTApiServerWrapper instance to use
- ///
- Acrobot(const RESTApiServerWrapper& api_server );
-
- ///
- /// \brief Acrobot. Constructor
- /// \param api_server. The RESTApiServerWrapper instance to use
- /// \param cidx. The index to assign to the created environment
- ///
- Acrobot(const RESTApiServerWrapper& api_server,
- const uint_t cidx);
-
- ///
- /// \brief Acrobot. Copy constructor
- ///
- Acrobot(const Acrobot& other);
-
- ///
- /// \brief ~Acrobot. Destructor
- ///
- ~Acrobot()=default;
+ typedef typename base_type::state_type state_type;
- ///
+
+ Acrobot(network::RESTRLEnvClient& api_server );
+
+ Acrobot(const Acrobot& other);
+
+ ~Acrobot() override =default;
+
+ ///
/// \brief make. Build the environment
/// \param version. The version of the environment to create
/// \param options. The options to use when creating the environment
///
- virtual void make(const std::string& version,
- const std::unordered_map& options) override final;
-
- ///
+ virtual void make(const std::string& version,
+ const std::unordered_map& options,
+ const std::unordered_map& reset_options) override final;
+
+ ///
/// \brief step. Step in the environment following the given action.
/// \param action. The action to execute
///
- virtual time_step_type step(const action_type& action) override final;
-
- ///
- /// \brief Create a new copy of the environment with the given
- /// copy index.
- /// \param cidx. The index to assign to the copied environment
- ///
- Acrobot make_copy(uint_t cidx)const;
+ virtual time_step_type step(const action_type& action) override final;
- ///
+ ///
/// \brief n_actions. Returns the number of actions
///
- uint_t n_actions()const noexcept{return action_space_type::size;}
+ uint_t n_actions()const noexcept{return action_space_type::size;}
protected:
-
- ///
+
+ ///
/// \brief Handle the reset response from the environment server
///
- virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final;
-
-};
+ virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final;
+ };
-
-}
}
}
diff --git a/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp b/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp
index 8378a51e..768e1372 100644
--- a/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp
+++ b/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp
@@ -29,21 +29,10 @@ namespace envs::gymnasium
std::unordered_map());
}
- CartPole::CartPole(const RESTApiServerWrapper& api_server)
+ CartPole::CartPole(network::RESTRLEnvClient& api_server)
:
GymnasiumEnvBase >,
- ContinuousVectorStateDiscreteActionEnv<4, 2, 0, real_t >>(api_server, 0, CartPole::name)
- {
- this -> get_api_server().register_if_not(CartPole::name, CartPole::URI);
- }
-
- CartPole::CartPole(const RESTApiServerWrapper& api_server,
- const uint_t cidx)
- :
- GymnasiumEnvBase >,
- ContinuousVectorStateDiscreteActionEnv<4, 2, 0, real_t >>(api_server,
- cidx,
- CartPole::name)
+ ContinuousVectorStateDiscreteActionEnv<4, 2, 0, real_t >>(api_server, CartPole::name)
{
this -> get_api_server().register_if_not(CartPole::name, CartPole::URI);
}
@@ -56,29 +45,23 @@ namespace envs::gymnasium
void
CartPole::make(const std::string& version,
- const std::unordered_map& options){
+ const std::unordered_map& options,
+ const std::unordered_map