diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 032e1f4c..6c35392d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,4 +1,4 @@ -name: Build rlenvscpp +name: Build bitrl on: push: @@ -21,7 +21,39 @@ jobs: - uses: actions/checkout@v4 - name: Install dependencies run: | - sudo apt-get install -y g++ cmake libboost-all-dev libgtest-dev libeigen3-dev libblas-dev + sudo apt-get install -y \ + build-essential \ + cmake \ + git \ + libboost-all-dev \ + libgtest-dev \ + libeigen3-dev \ + libblas-dev \ + libopencv-dev + # g++ cmake libboost-all-dev libgtest-dev libeigen3-dev libblas-dev + - name: Build and install Paho MQTT C + run: | + git clone https://github.com/eclipse/paho.mqtt.c.git + cd paho.mqtt.c + cmake -Bbuild -H. \ + -DPAHO_WITH_SSL=ON \ + -DPAHO_BUILD_SHARED=ON \ + -DPAHO_BUILD_STATIC=OFF + cmake --build build + sudo cmake --install build + sudo ldconfig + - name: Build and install Paho MQTT C++ + run: | + git clone https://github.com/eclipse/paho.mqtt.cpp.git + cd paho.mqtt.cpp + cmake -Bbuild -H. \ + -DPAHO_WITH_SSL=ON \ + -DPAHO_BUILD_SHARED=ON \ + -DPAHO_BUILD_STATIC=OFF + cmake --build build + sudo cmake --install build + sudo ldconfig + - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type diff --git a/CMakeLists.txt b/CMakeLists.txt index 01e81d26..b40517ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ MESSAGE(STATUS "Using CMake ${CMAKE_VERSION}") SET(BITRL_VERSION_MAJOR 1) -SET(BITRL_VERSION_MINOR 2) +SET(BITRL_VERSION_MINOR 8) SET(BITRL_VERSION_PATCH 0) SET(BITRL_VERSION "${BITRL_VERSION_MAJOR}.${BITRL_VERSION_MINOR}.${BITRL_VERSION_PATCH}") diff --git a/README.md b/README.md index d39b5e4e..ca0e47b0 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,8 @@ # bitrl ```bitrl``` is an effort to provide implementations and wrappers of environments suitable for training reinforcement learning agents -using C++. +using C++. The documentation for the library can be found here. -Furthermore, there is some minimal support for working with Arduino UNO boards over USB or WiFi. -See also Working with Webots -for how to integrate ```bitrl``` with Webots. - -Various RL algorithms using the environments can be found at cuberl. - -The documentation for the library can be found here. The following is an example how to use the ``FrozenLake`` environment from Gymnasium. 
@@ -18,7 +11,7 @@ The following is an example how to use the #include "bitrl/bitrl_types.h" #include "bitrl/envs/gymnasium/toy_text/frozen_lake_env.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include #include @@ -32,7 +25,6 @@ const std::string SERVER_URL = "http://0.0.0.0:8001/api"; using bitrl::envs::gymnasium::FrozenLake; using bitrl::envs::RESTApiServerWrapper; - void test_frozen_lake(const RESTApiServerWrapper& server){ FrozenLake<4> env(server); @@ -40,21 +32,26 @@ void test_frozen_lake(const RESTApiServerWrapper& server){ std::cout<<"Environame URL: "< options; - options.insert({"is_slippery", false}); - env.make("v1", options); + std::unordered_map make_ops; + make_ops.insert({"is_slippery", false}); + + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + env.make("v1", make_ops, reset_ops); std::cout<<"Is environment created? "<()); + auto time_step = env.reset(); std::cout<<"Reward on reset: "<bitrl-rest-api +Various RL algorithms using the environments can be found at cuberl. + +Furthermore, there is some minimal support for working with Arduino UNO boards over USB or WiFi. +See also Working with Webots +for how to integrate ```bitrl``` with Webots. 
+ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5c840212..0b2894f6 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -25,8 +25,8 @@ IF(BITRL_WEBOTS) ENDIF() ADD_SUBDIRECTORY(example_1) -ADD_SUBDIRECTORY(example_2) -ADD_SUBDIRECTORY(example_3) +#ADD_SUBDIRECTORY(example_2) +#ADD_SUBDIRECTORY(example_3) #ADD_SUBDIRECTORY(example_4) ADD_SUBDIRECTORY(example_5) ADD_SUBDIRECTORY(example_6) diff --git a/examples/box2d/box2d_example.cpp b/examples/box2d/box2d_example.cpp index a90aadc4..48587126 100644 --- a/examples/box2d/box2d_example.cpp +++ b/examples/box2d/box2d_example.cpp @@ -3,17 +3,19 @@ // #include "bitrl/envs/gymnasium/box2d/lunar_lander_env.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include #include #include +#include "../../src/bitrl/sensors/ekf_sensor_fusion.h" + namespace box2d_example { + using namespace bitrl; const std::string SERVER_URL = "http://0.0.0.0:8001/api"; using bitrl::real_t; - using bitrl::envs::RESTApiServerWrapper; using bitrl::envs::gymnasium::LunarLanderDiscreteEnv; using bitrl::envs::gymnasium::LunarLanderContinuousEnv; } @@ -22,7 +24,7 @@ int main() { using namespace box2d_example; - RESTApiServerWrapper server(SERVER_URL, true); + bitrl::network::RESTRLEnvClient server(SERVER_URL, true); std::unordered_map options; options["wind_power"] = std::any(static_cast(10.0)); @@ -57,7 +59,9 @@ int main() std::cout<<"Working with LunarLanderContinuousEnv..."< reset_options; + env.make("v3", options, reset_options); std::cout<<"Is environment created? 
"< action = {0.8, 0.9}; time_step = env.step(action); std::cout<<"Time step: "< #include @@ -11,6 +11,7 @@ #include namespace example_1{ + using namespace bitrl; const std::string SERVER_URL = "http://0.0.0.0:8001/api"; @@ -18,31 +19,36 @@ using bitrl::envs::gymnasium::FrozenLake; using bitrl::envs::gymnasium::Taxi; using bitrl::envs::gymnasium::BlackJack; using bitrl::envs::gymnasium::CliffWorld; -using bitrl::envs::RESTApiServerWrapper; +using bitrl::network::RESTRLEnvClient; -void test_frozen_lake(const RESTApiServerWrapper& server){ +void test_frozen_lake(RESTRLEnvClient& server){ FrozenLake<4> env(server); std::cout<<"Environame URL: "< options; - options.insert({"is_slippery", false}); - env.make("v1", options); + std::unordered_map make_ops; + make_ops.insert({"is_slippery", false}); + + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + env.make("v1", make_ops, reset_ops); std::cout<<"Is environment created? "<()); + auto time_step = env.reset(); std::cout<<"Reward on reset: "<()); - - auto copy_env = env.make_copy(1); - copy_env.reset(); - - std::cout<<"Org env cidx: "< options; - env.make("v3", options); + std::unordered_map make_ops; + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + env.make("v3", make_ops, reset_ops); std::cout<<"Is environment created? "<()); + auto time_step = env.reset(); std::cout<<"Reward on reset: "<(item)<(item)< options; options["natural"] = true; + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + std::cout<<"Environment created..."<()); - - auto copy_env = env.make_copy(1); - copy_env.reset(); - - std::cout<<"Org env cidx: "< options; options["max_episode_steps"] = std::any(static_cast(10)); - env.make("v0", options); + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + env.make("v0", options, reset_ops); std::cout<<"Is environment created? 
"<()); + auto time_step = env.reset(); std::cout<<"Reward on reset: "<(item)<()); - - auto copy_env = env.make_copy(1); - copy_env.reset(); - - std::cout<<"Org env cidx: "< -#include - -int main(){ - - const std::string SERVER_URL = "http://0.0.0.0:8001/api"; - - bitrl::envs::RESTApiServerWrapper server_wrapper(SERVER_URL); - - auto has_gym = server_wrapper.has_gymnasium(); - std::cout<<"Has environment server Gymnasium? "< -#endif - -#include -#include -#include - - -namespace example{ - -using bitrl::uint_t; -using bitrl::envs::RESTApiServerWrapper; -const std::string SERVER_URL = "http://0.0.0.0:8001/api"; -const uint_t MAX_TRAJECTORY_SIZE = 10; - -typedef bitrl::envs::gymnasium::FrozenLake<4> env_type; - - -auto random_action_selector = [](auto /*state*/){ - - // randomly select an action - std::mt19937 gen(42); // mersenne_twister_engine seeded with rd() - std::uniform_int_distribution<> distrib(0, 3); - auto action = distrib(gen); - return action; - -}; - -} - -int main(){ - - using namespace example; - - RESTApiServerWrapper server(SERVER_URL, true); - env_type env(server); - - std::cout<<"Environame URL: "< options; - options.insert({"is_slippery", true}); - env.make("v1", options); - env.reset(42, std::unordered_map()); - - std::cout<<"Is environment created? 
"< #include #include +#include namespace example_5{ - + + using namespace bitrl; + using namespace bitrl::envs::grid_world; void create_static(){ std::cout<<"Creating STATIC Gridworld..."< env; + Gridworld<4> env; std::unordered_map options; + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); options["mode"] = std::any(GridWorldInitType::STATIC); - env.make("v0", options); + env.make("v0", options, reset_ops); std::cout<<"Number of actions: "< options; options["mode"] = std::any(GridWorldInitType::RANDOM); - env.make("v0", options); + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + + env.make("v0", options, reset_ops); std::cout<<"Number of actions: "< options; - env.make("v1", options); + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + env.make("v1", options, reset_ops); auto time_step = env.reset(); diff --git a/examples/example_7/example_7.cpp b/examples/example_7/example_7.cpp index 2b4840bc..725dacae 100644 --- a/examples/example_7/example_7.cpp +++ b/examples/example_7/example_7.cpp @@ -25,7 +25,7 @@ int main(){ std::unordered_map options; // make the environment - env.make("v1", options); + env.make("v1", options, options); std::cout<<"Is active? "< @@ -15,11 +15,11 @@ int main(){ using namespace bitrl::envs::gymnasium; using bitrl::uint_t; - using bitrl::envs::RESTApiServerWrapper; + using bitrl::network::RESTRLEnvClient; const std::string SERVER_URL = "http://0.0.0.0:8001/api"; - RESTApiServerWrapper server(SERVER_URL, true); + RESTRLEnvClient server(SERVER_URL, true); // Acrobot vector environment AcrobotV env(server); @@ -29,9 +29,12 @@ int main(){ std::unordered_map options; options["num_envs"] = std::any(static_cast(3)); - + + std::unordered_map reset_ops; + reset_ops.insert({"seed", static_cast(42)}); + // make the environment - env.make("v1", options); + env.make("v1", options, reset_ops); std::cout<<"Reseting the environment... 
"< @@ -10,179 +11,169 @@ namespace bitrl{ -namespace envs{ -namespace connect2{ - -const std::string Connect2::name = "Connect2"; - -Connect2::Connect2() -: -EnvBase>, - DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(0, "Connect2"), -discount_(1.0), -board_() -{} - -Connect2::Connect2(uint_t cidx) -: -EnvBase>, - DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(cidx, "Connect2"), -discount_(1.0), -board_() -{} - -Connect2::Connect2(const Connect2& other) -: -EnvBase>, - DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(other), -discount_(1.0), -board_(other.board_), -is_finished_(other.is_finished_) -{} - -void -Connect2::make(const std::string& /*version*/, - const std::unordered_map& /*options*/){ - - board_.resize(4, 0); - this -> set_version_("v1"); - this -> make_created_(); +namespace envs::connect2 +{ -} + const std::string Connect2::name = "Connect2"; -Connect2::time_step_type -Connect2::step(const action_type& action){ - return move(player_id_1_, action); - -} + Connect2::Connect2() + : + EnvBase>, + DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >( "Connect2"), + discount_(1.0), + board_() + {} -Connect2::time_step_type -Connect2::reset(uint_t /*seed*/, - const std::unordered_map& /*options*/){ - board_ = std::vector(4, 0); - is_finished_ = false; - this -> get_current_time_step_() = Connect2::time_step_type(TimeStepTp::FIRST, 0.0, board_, discount_); - return this -> get_current_time_step_(); -} -bool -Connect2::is_win(uint_t player)const noexcept{ - - auto player_sum = 0; - std::for_each(board_.begin(), - board_.end(), - [&player_sum, player](auto val){ - - if(val == player) - player_sum += 1; - }); - - return player_sum == win_val_; -} -std::vector -Connect2::get_valid_moves()const{ - - std::vector val_moves_; - val_moves_.reserve(4); - - for(uint_t i=0; i>, + DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t > >(other), + discount_(1.0), + board_(other.board_), + is_finished_(other.is_finished_) + {} + + void + 
Connect2::make(const std::string& /*version*/, + const std::unordered_map& options, + const std::unordered_map& reset_options){ + + board_.resize(4, 0); + this -> set_version_("v1"); + this -> make_created_(); + + auto idx = utils::uuid4(); + this -> set_idx_(idx); + this -> base_type::make("v1", options, reset_options); + this -> make_created_(); -bool -Connect2::has_legal_moves()const noexcept{ - - for(auto idx : board_){ - if(idx == 0){ - return true; - } } - - return false; -} + Connect2::time_step_type + Connect2::step(const action_type& action){ + return move(player_id_1_, action); -Connect2::time_step_type -Connect2::move(const uint_t pid, const action_type& action){ - - - if(pid != 1 && pid != 2){ - throw std::logic_error("Invalid player id: " + std::to_string(pid)); } - - if(action >= board_.size()){ - throw std::logic_error("Invalid action id: " + std::to_string(action)); + + Connect2::time_step_type + Connect2::reset(){ + board_ = std::vector(4, 0); + is_finished_ = false; + this -> get_current_time_step_() = Connect2::time_step_type(TimeStepTp::FIRST, 0.0, board_, discount_); + return this -> get_current_time_step_(); } - - if(is_finished_){ - return reset(); + + bool + Connect2::is_win(uint_t player)const noexcept{ + + auto player_sum = 0; + std::for_each(board_.begin(), + board_.end(), + [&player_sum, player](auto val){ + + if(val == player) + player_sum += 1; + }); + + return player_sum == win_val_; } - - auto valid_move = true; - if(board_[action] != 0){ - valid_move = false; + + std::vector + Connect2::get_valid_moves()const{ + + std::vector val_moves_; + val_moves_.reserve(4); + + for(uint_t i=0; i= board_.size()){ + throw std::logic_error("Invalid action id: " + std::to_string(action)); } - - auto val_moves = get_valid_moves(); - - std::unordered_map extra; - extra["valid_moves"] = std::any(val_moves); - return Connect2::time_step_type(step_type, reward, - board_, discount_, - std::move(extra)); + + if(is_finished_){ + return reset(); + } + + 
auto valid_move = true; + if(board_[action] != 0){ + valid_move = false; + } + + if(valid_move){ + // this position on the board + // is occupied by the given player + board_[action] = pid; + + bool won = is_win(pid); + bool has_moves = has_legal_moves(); + + // there may be more moves to make in the game + // but the player may have won. That's why we look + // at the won variable first + auto step_type = TimeStepTp::INVALID_TYPE; + auto reward = 0.0; + if(won){ + step_type = TimeStepTp::LAST; + is_finished_ = true; + reward = 1.0; + } + else if(has_moves){ + // the player has not won the game + // and there may be more moves + step_type = TimeStepTp::MID; + reward = 0.0; + + } + else{ + // the player lost the game + step_type = TimeStepTp::LAST; + is_finished_ = true; + reward = -1.0; + } + + auto val_moves = get_valid_moves(); + + std::unordered_map extra; + extra["valid_moves"] = std::any(val_moves); + return Connect2::time_step_type(step_type, reward, + board_, discount_, + std::move(extra)); + } + + throw std::logic_error("Move: " + std::to_string(action) + " is invalid"); + } - - throw std::logic_error("Move: " + std::to_string(action) + " is invalid"); - -} -Connect2 -Connect2::make_copy(uint_t cidx)const{ - Connect2 copy(cidx); - std::unordered_map ops; - auto ver = this -> version(); - copy.make(ver, ops); - return copy; -} - -} + } } \ No newline at end of file diff --git a/src/bitrl/envs/connect2/connect2_env.h b/src/bitrl/envs/connect2/connect2_env.h index 11cd5a5c..03193faf 100644 --- a/src/bitrl/envs/connect2/connect2_env.h +++ b/src/bitrl/envs/connect2/connect2_env.h @@ -81,12 +81,7 @@ class Connect2 final: public EnvBase>, /// \brief Constructor /// Connect2(); - - /// - /// \brief Constructor - /// - explicit Connect2(uint_t cidx); - + /// /// /// @@ -97,7 +92,8 @@ class Connect2 final: public EnvBase>, /// environment will be slippery /// virtual void make(const std::string& version, - const std::unordered_map& options) override final; + const 
std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// /// \brief step. Move in the environment with the given action @@ -115,8 +111,7 @@ class Connect2 final: public EnvBase>, /// /// \brief Reset the environment /// - virtual time_step_type reset(uint_t /*seed*/, - const std::unordered_map& /*options*/)override final; + virtual time_step_type reset()override final; /// /// \brief Create a new copy of the environment with the given @@ -155,9 +150,7 @@ class Connect2 final: public EnvBase>, std::vector get_valid_moves()const; private: - - - + /// /// \brief The discount factor /// diff --git a/src/bitrl/envs/env_base.h b/src/bitrl/envs/env_base.h index 7e57fe57..f26cd8a7 100644 --- a/src/bitrl/envs/env_base.h +++ b/src/bitrl/envs/env_base.h @@ -17,208 +17,189 @@ namespace bitrl{ namespace envs{ - -/// -/// \class EnvBase -/// \brief Base class for environments. -/// -/// The EnvBase class establishes the minimum contract -/// that an RL environment should expose. -/// +/** + * @brief Base class interface for Reinforcement Learning environments. + * + * This class defines the minimum API contract that any RL environment + * must implement. It exposes functionality for creating, resetting, + * and stepping through an environment, while tracking configuration + * such as version, options, and current state. 
+ * + * @tparam TimeStepType Type returned after each environment step + * @tparam SpaceType Environment's space interface type providing + * state and action space definitions + */ template class EnvBase: public SpaceType, public synchronized_env_mixin { public: - static_assert(std::is_default_constructible::value && "TimeStepType should be default constructible"); - static_assert(std::is_default_constructible::value && "SpaceType should be default constructible"); + static_assert(std::is_default_constructible::value && + "TimeStepType should be default constructible"); + static_assert(std::is_default_constructible::value && + "SpaceType should be default constructible"); - /// - /// \brief Default seed to use - /// + /** @brief Default seed used in reset() if none provided */ static const uint_t DEFAULT_ENV_SEED = 42; - /// - /// \brief The time step type we return every time a step in the - /// environment is performed - /// + /** @brief Alias for the type returned when stepping the environment */ typedef TimeStepType time_step_type; - /// - /// \brief The type describing the state space for the environment - /// + /** @brief Type describing the environment state space */ typedef typename SpaceType::state_space state_space_type; - /// - /// \brief The type of the state - /// + /** @brief Type describing an individual state */ typedef typename SpaceType::state_type state_type; - /// - /// \brief The type of the action space for the environment - /// + /** @brief Type describing the environment action space */ typedef typename SpaceType::action_space action_space_type; - /// - /// \brief The type of the action to be undertaken in the environment - /// + /** @brief Type representing an individual action */ typedef typename SpaceType::action_type action_type; - /// - /// \brief Destructor - /// + /** @brief Virtual destructor */ virtual ~EnvBase()=default; - /// - /// \brief make. Builds the environment. - /// \param version. 
the version of the environment to build - /// \param options. Options to use for building the environment. - /// Concrete classes may choose to hold a copy - /// + /** + * @brief Construct the environment instance. + * + * @param version Version string used to control environment variant + * @param make_options Key-value configuration options for environment creation. + * @param reset_options Key-value configuration how the environment should be reset + * + * @note Derived classes should use set_version_() and set_make_options_() + * internally. They may store selected options for later use. + */ virtual void make(const std::string& version, - const std::unordered_map& options) = 0; + const std::unordered_map& make_options, + const std::unordered_map& reset_options) = 0; - /// - /// \brief close the environment - /// + /** @brief Close and release any acquired environment resources */ virtual void close()=0; - /// - /// \brief Reset the environment - /// \param seed. The seed to use for resetting the environment - /// \param options. Options to use for resetting the environment. - /// - virtual time_step_type reset(uint_t seed, - const std::unordered_map& options)=0; - - /// - /// \brief Reset the environment always using the same seed - /// - time_step_type reset(){ - return reset(DEFAULT_ENV_SEED, std::unordered_map());} - - /// - /// \brief Reset the environment always using the provided seed - /// - time_step_type reset(uint_t seed){ - return reset(seed, std::unordered_map());} - - /// - /// \brief step in the environment by performing the given action - /// \param action. The action to execute in the environment - /// \return An instance of time_step_type + /** + * @brief Reset the environment to an initial state using the reset + * options specified during make. + * + * @return Initial time step after reset + */ + virtual time_step_type reset()=0; + + /** + * @brief Perform one step in the environment using an action. 
+ * + * @param action Action applied to the environment + * @return New time step after executing the action + */ virtual time_step_type step(const action_type& action)=0; - - /// - /// \brief is_created Returns true is make has been called successfully - /// - bool is_created()const noexcept{return is_created_;} - - /// - /// \brief Returns the version of the environment - /// - std::string version()const noexcept{return version_;} - - /// - /// \brief Returns the name of the environment - /// - std::string env_name()const noexcept{return name_;} - - /// - /// \brief Returns a read reference to the options passed when calling make - /// + + /** + * @brief Access the configuration options provided to make(). + * @return Map of option keys and values + */ const std::unordered_map& make_options()const noexcept{return make_options_;} - - /// - /// \brief Read the option with the given name - /// + + /** + * @brief Access the configuration options provided to make(). + * @return Map of option keys and values + */ + const std::unordered_map& reset_options()const noexcept{return reset_options_;} + + /** + * @brief Read a specific make() option and cast it to the requested type. + * + * @tparam T Expected data type + * @param op_name Key of the option to read + * @return Requested value if present + * @throws std::bad_any_cast If stored type does not match T + */ template T read_option(const std::string& op_name)const; - /// - /// \brief Returns the index of the environment that is active within - /// a simulation - /// - uint_t cidx()const noexcept{return cidx_;} + /** + * @brief Get the id identifying this environment within a simulation batch. + * The id is valid only if make has been called + * @return Copy index + */ + std::string idx()const noexcept{return idx_;} + + /** + * @brief Check if make() has successfully initialized the environment. 
+ * @return True if environment is ready, false otherwise + */ + bool is_created()const noexcept{return is_created_;} + + /** + * @brief Get the name of this environment instance. + * @return Environment name + */ + std::string env_name()const noexcept{return name_;} + + /** + * @brief Get the environment version set during make(). + * @return Version string + */ + std::string version()const noexcept{return version_;} protected: - /// - /// \brief Constructor - /// - explicit EnvBase(const uint_t cidx=0, + /** + * @brief Constructor (protected — for subclassing only). + * @param cidx Copy index used in multi-environment simulations + * @param name Name of the environment instance + */ + explicit EnvBase(const std::string& idx=bitrl::consts::INVALID_STR, const std::string& name=bitrl::consts::INVALID_STR); - /// - /// \brief Copy constructor - /// + /** @brief Copy constructor */ EnvBase(const EnvBase&); - /// - /// \brief Helper function to set the version. - /// To be called only when the make is called - /// + /** + * @brief Set internal version string. 
+ * @note Should be called only inside make() + */ void set_version_(const std::string& version )noexcept{version_ = version;} + + /** + * @brief Set the id of the environment + * @param idx + */ + void set_idx_(const std::string& idx)noexcept{idx_ = idx;} - /// - /// \brief Set the make options - /// + /** @brief Store make() options for future access */ void set_make_options_(const std::unordered_map& options) noexcept{make_options_ = options;} - /// - /// \brief - /// + /** @brief Mark environment as not created */ void invalidate_is_created_flag_()noexcept{is_created_ = false;} - /// - /// \brief mark the environment as created - /// + /** @brief Mark environment creation as successful */ void make_created_()noexcept{is_created_= true;} - + + /** @brief Mutable access to the current time step */ time_step_type& get_current_time_step_()noexcept{return current_state_;} + + /** @brief Read-only access to the current time step */ const time_step_type& get_current_time_step_()const noexcept{return current_state_;} private: - /// - /// \brief Flag indicating if the environment has been created - /// - bool is_created_; - - /// - /// The copy idx - /// - uint_t cidx_; - - /// - /// \brief Version of the environment - /// - std::string version_; - - /// - /// \brief Name of the environment - /// - const std::string name_; - - /// - /// \brief Copy of the options upon calling make - /// - std::unordered_map make_options_; - - /// - /// \brief current_state - /// - time_step_type current_state_; + bool is_created_; ///< Indicates that make() has finished successfully + std::string idx_; ///< Environment instance id + std::string version_; ///< Environment version identifier + const std::string name_; ///< Environment name + std::unordered_map make_options_; ///< Copied options from make() + std::unordered_map reset_options_; ///< Copied options from make() + time_step_type current_state_; ///< Latest environment time step }; template -EnvBase::EnvBase(const uint_t cidx, 
const std::string& name) +EnvBase::EnvBase(const std::string& idx, const std::string& name) : SpaceType(), synchronized_env_mixin(), is_created_(false), -cidx_(cidx), +idx_(idx), version_(), name_(name), current_state_() @@ -230,7 +211,7 @@ EnvBase::EnvBase(const EnvBase SpaceType(), synchronized_env_mixin(), is_created_(other.is_created_), -cidx_(other.cidx_), +idx_(other.idx_), version_(other.version_), name_(other.name_), current_state_() @@ -255,6 +236,16 @@ EnvBase::read_option(const std::string& op_name)const{ throw std::logic_error("Option: " + op_name + " not found"); } +template +void EnvBase::make(const std::string& version, + const std::unordered_map& make_options, + const std::unordered_map& reset_options) +{ + version_ = version; + make_options_ = make_options; + reset_options_ = reset_options; +} + } } diff --git a/src/bitrl/envs/gdrl/gym_walk.h b/src/bitrl/envs/gdrl/gym_walk.h index 17264104..6fd8e703 100644 --- a/src/bitrl/envs/gdrl/gym_walk.h +++ b/src/bitrl/envs/gdrl/gym_walk.h @@ -24,6 +24,8 @@ #include #include +#include "../../sensors/ekf_sensor_fusion.h" + #ifdef BITRL_DEBUG #include #endif @@ -32,16 +34,15 @@ namespace bitrl{ namespace envs::gdrl { - - /// -/// \brief class GymWalk. Interface for the GymWalk environment -/// - template - class GymWalk final: public EnvBase, +/** + * GymWalk. 
Interface for the GymWalk environment + */ +template +class GymWalk final: public EnvBase, ScalarDiscreteEnv > - { - public: +{ +public: /// /// \brief name @@ -91,13 +92,7 @@ namespace envs::gdrl /// /// \brief Constructor /// - GymWalk(const RESTApiServerWrapper& api_server); - - /// - /// \brief Constructor - /// - GymWalk(const RESTApiServerWrapper& api_server, - const uint_t cidx); + GymWalk(const network::RESTApiServerWrapper& api_server); /// /// \brief copy ctor @@ -109,7 +104,8 @@ namespace envs::gdrl /// environment will be slippery /// virtual void make(const std::string& version, - const std::unordered_map& options) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// @@ -132,9 +128,7 @@ namespace envs::gdrl /// /// \brief Reset the environment /// - virtual time_step_type reset(uint_t seed, - const std::unordered_map& options)override final; - + virtual time_step_type reset()override final; /// /// \brief Create a new copy of the environment with the given @@ -154,7 +148,7 @@ namespace envs::gdrl uint_t n_actions()const noexcept{return action_space_type::size;} - private: +private: /// /// \brief build the dynamics from response @@ -167,7 +161,7 @@ namespace envs::gdrl time_step_type create_time_step_from_response_(const nlohmann::json& response) const; - RESTApiServerWrapper api_server_; + network::RESTApiServerWrapper* api_server_; }; template @@ -176,29 +170,16 @@ namespace envs::gdrl template - GymWalk::GymWalk(const RESTApiServerWrapper& api_server) + GymWalk::GymWalk(network::RESTApiServerWrapper& api_server) : EnvBase, ScalarDiscreteEnv - >(0, GymWalk::name), - api_server_(api_server_) + >(GymWalk::name), + api_server_(&api_server_) { api_server_.register_if_not(GymWalk::name, GymWalk::URI); } - template - GymWalk::GymWalk(const RESTApiServerWrapper& api_server, - const uint_t cidx) - : - EnvBase, - ScalarDiscreteEnv - >(cidx, "GymWalk"), - api_server_(api_server) - { - 
api_server_.register_if_not(GymWalk::name, GymWalk::URI); - } - - template GymWalk::GymWalk(const GymWalk& other) : @@ -232,19 +213,21 @@ namespace envs::gdrl template void GymWalk::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; } auto response = api_server_.make(this->env_name(), - this->cidx(), version, ops); - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } template @@ -260,7 +243,7 @@ namespace envs::gdrl } const auto response = api_server_.step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); @@ -289,8 +272,7 @@ namespace envs::gdrl template typename GymWalk::time_step_type - GymWalk::reset(uint_t seed, - const std::unordered_map& /*options*/){ + GymWalk::reset(){ if(!this->is_created()){ #ifdef RLENVSCPP_DEBUG @@ -300,24 +282,13 @@ namespace envs::gdrl } auto response = this -> api_server_.reset(this->env_name(), - this -> cidx(), seed, + this -> idx(), seed, nlohmann::json()); this->create_time_step_from_response_(response); return this -> get_current_time_step_(); } - template - GymWalk - GymWalk::make_copy(uint_t cidx)const{ - - GymWalk copy(api_server_ ,cidx); - std::unordered_map ops; - auto version = this -> version(); - copy.make(version, ops); - return copy; - } - } } diff --git a/src/bitrl/envs/grid_world/grid_world_env.h b/src/bitrl/envs/grid_world/grid_world_env.h index 023ca463..f5f2c8db 100644 --- a/src/bitrl/envs/grid_world/grid_world_env.h +++ b/src/bitrl/envs/grid_world/grid_world_env.h @@ -14,6 +14,7 @@ #include "bitrl/envs/time_step.h" #include "bitrl/envs/env_base.h" #include "bitrl/envs/space_type.h" +#include "bitrl/utils/utils.h" #ifdef BITRL_DEBUG #include @@ 
-191,52 +192,36 @@ namespace envs::grid_world [[nodiscard]] board_move_type validate_move(board_component_type piece, board_position pos)const; }; - template - struct GridWorldEnv - { - - typedef detail::board state_space; - - typedef detail::board_state_type state_type; - - /// - /// \brief state space size - /// - static constexpr uint_t STATE_SPACE_SIZE = size_size * size_size; - - /// - /// \brief the action space type - /// - typedef ScalarDiscreteSpace<0, 4> action_space; - - /// - /// \brief the Action type - /// - typedef action_space::space_item_type action_type; +template +struct GridWorldEnv +{ - /// - /// \brief action space size - /// - static constexpr uint_t ACTION_SPACE_SIZE = action_space::size; - }; - } + typedef detail::board state_space; + typedef detail::board_state_type state_type; + static constexpr uint_t STATE_SPACE_SIZE = size_size * size_size; + typedef ScalarDiscreteSpace<0, 4> action_space; + typedef action_space::space_item_type action_type; + static constexpr uint_t ACTION_SPACE_SIZE = action_space::size; +}; +} - /// - /// The Gridworld class models a square board. There are three ways to initialize the board. - /// - static - /// - random - /// - player - /// See the GridworldInitType enumeration. - /// Static initialization means that the objects on the board are initialized at the same predetermined locations. - /// Player initialization means that the player is initialized at a random position on the board. - /// Random initialization means that all the objects are placed randomly - /// - template - class Gridworld final: public EnvBase, +/** + * The Gridworld class models a square board. There are three ways to initialize the board. + * - static + * - random + * - player + * See the GridWorldInitType enumeration. + * Static initialization means that the objects on the board are initialized at the same predetermined locations. + * Player initialization means that the player is initialized at a random position on the board. 
+ * Random initialization means that all the objects are placed randomly + */ + +template +class Gridworld final: public EnvBase, detail::GridWorldEnv> - { - public: +{ +public: static_assert (side_size_ >= 4, "The side size should be greater than or equal to 4"); @@ -300,11 +285,6 @@ namespace envs::grid_world /// /// \brief Gridworld. Constructor - /// - explicit Gridworld(const uint_t cidx); - - /// - /// \brief Gridworld. Constructor /// Gridworld(const Gridworld& other); @@ -313,19 +293,13 @@ namespace envs::grid_world /// environment will be slippery /// void make(const std::string& version, - const std::unordered_map& options) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// /// \brief Reset the environment /// - time_step_type reset(uint_t /*seed*/, - const std::unordered_map& /*options*/) override final; - - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - Gridworld make_copy(uint_t cidx)const; + time_step_type reset() override final; /// /// \brief step @@ -379,7 +353,7 @@ namespace envs::grid_world /// [[nodiscard]] GridWorldInitType init_type()const noexcept{return init_mode_;} - private: +private: /// /// \brief init_mode_ @@ -420,20 +394,7 @@ namespace envs::grid_world Gridworld::Gridworld() : EnvBase, - detail::GridWorldEnv>(0, "Gridworld"), - init_mode_(GridWorldInitType::INVALID_TYPE), - randomize_state_(false), - seed_(0), - noise_factor_(0.0), - board_() - { - } - - template - Gridworld::Gridworld(uint_t cidx) - : - EnvBase, - detail::GridWorldEnv>(cidx, "Gridworld"), + detail::GridWorldEnv>(Gridworld::name), init_mode_(GridWorldInitType::INVALID_TYPE), randomize_state_(false), seed_(0), @@ -442,7 +403,6 @@ namespace envs::grid_world { } - template Gridworld::Gridworld(const Gridworld& other) : @@ -459,7 +419,8 @@ namespace envs::grid_world template void Gridworld::make(const std::string& version, - const std::unordered_map& options){ + 
const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this -> is_created()){ return; @@ -491,22 +452,14 @@ namespace envs::grid_world // to created this->set_version_(version); this->make_created_(); - } - template - Gridworld - Gridworld::make_copy(uint_t cidx)const{ - - Gridworld copy(cidx); - std::unordered_map ops; - ops["randomize_state"] = this -> has_random_state(); - ops["noise_factor"] = this -> noise_factor_; - ops["seed"] = std::any(static_cast(this -> seed_)); - auto version = this -> version(); - copy.make(version, ops); - return copy; + auto idx = utils::uuid4(); + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } + template typename Gridworld::time_step_type Gridworld::step(const action_type& action){ @@ -526,8 +479,7 @@ namespace envs::grid_world template typename Gridworld::time_step_type - Gridworld::reset(uint_t /*seed*/, - const std::unordered_map& /*options*/){ + Gridworld::reset(){ // reinitialize the board auto obs = board_.init_board(side_size_, init_mode_); diff --git a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp index d6234b13..97cf8a00 100644 --- a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp +++ b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.cpp @@ -16,14 +16,9 @@ namespace bitrl const std::string LunarLanderContinuousEnv::name = "LunarLanderContinuous"; const std::string LunarLanderContinuousEnv::URI = "/gymnasium/lunar-lander-continuous-env"; - LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server) + LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(network::RESTRLEnvClient& api_server) : - LunarLanderDiscreteEnv::base_type(api_server, 0, LunarLanderDiscreteEnv::name, LunarLanderDiscreteEnv::URI) - {} - - LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server, const uint_t cidx) - : - 
LunarLanderDiscreteEnv::base_type(api_server, cidx, LunarLanderDiscreteEnv::name, LunarLanderDiscreteEnv::URI) + LunarLanderDiscreteEnv::base_type(api_server, LunarLanderDiscreteEnv::name, LunarLanderDiscreteEnv::URI) {} LunarLanderDiscreteEnv::LunarLanderDiscreteEnv(const LunarLanderDiscreteEnv& other) @@ -31,42 +26,18 @@ namespace bitrl LunarLanderDiscreteEnv::base_type(other) {} - LunarLanderDiscreteEnv - LunarLanderDiscreteEnv::make_copy(uint_t cidx)const - { - LunarLanderDiscreteEnv copy(this -> get_api_server(), cidx); - auto version = this -> version(); - copy.make(version, this -> make_options()); - return copy; - } - - LunarLanderContinuousEnv::LunarLanderContinuousEnv(const RESTApiServerWrapper& api_server) + LunarLanderContinuousEnv::LunarLanderContinuousEnv(network::RESTRLEnvClient& api_server) : - LunarLanderContinuousEnv::base_type(api_server, 0, LunarLanderContinuousEnv::name, LunarLanderContinuousEnv::URI) + LunarLanderContinuousEnv::base_type(api_server, LunarLanderContinuousEnv::name, LunarLanderContinuousEnv::URI) {} - LunarLanderContinuousEnv::LunarLanderContinuousEnv(const RESTApiServerWrapper& api_server, const uint_t cidx) - : - LunarLanderContinuousEnv::base_type(api_server, cidx, LunarLanderContinuousEnv::name, LunarLanderContinuousEnv::URI) - {} LunarLanderContinuousEnv::LunarLanderContinuousEnv(const LunarLanderContinuousEnv& other) : LunarLanderContinuousEnv::base_type(other) {} - LunarLanderContinuousEnv - LunarLanderContinuousEnv::make_copy(uint_t cidx)const - { - - LunarLanderContinuousEnv copy(this -> get_api_server(), cidx); - auto version = this -> version(); - copy.make(version, this -> make_options()); - return copy; - - } - } } diff --git a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h index 7ff0d463..bb82e165 100644 --- a/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h +++ b/src/bitrl/envs/gymnasium/box2d/lunar_lander_env.h @@ -8,7 +8,7 @@ #include "bitrl/bitrl_types.h" 
#include "bitrl/envs/time_step.h" #include "bitrl/envs/gymnasium/gymnasium_env_base.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/envs/env_types.h" #include "bitrl/extern/nlohmann/json/json.hpp" @@ -59,8 +59,7 @@ namespace bitrl /// @param name /// @param uri /// - _LunarLanderEnv(const RESTApiServerWrapper& api_server, uint_t cidx, - const std::string& name, const std::string& uri); + _LunarLanderEnv(network::RESTRLEnvClient& api_server, const std::string& name, const std::string& uri); /// /// @param other @@ -76,7 +75,8 @@ namespace bitrl /// \brief make. Build the environment /// virtual void make(const std::string& version, - const std::unordered_map& /*options*/) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// /// \brief step. Step in the environment following the given action @@ -98,10 +98,10 @@ namespace bitrl }; template - _LunarLanderEnv::_LunarLanderEnv(const RESTApiServerWrapper& api_server, uint_t cidx, + _LunarLanderEnv::_LunarLanderEnv(network::RESTRLEnvClient& api_server, const std::string& name, const std::string& uri) : - GymnasiumEnvBase(api_server, cidx, name) + GymnasiumEnvBase(api_server, name) { this -> get_api_server().register_if_not(name, uri); } @@ -115,7 +115,8 @@ namespace bitrl template void _LunarLanderEnv::make(const std::string& version, - const std::unordered_map& options) + const std::unordered_map& options, + const std::unordered_map& reset_options) { if(this->is_created()){ return; @@ -148,11 +149,11 @@ namespace bitrl } auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), version, ops); - this -> set_version_(version); - this -> set_make_options_(options); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); this -> make_created_(); } @@ -166,11 +167,11 @@ namespace bitrl #endif 
if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } const auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); @@ -199,10 +200,10 @@ namespace bitrl /// /// \brief LunarLanderDiscreteEnv environment with discrete action space /// - class LunarLanderDiscreteEnv final : public lunar_lander_detail::_LunarLanderEnv>, +class LunarLanderDiscreteEnv final : public lunar_lander_detail::_LunarLanderEnv>, ContinuousVectorStateDiscreteActionEnv<8, 4, 0, real_t>> - { - public: +{ +public: /// /// \brief name /// @@ -248,12 +249,7 @@ namespace bitrl /// Constructor /// @param api_server /// - LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server); - - /// - /// \brief Constructor - /// - LunarLanderDiscreteEnv(const RESTApiServerWrapper& api_server, const uint_t cidx); + LunarLanderDiscreteEnv(network::RESTRLEnvClient& api_server); /// /// @param other @@ -265,21 +261,16 @@ namespace bitrl /// ~LunarLanderDiscreteEnv() override =default; - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - LunarLanderDiscreteEnv make_copy(uint_t cidx)const; - }; - /// - /// \brief LunarLanderDiscreteEnv environment with discrete action space - /// - class LunarLanderContinuousEnv final : public lunar_lander_detail::_LunarLanderEnv>, +/** + * + * LunarLanderContinuousEnv environment with continuous action space + */ +class LunarLanderContinuousEnv final : public lunar_lander_detail::_LunarLanderEnv>, ContinuousVectorStateContinuousVectorActionEnv<8, 2, real_t, real_t>> - { - public: +{ +public: /// /// \brief name /// @@ -325,12 +316,7 @@ namespace bitrl /// Constructor /// @param api_server /// - LunarLanderContinuousEnv(const RESTApiServerWrapper& api_server); - - /// - /// \brief Constructor - /// - LunarLanderContinuousEnv(const 
RESTApiServerWrapper& api_server, const uint_t cidx); + LunarLanderContinuousEnv(network::RESTRLEnvClient& api_server); /// /// @param other @@ -342,12 +328,6 @@ namespace bitrl /// ~LunarLanderContinuousEnv() override =default; - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - LunarLanderContinuousEnv make_copy(uint_t cidx)const; - }; } } diff --git a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp index ec5e386c..a49d1d15 100644 --- a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp +++ b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.cpp @@ -26,28 +26,16 @@ namespace envs::gymnasium } - Acrobot::Acrobot(const RESTApiServerWrapper& api_server) + Acrobot::Acrobot(network::RESTRLEnvClient& api_server) : GymnasiumEnvBase >, ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t > - >(api_server, - 0, - Acrobot::name) + >(api_server, Acrobot::name) { this -> get_api_server().register_if_not(Acrobot::name, Acrobot::URI); } - Acrobot::Acrobot(const RESTApiServerWrapper& api_server, - const uint_t cidx) - : - GymnasiumEnvBase >, - ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t > - >(api_server, - cidx, - Acrobot::name) - { - this -> get_api_server().register_if_not(Acrobot::name, Acrobot::URI); - } + Acrobot::Acrobot(const Acrobot& other) : @@ -58,18 +46,20 @@ namespace envs::gymnasium void Acrobot::make(const std::string& version, - const std::unordered_map& /*options*/){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; } auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), version, nlohmann::json()); - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } @@ -81,29 +71,15 @@ namespace envs::gymnasium 
#endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } const auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); return this->get_current_time_step_(); - } - - Acrobot - Acrobot::make_copy(uint_t cidx)const{ - - Acrobot copy(this->get_api_server(), cidx); - - std::unordered_map ops; - auto version = this -> version(); - copy.make(version, ops); - return copy; - } - - - +} } } diff --git a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h index e299335f..52c70bf9 100644 --- a/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h +++ b/src/bitrl/envs/gymnasium/classic_control/acrobot_env.h @@ -122,7 +122,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/envs/time_step.h" #include "bitrl/envs/gymnasium/gymnasium_env_base.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/envs/env_types.h" #include "bitrl/extern/nlohmann/json/json.hpp" @@ -132,126 +132,100 @@ #include namespace bitrl{ -namespace envs{ -namespace gymnasium{ +namespace envs::gymnasium +{ -/// + /// /// \brief class Acrobot. The Acrobot class. Interface for Gymnasium::Acrobot environment. 
/// class Acrobot final: public GymnasiumEnvBase >, - ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t > - > + ContinuousVectorStateDiscreteActionEnv<6, 2, 0, real_t > + > { public: - /// + /// /// \brief The name of the environment /// - static const std::string name; - - /// + static const std::string name; + + /// /// \brief The URI for accessing the environment /// - static const std::string URI; - - /// + static const std::string URI; + + /// /// \brief Base class type /// - typedef GymnasiumEnvBase >, - ContinuousVectorStateDiscreteActionEnv< 6, // size of state space - 2, // end of action space - 0, // start of action space - real_t> // type of state - >::base_type base_type; - /// + typedef GymnasiumEnvBase >, + ContinuousVectorStateDiscreteActionEnv< 6, // size of state space + 2, // end of action space + 0, // start of action space + real_t> // type of state + >::base_type base_type; + /// /// \brief The time step type we return every time a step in the /// environment is performed /// - typedef typename base_type::time_step_type time_step_type; - - /// + typedef typename base_type::time_step_type time_step_type; + + /// /// \brief The type describing the state space for the environment /// - typedef typename base_type::state_space_type state_space_type; - - /// + typedef typename base_type::state_space_type state_space_type; + + /// /// \brief The type of the action space for the environment /// - typedef typename base_type::action_space_type action_space_type; + typedef typename base_type::action_space_type action_space_type; - /// + /// /// \brief The type of the action to be undertaken in the environment /// - typedef typename base_type::action_type action_type; - - /// + typedef typename base_type::action_type action_type; + + /// /// \brief The type of the state /// - typedef typename base_type::state_type state_type; - - /// - /// \brief Acrobot. Constructor - /// \param api_server. 
The RESTApiServerWrapper instance to use - /// - Acrobot(const RESTApiServerWrapper& api_server ); - - /// - /// \brief Acrobot. Constructor - /// \param api_server. The RESTApiServerWrapper instance to use - /// \param cidx. The index to assign to the created environment - /// - Acrobot(const RESTApiServerWrapper& api_server, - const uint_t cidx); - - /// - /// \brief Acrobot. Copy constructor - /// - Acrobot(const Acrobot& other); - - /// - /// \brief ~Acrobot. Destructor - /// - ~Acrobot()=default; + typedef typename base_type::state_type state_type; - /// + + Acrobot(network::RESTRLEnvClient& api_server ); + + Acrobot(const Acrobot& other); + + ~Acrobot() override =default; + + /// /// \brief make. Build the environment /// \param version. The version of the environment to create /// \param options. The options to use when creating the environment /// - virtual void make(const std::string& version, - const std::unordered_map& options) override final; - - /// + virtual void make(const std::string& version, + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; + + /// /// \brief step. Step in the environment following the given action. /// \param action. The action to execute /// - virtual time_step_type step(const action_type& action) override final; - - /// - /// \brief Create a new copy of the environment with the given - /// copy index. - /// \param cidx. The index to assign to the copied environment - /// - Acrobot make_copy(uint_t cidx)const; + virtual time_step_type step(const action_type& action) override final; - /// + /// /// \brief n_actions. 
Returns the number of actions /// - uint_t n_actions()const noexcept{return action_space_type::size;} + uint_t n_actions()const noexcept{return action_space_type::size;} protected: - - /// + + /// /// \brief Handle the reset response from the environment server /// - virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final; - -}; + virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final; + }; - -} } } diff --git a/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp b/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp index 8378a51e..768e1372 100644 --- a/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp +++ b/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.cpp @@ -29,21 +29,10 @@ namespace envs::gymnasium std::unordered_map()); } - CartPole::CartPole(const RESTApiServerWrapper& api_server) + CartPole::CartPole(network::RESTRLEnvClient& api_server) : GymnasiumEnvBase >, - ContinuousVectorStateDiscreteActionEnv<4, 2, 0, real_t >>(api_server, 0, CartPole::name) - { - this -> get_api_server().register_if_not(CartPole::name, CartPole::URI); - } - - CartPole::CartPole(const RESTApiServerWrapper& api_server, - const uint_t cidx) - : - GymnasiumEnvBase >, - ContinuousVectorStateDiscreteActionEnv<4, 2, 0, real_t >>(api_server, - cidx, - CartPole::name) + ContinuousVectorStateDiscreteActionEnv<4, 2, 0, real_t >>(api_server, CartPole::name) { this -> get_api_server().register_if_not(CartPole::name, CartPole::URI); } @@ -56,29 +45,23 @@ namespace envs::gymnasium void CartPole::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; } - nlohmann::json ops; - auto has_rendering = options.find("render_mode"); - if(has_rendering != options.end()){ - auto render_str = std::any_cast(has_rendering->second); - 
ops["render_mode"] = render_str; - } - auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), version, ops); - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } - CartPole::time_step_type CartPole::step(const action_type& action){ @@ -87,28 +70,16 @@ namespace envs::gymnasium #endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } const auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); return this->get_current_time_step_(); } - CartPole - CartPole::make_copy(uint_t cidx)const{ - - - CartPole copy(this -> get_api_server(), cidx); - std::unordered_map ops; - auto version = this -> version(); - copy.make(version, ops); - return copy; - } - - } } diff --git a/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.h b/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.h index d0a9d610..724125f3 100644 --- a/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.h +++ b/src/bitrl/envs/gymnasium/classic_control/cart_pole_env.h @@ -42,7 +42,7 @@ #include "bitrl/envs/time_step.h" #include "bitrl/envs/gymnasium/gymnasium_env_base.h" #include "bitrl/envs/env_types.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" #include @@ -121,14 +121,8 @@ class CartPole final: public GymnasiumEnvBase >, /// /// \brief CartPole. Constructor /// - CartPole(const RESTApiServerWrapper& api_server ); - - /// - /// \brief CartPole. 
Constructor - /// - CartPole(const RESTApiServerWrapper& api_server, - const uint_t cidx); - + CartPole(network::RESTRLEnvClient& api_server ); + /// /// \brief copy ctor /// @@ -137,26 +131,20 @@ class CartPole final: public GymnasiumEnvBase >, /// /// \brief ~CartPole. Destructor /// - ~CartPole()=default; + ~CartPole() override =default; /// /// \brief make. Build the environment /// virtual void make(const std::string& version, - const std::unordered_map&) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// /// \brief step. Step in the environment following the given action /// virtual time_step_type step(const action_type& action)override final; - - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - CartPole make_copy(uint_t cidx)const; - /// /// \brief n_actions. Returns the number of actions /// diff --git a/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.cpp b/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.cpp index d4b8af98..f082557a 100644 --- a/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.cpp +++ b/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.cpp @@ -36,22 +36,11 @@ namespace envs::gymnasium } - MountainCar::MountainCar(const RESTApiServerWrapper& api_server ) + MountainCar::MountainCar(network::RESTRLEnvClient& api_server ) : GymnasiumEnvBase>, ContinuousVectorStateDiscreteActionEnv<3, 2, 0, real_t > - >(api_server, 0, MountainCar::name) - { - this -> get_api_server().register_if_not(MountainCar::name, MountainCar::URI); - } - - - MountainCar::MountainCar(const RESTApiServerWrapper& api_server , - const uint_t cidx) - : - GymnasiumEnvBase>, - ContinuousVectorStateDiscreteActionEnv<3, 2, 0, real_t > - >(api_server, cidx, MountainCar::name) + >(api_server, MountainCar::name) { this -> get_api_server().register_if_not(MountainCar::name, MountainCar::URI); } @@ -65,21 +54,20 @@ namespace envs::gymnasium 
void MountainCar::make(const std::string& version, - const std::unordered_map& /*options*/){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; } auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), version, nlohmann::json()); - this->set_version_(version); - this->make_created_(); - - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } @@ -91,28 +79,17 @@ namespace envs::gymnasium #endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } const auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); return this->get_current_time_step_(); - - } - - MountainCar - MountainCar::make_copy(uint_t cidx)const{ - - MountainCar copy(this -> get_api_server(), cidx); - std::unordered_map ops; - auto version = this -> version(); - copy.make(version, ops); - return copy; - } +} } } diff --git a/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.h b/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.h index bacb39be..2993dc94 100644 --- a/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.h +++ b/src/bitrl/envs/gymnasium/classic_control/mountain_car_env.h @@ -6,7 +6,7 @@ #include "bitrl/envs/gymnasium/gymnasium_env_base.h" #include "bitrl/envs/time_step.h" #include "bitrl/envs/env_types.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" #include @@ -21,26 +21,26 @@ namespace envs::gymnasium /// /// \brief The MountainCar class /// - class MountainCar final: public GymnasiumEnvBase>, +class MountainCar final: public GymnasiumEnvBase>, 
ContinuousVectorStateDiscreteActionEnv<3, 2, 0, real_t > > - { +{ - public: +public: /// - /// \brief name - /// + /// \brief name + /// static const std::string name; /// - /// \brief The URI for accessing the environment - /// + /// \brief The URI for accessing the environment + /// static const std::string URI; /// - /// \brief Base class type - /// + /// \brief Base class type + /// typedef GymnasiumEnvBase >, ContinuousVectorStateDiscreteActionEnv< 3, // size of state 2, // end of action space @@ -48,9 +48,9 @@ namespace envs::gymnasium real_t> // type of state >::base_type base_type; /// - /// \brief The time step type we return every time a step in the - /// environment is performed - /// + /// \brief The time step type we return every time a step in the + /// environment is performed + /// typedef typename base_type::time_step_type time_step_type; /// @@ -74,64 +74,49 @@ namespace envs::gymnasium typedef typename base_type::state_type state_type; /// - /// \brief MountainCar. Constructor. Creates an environment. - /// \param version The version of the environment - /// \param gym_namespace The boost::python open-ai gym namespace - /// \param do_create If true it calls make - /// - MountainCar(const RESTApiServerWrapper& api_server ); - + /// \brief MountainCar. Constructor. Creates an environment. + /// \param version The version of the environment + /// \param gym_namespace The boost::python open-ai gym namespace + /// \param do_create If true it calls make /// - /// \brief Constructor. Protected so that applications - /// cannot explicitly instantiate copies - /// - MountainCar(const RESTApiServerWrapper& api_server , - const uint_t cidx); + MountainCar(network::RESTRLEnvClient& api_server ); + /// - /// \brief copy ctor - /// + /// \brief copy ctor + /// MountainCar(const MountainCar& other); /// - /// \brief ~MountainCar. Destructor. - /// - ~MountainCar()=default; + /// \brief ~MountainCar. Destructor. 
+ /// + ~MountainCar() override =default; /// - /// \brief make. Build the environment - /// + /// \brief make. Build the environment + /// virtual void make(const std::string& version, - const std::unordered_map& /*options*/) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// - /// \brief step - /// + /// \brief step + /// virtual time_step_type step(const action_type& action)override final; /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - MountainCar make_copy(uint_t cidx)const; - + /// \brief n_actions. Returns the number of actions /// - /// \brief n_actions. Returns the number of actions - /// uint_t n_actions()const noexcept{return action_space_type::size;} +protected: - protected: - /// - /// \brief Handle the reset response from the environment server - /// + /// \brief Handle the reset response from the environment server + /// virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final; - - - - }; +}; } } #endif // MOUNTAIN_CAR_H diff --git a/src/bitrl/envs/gymnasium/classic_control/pendulum_env.cpp b/src/bitrl/envs/gymnasium/classic_control/pendulum_env.cpp index b9e18467..840dd9ce 100644 --- a/src/bitrl/envs/gymnasium/classic_control/pendulum_env.cpp +++ b/src/bitrl/envs/gymnasium/classic_control/pendulum_env.cpp @@ -28,33 +28,18 @@ Pendulum::create_time_step_from_response_(const nlohmann::json& response)const{ } -Pendulum::Pendulum(const RESTApiServerWrapper& api_server) +Pendulum::Pendulum(network::RESTRLEnvClient& api_server) : GymnasiumEnvBase>, ContinuousVectorStateContinuousScalarBoundedActionEnv<3, 1, RealRange<-2.0, 2.0>, 0, real_t> - >(api_server, 0, Pendulum::name) + >(api_server, Pendulum::name) { this -> get_api_server().register_if_not(Pendulum::name,Pendulum::URI); } -Pendulum::Pendulum(const RESTApiServerWrapper& api_server, - const uint_t cidx) -: -GymnasiumEnvBase>, - 
ContinuousVectorStateContinuousScalarBoundedActionEnv<3, - 1, - RealRange<-2.0, 2.0>, - 0, real_t> - >(api_server, - cidx, - Pendulum::name) -{ - this -> get_api_server().register_if_not(Pendulum::name,Pendulum::URI); -} - Pendulum::Pendulum(const Pendulum& other) : GymnasiumEnvBase>, @@ -67,18 +52,20 @@ GymnasiumEnvBase>, void Pendulum::make(const std::string& version, - const std::unordered_map& /*options*/){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; } auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), version, nlohmann::json()); - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } @@ -90,27 +77,17 @@ Pendulum::step(const action_type& action){ #endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); return this->get_current_time_step_(); } - -Pendulum -Pendulum::make_copy(uint_t cidx)const{ - Pendulum copy(this -> get_api_server(), cidx); - std::unordered_map ops; - auto version = this -> version(); - copy.make(version, ops); - return copy; -} - } } } \ No newline at end of file diff --git a/src/bitrl/envs/gymnasium/classic_control/pendulum_env.h b/src/bitrl/envs/gymnasium/classic_control/pendulum_env.h index 168dbbc9..62617404 100644 --- a/src/bitrl/envs/gymnasium/classic_control/pendulum_env.h +++ b/src/bitrl/envs/gymnasium/classic_control/pendulum_env.h @@ -81,7 +81,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/envs/time_step.h" #include "bitrl/envs/gymnasium/gymnasium_env_base.h" -#include "bitrl/envs/api_server/apiserver.h" +#include 
"bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" #include "bitrl/envs/env_types.h" @@ -97,15 +97,15 @@ namespace envs::gymnasium /// /// \brief The Pendulum class. Interface for Pendulum environment /// - class Pendulum final: public GymnasiumEnvBase>, +class Pendulum final: public GymnasiumEnvBase>, ContinuousVectorStateContinuousScalarBoundedActionEnv<3, 1, RealRange<-2.0, 2.0>, 0, real_t> > - { +{ - public: +public: /// /// \brief name @@ -156,13 +156,7 @@ namespace envs::gymnasium /// /// \brief Pendulum. Constructor /// - Pendulum(const RESTApiServerWrapper& api_server ); - - /// - /// \brief Constructor - /// - Pendulum(const RESTApiServerWrapper& api_server, - const uint_t cidx); + Pendulum(network::RESTRLEnvClient& api_server ); /// /// \brief copy ctor @@ -172,35 +166,32 @@ namespace envs::gymnasium /// /// \brief ~Pendulum. Destructor /// - ~Pendulum()=default; + ~Pendulum() override =default; /// /// \brief make. Build the environment /// virtual void make(const std::string& version, - const std::unordered_map& /*options*/) override final; + const std::unordered_map& /*options*/, + const std::unordered_map& reset_options) override final; /// /// \brief step. Step in the environment following the given action /// virtual time_step_type step(const action_type& action)override final; - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - Pendulum make_copy(uint_t cidx)const; + /// /// \brief n_actions. 
Returns the number of actions /// uint_t n_actions()const noexcept{return action_space_type::size;} - protected: +protected: /// - /// \brief Handle the reset response from the environment server - /// + /// \brief Handle the reset response from the environment server + /// virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final; }; diff --git a/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.cpp b/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.cpp index 1b4a1321..26004d78 100644 --- a/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.cpp +++ b/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.cpp @@ -29,19 +29,10 @@ namespace envs::gymnasium } - AcrobotV::AcrobotV(const RESTApiServerWrapper& api_server ) + AcrobotV::AcrobotV(network::RESTRLEnvClient& api_server ) : GymnasiumVecEnvBase, - detail_::AcrobotVEnv>(api_server, 0, AcrobotV::name) - { - this -> get_api_server().register_if_not(AcrobotV::name,AcrobotV::URI); - } - - AcrobotV::AcrobotV(const RESTApiServerWrapper& api_server , const uint_t cidx) - : - GymnasiumVecEnvBase, - detail_::AcrobotVEnv>(api_server, cidx, - AcrobotV::name) + detail_::AcrobotVEnv>(api_server, AcrobotV::name) { this -> get_api_server().register_if_not(AcrobotV::name,AcrobotV::URI); } @@ -56,20 +47,20 @@ namespace envs::gymnasium void AcrobotV::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; } this->GymnasiumVecEnvBase, - detail_::AcrobotVEnv>::make(version, options); + detail_::AcrobotVEnv>::make(version, options, reset_options); nlohmann::json ops; ops["num_envs"] = this->get_n_envs(); auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), - version, ops); + version, ops); this->set_version_(version); this->make_created_(); @@ -85,29 +76,16 @@ namespace 
envs::gymnasium #endif if(this->get_reset_if_any_done() && this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); return this->get_current_time_step_(); - } - - - AcrobotV - AcrobotV::make_copy(uint_t cidx)const{ - - AcrobotV copy(this->get_api_server(), cidx); - - std::unordered_map ops; - ops["num_envs"] = this -> get_n_envs(); - auto version = this -> version(); - copy.make(version, ops); - return copy; - } +} } } diff --git a/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.h b/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.h index 26e072ee..b7258e6d 100644 --- a/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.h +++ b/src/bitrl/envs/gymnasium/classic_control/vector/acrobot_vec_env.h @@ -12,7 +12,7 @@ #include "bitrl/envs/vector_time_step.h" #include "bitrl/envs/space_type.h" #include "bitrl/envs/gymnasium/gymnasium_vector_env_base.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" @@ -61,11 +61,11 @@ namespace envs::gymnasium /// /// \brief The CartPole class Interface for CartPole environment /// - class AcrobotV final: public GymnasiumVecEnvBase, +class AcrobotV final: public GymnasiumVecEnvBase, detail_::AcrobotVEnv> - { +{ - public: +public: /// /// \brief name @@ -112,13 +112,7 @@ namespace envs::gymnasium /// /// \brief Acrobot. Constructor /// - AcrobotV(const RESTApiServerWrapper& api_server ); - - /// - /// \brief CartPole. Constructor - /// - AcrobotV(const RESTApiServerWrapper& api_server , - const uint_t cidx); + AcrobotV(network::RESTRLEnvClient& api_server ); /// /// \brief copy ctor @@ -128,40 +122,33 @@ namespace envs::gymnasium /// /// \brief ~Acrobot. 
Destructor /// - ~AcrobotV()=default; + ~AcrobotV() override =default; /// /// \brief make. Build the environment /// virtual void make(const std::string& version, - const std::unordered_map&) override final; - + const std::unordered_map&, + const std::unordered_map& reset_options) override final; /// /// \brief step. Step in the environment following the given action /// virtual time_step_type step(const action_type& action) override final; - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - AcrobotV make_copy(uint_t cidx)const; - /// /// \brief n_actions. Returns the number of actions /// uint_t n_actions()const noexcept{return action_space_type::size;} - protected: +protected: /// /// \brief Handle the reset response from the environment server /// virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final; - }; - +}; } diff --git a/src/bitrl/envs/gymnasium/gymnasium_env_base.h b/src/bitrl/envs/gymnasium/gymnasium_env_base.h index 4c418c1b..68068dc9 100644 --- a/src/bitrl/envs/gymnasium/gymnasium_env_base.h +++ b/src/bitrl/envs/gymnasium/gymnasium_env_base.h @@ -9,9 +9,9 @@ #include "bitrl/bitrl_types.h" #include "bitrl/bitrl_config.h" #include "bitrl/envs/env_base.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" - +#include "bitrl/utils//std_map_utils.h" #include #include @@ -25,127 +25,120 @@ namespace bitrl{ namespace envs::gymnasium { - - /// - /// \brief class GymnasiumEnvBase. 
Base class for all Gymnasium wrappers - /// The class has two parameters: - /// - /// TimeStepType: the type of the time step to use - /// - /// SpaceType: The type of the space-action space - /// - template - class GymnasiumEnvBase: public EnvBase{ - public: - - - /// - /// \brief The base_type - /// - typedef EnvBase base_type; - - /// - /// \brief The time step type we return every time a step in the - /// environment is performed - /// - typedef typename base_type::time_step_type time_step_type; - - /// - /// \brief The type describing the state space for the environment - /// - typedef typename base_type::state_space_type state_space_type; - - /// - /// \brief The type of the action space for the environment - /// - typedef typename base_type::action_space_type action_space_type; - - /// - /// \brief The type of the action to be undertaken in the environment - /// - typedef typename base_type::action_type action_type; - - /// - /// \brief The type of the action to be undertaken in the environment - /// - typedef typename base_type::state_type state_type; - - /// - /// \brief Expose the various reset methods we use from base class - /// - using base_type::reset; - - /// - /// \brief ~GymnasiumEnvBase. Destructor. 
- /// - virtual ~GymnasiumEnvBase(); - - /// - /// \brief - /// - virtual bool is_alive()const; - - /// - /// \brief close the environment - /// - virtual void close() override; - - /// - /// \brief Reset the environment - /// - virtual time_step_type reset(uint_t seed, - const std::unordered_map& options)override; - - /// - /// \brief Returns read reference to the underlying API server wrapper - /// - const RESTApiServerWrapper& get_api_server()const{return api_server_;} - - /// - /// \brief Returns the full path on the server for this environment - /// - std::string get_url()const; - - protected: - - /// - /// \brief Constructor - /// - GymnasiumEnvBase(const RESTApiServerWrapper& api_server, - const uint_t cidx, - const std::string& name); - - /// - /// \brief Copy constructor - /// - GymnasiumEnvBase(const GymnasiumEnvBase&); - - /// - /// \brief Pointer to the api server that handles the requests - /// - RESTApiServerWrapper api_server_; - - - /// - /// \brief read reference to the api server instance - /// - RESTApiServerWrapper& get_api_server(){return api_server_;} - - /// - /// \brief build the time step from the server response - /// - virtual time_step_type create_time_step_from_response_(const nlohmann::json& response)const=0; +/** + * @class GymnasiumEnvBase + * @brief Base class for all Gymnasium environment wrappers. + * + * This template wraps a remote Gymnasium-compatible environment served through + * a REST API. It extends EnvBase and provides the common logic for environment + * reset, closing, and time-step handling via HTTP communication. + * + * @tparam TimeStepType The type representing one interaction step with the environment. + * @tparam SpaceType The type describing the observation and action spaces. + */ +template +class GymnasiumEnvBase: public EnvBase +{ +public: + + /** @brief Base environment type alias. */ + typedef EnvBase base_type; + + /** @brief Time step returned at each environment step. 
*/ + typedef typename base_type::time_step_type time_step_type; + + /** @brief Type describing the observation/state space of the environment. */ + typedef typename base_type::state_space_type state_space_type; + + /** @brief Type describing the action space of the environment. */ + typedef typename base_type::action_space_type action_space_type; + + /** @brief Type representing a valid action to execute. */ + typedef typename base_type::action_type action_type; + + /** @brief Type representing a state/observation returned by the environment. */ + typedef typename base_type::state_type state_type; + + /** @brief Import the reset() overloads from the base class. */ + using base_type::reset; + + /** + * @brief Virtual destructor. + */ + virtual ~GymnasiumEnvBase(); + + /** + * @brief Check whether the environment is still alive/connected. + * @return True if the wrapper can still communicate with the server. + */ + virtual bool is_alive()const; + + /** + * @brief Close the environment on the server and release any resources. + */ + virtual void close() override; + + /** + * @brief Reset the environment to an initial state using the reset + * options specified during make. + * + * @return Initial time step after reset + */ + virtual time_step_type reset()override; + + /** + * @brief Retrieve the REST API wrapper instance used for communication. + * @return Read-only reference to the server wrapper. + */ + network::RESTRLEnvClient& get_api_server()const{return *api_server_;} + + /** + * @brief Get the full URL for this environment endpoint on the server. + * @return Environment URL string. + */ + std::string get_url()const; + +protected: + + /** + * @brief Constructor. + * + * @param api_server A reference to the REST server wrapper handling communication. + * @param cidx Index of this environment instance within a simulation. + * @param name Name of the environment. 
+ */ + GymnasiumEnvBase(network::RESTRLEnvClient& api_server, const std::string& name); + + /** + * @brief Copy constructor. + */ + GymnasiumEnvBase(const GymnasiumEnvBase&); + + /** + * @brief Server wrapper handling communication with remote Gymnasium environment. + */ + network::RESTRLEnvClient* api_server_; + + /** + * @brief Build a TimeStepType instance from a server JSON response. + * + * Derived classes must parse the Gymnasium response and convert it into + * a fully constructed time step object. + * + * @param response JSON payload returned by the server. + * @return Constructed time_step_type instance. + */ + virtual time_step_type create_time_step_from_response_(const nlohmann::json& response)const=0; }; template GymnasiumEnvBase::GymnasiumEnvBase(const RESTApiServerWrapper& api_server, - const uint_t cidx, + SpaceType>::GymnasiumEnvBase(network::RESTRLEnvClient& api_server, const std::string& name) : - EnvBase(cidx, name), - api_server_(api_server) + EnvBase(name), + api_server_(&api_server) {} template @@ -171,49 +164,51 @@ namespace envs::gymnasium template bool GymnasiumEnvBase::is_alive()const{ - auto response = this -> api_server_.is_alive(this->env_name(), this -> cidx()); + auto response = this -> api_server_ -> is_alive(this->env_name(), this -> idx()); return response["result"]; } - template - void - GymnasiumEnvBase::close(){ - - if(!this->is_created()){ - return; - } - - auto response = this -> api_server_.close(this->env_name(), this -> cidx()); - this -> invalidate_is_created_flag_(); +template +void +GymnasiumEnvBase::close(){ + if(!this->is_created()){ + return; } - template - typename GymnasiumEnvBase::time_step_type - GymnasiumEnvBase::reset(uint_t seed, - const std::unordered_map& /*options*/){ + auto response = this -> api_server_ -> close(this->env_name(), + this -> idx()); + this -> invalidate_is_created_flag_(); +} - if(!this->is_created()){ +template +typename GymnasiumEnvBase::time_step_type +GymnasiumEnvBase::reset(){ + + 
if(!this->is_created()){ #ifdef BITRL_DEBUG assert(this->is_created() && "Environment has not been created"); #endif return time_step_type(); - } + } - auto response = this -> api_server_.reset(this->env_name(), - this -> cidx(), seed, + auto& reset_ops = this -> reset_options(); + auto seed = utils::resolve("seed", reset_ops); + + auto response = this -> api_server_ -> reset(this->env_name(), + this -> idx(), seed, nlohmann::json()); - this -> get_current_time_step_() = this->create_time_step_from_response_(response); - return this -> get_current_time_step_(); - } + this -> get_current_time_step_() = this->create_time_step_from_response_(response); + return this -> get_current_time_step_(); +} - template - std::string - GymnasiumEnvBase::get_url()const{ - return api_server_.get_env_url(this -> env_name()); - } +template +std::string +GymnasiumEnvBase::get_url()const{ + return api_server_ -> get_env_url(this -> env_name()); +} diff --git a/src/bitrl/envs/gymnasium/gymnasium_vector_env_base.h b/src/bitrl/envs/gymnasium/gymnasium_vector_env_base.h index 8c0c3bec..bb44ae7b 100644 --- a/src/bitrl/envs/gymnasium/gymnasium_vector_env_base.h +++ b/src/bitrl/envs/gymnasium/gymnasium_vector_env_base.h @@ -13,7 +13,7 @@ #include "bitrl/envs/gymnasium/gymnasium_env_base.h" #include "bitrl/bitrl_types.h" #include "bitrl/envs/env_types.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "../../network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" #ifdef BITRL_DEBUG @@ -66,7 +66,8 @@ class GymnasiumVecEnvBase: public GymnasiumEnvBase& options)=0; + const std::unordered_map& options, + const std::unordered_map& reset_options)=0; /// @@ -89,8 +90,7 @@ class GymnasiumVecEnvBase: public GymnasiumEnvBase -GymnasiumVecEnvBase::GymnasiumVecEnvBase(const RESTApiServerWrapper& api_server, - const uint_t cidx, +GymnasiumVecEnvBase::GymnasiumVecEnvBase(network::RESTRLEnvClient& api_server, const std::string& name) : -GymnasiumEnvBase(api_server, cidx, name) 
+GymnasiumEnvBase(api_server, name) {} @@ -129,7 +128,8 @@ reset_if_any_done_(other.reset_if_any_done_) template void GymnasiumVecEnvBase::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ auto reset_if_any_done_itr = options.find("reset_if_any_done"); diff --git a/src/bitrl/envs/gymnasium/toy_text/black_jack_env.cpp b/src/bitrl/envs/gymnasium/toy_text/black_jack_env.cpp index 78d3a5be..bff81f30 100644 --- a/src/bitrl/envs/gymnasium/toy_text/black_jack_env.cpp +++ b/src/bitrl/envs/gymnasium/toy_text/black_jack_env.cpp @@ -37,19 +37,11 @@ namespace envs::gymnasium ); } - BlackJack::BlackJack(const RESTApiServerWrapper& api_server) + BlackJack::BlackJack(network::RESTRLEnvClient& api_server) : - ToyTextEnvBase, 48, 2>(api_server, 0, BlackJack::name) + ToyTextEnvBase, 48, 2>(api_server, BlackJack::name) { - this ->get_api_server().register_if_not(BlackJack::name, BlackJack::URI); - } - - BlackJack::BlackJack(const RESTApiServerWrapper& api_server, - const uint_t cidx) - : - ToyTextEnvBase, 48, 2>(api_server, cidx, BlackJack::name) - { - this ->get_api_server().register_if_not(BlackJack::name, BlackJack::URI); + this -> get_api_server().register_if_not(BlackJack::name, BlackJack::URI); } BlackJack::BlackJack(const BlackJack& other) @@ -61,7 +53,8 @@ namespace envs::gymnasium void BlackJack::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; @@ -81,12 +74,13 @@ namespace envs::gymnasium ops["natural"] = is_natural_; ops["sab"] = is_sab_; auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), - version, - ops); + version, + ops); - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> 
make_created_(); } @@ -98,32 +92,15 @@ namespace envs::gymnasium #endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); return this->get_current_time_step_(); } - - - BlackJack - BlackJack::make_copy(uint_t cidx)const{ - - BlackJack copy(this -> get_api_server(), cidx); - - std::unordered_map ops; - ops["natural"] = this->is_natural(); - ops["sab"] = this->is_sab(); - - auto version = this -> version(); - copy.make(version, ops); - return copy; - - } - } } diff --git a/src/bitrl/envs/gymnasium/toy_text/black_jack_env.h b/src/bitrl/envs/gymnasium/toy_text/black_jack_env.h index 31c09cdc..850545a0 100644 --- a/src/bitrl/envs/gymnasium/toy_text/black_jack_env.h +++ b/src/bitrl/envs/gymnasium/toy_text/black_jack_env.h @@ -8,7 +8,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/envs/time_step.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/envs/gymnasium/toy_text/toy_text_base.h" #include "bitrl/extern/nlohmann/json/json.hpp" @@ -27,10 +27,10 @@ namespace envs::gymnasium /// \brief The BlackJack class. Wrapper to the Blackjack /// OpenAI-Gym environment. /// - class BlackJack final: public ToyTextEnvBase, 48, 2> - { +class BlackJack final: public ToyTextEnvBase, 48, 2> +{ - public: +public: /// /// \brief name @@ -73,73 +73,43 @@ namespace envs::gymnasium /// typedef typename base_type::state_type state_type; - /// - /// \brief BlackJack. Constructor. 
- /// - BlackJack(const RESTApiServerWrapper& api_server); + BlackJack(network::RESTRLEnvClient& api_server); - /// - /// \brief Constructor - /// - BlackJack(const RESTApiServerWrapper& api_server, const uint_t cidx); - - /// - /// - /// BlackJack(const BlackJack& other); - /// - /// \brief ~BlackJack. Destructor - /// - ~BlackJack()=default; + ~BlackJack() override =default; /// - /// \brief make. Builds the environment. Optionally we can choose if the - /// environment will be slippery - /// + /// \brief make. Builds the environment. Optionally we can choose if the + /// environment will be slippery + /// virtual void make(const std::string& version, - const std::unordered_map& options) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// - /// \brief step - /// \param action - /// \return - /// - virtual time_step_type step(const action_type& action)override final; - + /// \brief step + /// \param action + /// \return /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - BlackJack make_copy(uint_t cidx)const; - - /// - /// - /// + virtual time_step_type step(const action_type& action)override final; bool is_natural()const noexcept{return is_natural_;} - - /// - /// - /// bool is_sab()const noexcept{return is_sab_;} - - protected: +protected: /// - /// \brief build the dynamics from response - /// + /// \brief build the dynamics from response + /// virtual dynamics_t build_dynamics_from_response_(const nlohmann::json&)const override final; /// - /// \brief Handle the reset response from the environment server - /// + /// \brief Handle the reset response from the environment server + /// virtual time_step_type create_time_step_from_response_(const nlohmann::json&) const override final; - - - private: +private: /// /// \brief Flag indicating if the environment has been diff --git a/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.cpp 
b/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.cpp index aa965cdb..5407ce3b 100644 --- a/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.cpp +++ b/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.cpp @@ -1,3 +1,4 @@ +#include "bitrl/bitrl_consts.h" #include "bitrl/envs/gymnasium/toy_text/cliff_world_env.h" #include "bitrl/bitrl_config.h" #include "bitrl/envs/time_step_type.h" @@ -30,30 +31,18 @@ namespace envs::gymnasium std::unordered_map info_; info_["prob"] = std::any(static_cast(info["prob"])); - - return CliffWorld::time_step_type(TimeStepEnumUtils::time_step_type_from_int(step_type), reward, observation, discount, std::move(info_)); } - - CliffWorld::CliffWorld(const RESTApiServerWrapper& api_server) + CliffWorld::CliffWorld(network::RESTRLEnvClient& api_server) : - ToyTextEnvBase, 37, 4>(api_server, 0, CliffWorld::name), + ToyTextEnvBase, 37, 4>(api_server, CliffWorld::name), max_episode_steps_(200) { - this ->get_api_server().register_if_not(CliffWorld::name,CliffWorld::URI); - } - - CliffWorld::CliffWorld(const RESTApiServerWrapper& api_server, - const uint_t cidx) - : - ToyTextEnvBase, 37, 4>(api_server, cidx, CliffWorld::name), - max_episode_steps_(200) - { - this ->get_api_server().register_if_not(CliffWorld::name,CliffWorld::URI); + this -> get_api_server().register_if_not(CliffWorld::name,CliffWorld::URI); } CliffWorld::CliffWorld(const CliffWorld& other) @@ -64,7 +53,8 @@ namespace envs::gymnasium void CliffWorld::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; @@ -78,13 +68,11 @@ namespace envs::gymnasium nlohmann::json ops; ops["max_episode_steps"] = max_episode_steps_; auto response = this -> get_api_server().make(this->env_name(), - this->cidx(), - version, - ops); - - this->set_version_(version); - this->make_created_(); - + version, ops); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> 
base_type::make(version, options, reset_options); + this -> make_created_(); } CliffWorld::time_step_type @@ -95,11 +83,11 @@ namespace envs::gymnasium #endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } auto response = this -> get_api_server().step(this->env_name(), - this->cidx(), + this->idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); @@ -107,15 +95,5 @@ namespace envs::gymnasium } - CliffWorld - CliffWorld::make_copy(uint_t cidx)const{ - - CliffWorld copy(this -> get_api_server(),cidx); - std::unordered_map ops; - auto version = this -> version(); - copy.make(version, ops); - return copy; - } - } } diff --git a/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.h b/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.h index d49c9e8b..e8f580e8 100644 --- a/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.h +++ b/src/bitrl/envs/gymnasium/toy_text/cliff_world_env.h @@ -38,7 +38,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/envs/time_step.h" #include "bitrl/envs/gymnasium/toy_text/toy_text_base.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" @@ -55,29 +55,29 @@ namespace envs::gymnasium /// /// \brief The CliffWorld class /// - class CliffWorld final: public ToyTextEnvBase, 37, 4> - { +class CliffWorld final: public ToyTextEnvBase, 37, 4> +{ - public: +public: /// - /// \brief name - /// + /// \brief name + /// static const std::string name; /// - /// \brief The URI for accessing the environment - /// + /// \brief The URI for accessing the environment + /// static const std::string URI; /// - /// \brief dynamics_t - /// + /// \brief dynamics_t + /// typedef std::vector> dynamics_type; /// - /// \brief The base type - /// + /// \brief The base type + /// typedef typename ToyTextEnvBase, 37, 4>::base_type base_type; /// @@ -109,13 +109,8 @@ namespace 
envs::gymnasium /// /// \brief CliffWorld /// - CliffWorld(const RESTApiServerWrapper& api_server); + CliffWorld(network::RESTRLEnvClient& api_server); - /// - /// \brief Constructor - /// - CliffWorld(const RESTApiServerWrapper& api_server, - const uint_t cidx); /// /// \brief copy constructor @@ -132,7 +127,8 @@ namespace envs::gymnasium /// environment will be slippery /// virtual void make(const std::string& version, - const std::unordered_map& options) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// /// \brief step @@ -141,13 +137,6 @@ namespace envs::gymnasium /// virtual time_step_type step(const action_type& action) override final; - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - CliffWorld make_copy(uint_t cidx)const; - - protected: /// @@ -155,10 +144,7 @@ namespace envs::gymnasium /// uint_t max_episode_steps_; - /// - /// \brief build the dynamics from response - /// - //virtual dynamics_t build_dynamics_from_response_(const nlohmann::json& response)const override final; + /// /// \brief Handle the reset response from the environment server diff --git a/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.cpp b/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.cpp index 17ed9f92..82be043e 100644 --- a/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.cpp +++ b/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.cpp @@ -22,28 +22,16 @@ namespace envs::gymnasium const std::string FrozenLake::URI = "/gymnasium/frozen-lake-env"; template - FrozenLake::FrozenLake(const RESTApiServerWrapper& api_server) + FrozenLake::FrozenLake(network::RESTRLEnvClient& api_server) : ToyTextEnvBase, frozenlake_state_size::size, - 3>(api_server, 0, FrozenLake::name), + 3>(api_server, FrozenLake::name), is_slippery_(true) { this -> get_api_server().register_if_not(FrozenLake::name,FrozenLake::URI); } - template - FrozenLake::FrozenLake(const RESTApiServerWrapper& 
api_server, - const uint_t cidx, bool slippery) - : - ToyTextEnvBase, - frozenlake_state_size::size, - 3>(api_server, cidx, FrozenLake::name), - is_slippery_(slippery) - { - this -> get_api_server().register_if_not(FrozenLake::name,FrozenLake::URI); - } - template FrozenLake::FrozenLake(const FrozenLake& other) : @@ -74,7 +62,8 @@ namespace envs::gymnasium template void FrozenLake::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; @@ -91,11 +80,12 @@ namespace envs::gymnasium ops["map_name"] = map_type(); ops["is_slippery"] = is_slippery_; auto response = this -> get_api_server().make(this -> env_name(), - this -> cidx(), version, ops); - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } template @@ -107,11 +97,11 @@ namespace envs::gymnasium #endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } auto response = this -> get_api_server().step(this -> env_name(), - this -> cidx(), + this -> idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); @@ -119,22 +109,6 @@ namespace envs::gymnasium } - template - FrozenLake - FrozenLake::make_copy(uint_t cidx)const{ - - auto slippery = this -> is_slippery(); - FrozenLake copy(this -> get_api_server(), - cidx, slippery); - - std::unordered_map ops; - ops["is_slippery"] = this -> is_slippery(); - auto version = this -> version(); - copy.make(version, ops); - return copy; - - } - template class FrozenLake<4>; template class FrozenLake<8>; diff --git a/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.h b/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.h index 18672a1e..6129dbe5 100644 --- 
a/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.h +++ b/src/bitrl/envs/gymnasium/toy_text/frozen_lake_env.h @@ -58,7 +58,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/envs/time_step.h" #include "bitrl/envs/gymnasium/toy_text/toy_text_base.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" #include @@ -100,32 +100,31 @@ namespace envs::gymnasium /// \brief The FrozenLake class. Wrapper to Gymnasium FrozenLake /// environment /// - template - class FrozenLake final: public ToyTextEnvBase, +template +class FrozenLake final: public ToyTextEnvBase, frozenlake_state_size::size, 3> - { - public: +{ +public: /// - /// \brief name - /// + /// \brief name + /// static const std::string name; /// - /// \brief The URI for accessing the environment - /// + /// \brief The URI for accessing the environment + /// static const std::string URI; /// - /// \brief dynamics_t - /// + /// \brief dynamics_t + /// typedef std::vector> dynamics_t; - /// - /// \brief The base type - /// + /// \brief The base type + /// typedef typename ToyTextEnvBase, frozenlake_state_size::size, 3>::base_type base_type; @@ -147,90 +146,71 @@ namespace envs::gymnasium typedef typename base_type::action_space_type action_space_type; /// - /// \brief The type of the action to be undertaken in the environment - /// + /// \brief The type of the action to be undertaken in the environment + /// typedef typename base_type::action_type action_type; /// - /// \brief The type of the action to be undertaken in the environment - /// + /// \brief The type of the action to be undertaken in the environment + /// typedef typename base_type::state_type state_type; /// - /// \brief Constructor. 
- /// - FrozenLake(const RESTApiServerWrapper& api_server); - + /// \brief Constructor /// - /// \brief Constructor - /// - FrozenLake(const RESTApiServerWrapper& api_server, - const uint_t cidx, bool slippery); + FrozenLake(network::RESTRLEnvClient& api_server); /// - /// \brief copy constructor - /// + /// \brief copy constructor + /// FrozenLake(const FrozenLake& other); /// - /// \brief ~FrozenLake. Destructor. - /// - ~FrozenLake()=default; + /// \brief ~FrozenLake. Destructor. + /// + ~FrozenLake() override =default; /// - /// \brief make. Builds the environment. Optionally we can choose if the - /// environment will be slippery - /// + /// \brief make. Builds the environment. Optionally we can choose if the + /// environment will be slippery + /// virtual void make(const std::string& version, - const std::unordered_map& options) override final; + const std::unordered_map& options, + const std::unordered_map& reset_options) override final; /// - /// \brief Step in the environment following the given action - /// + /// \brief Step in the environment following the given action + /// virtual time_step_type step(const action_type& action) override final; /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - FrozenLake make_copy(uint_t cidx)const; - - + /// \brief map_type + /// \return /// - /// \brief map_type - /// \return - /// std::string map_type()const noexcept{return side_size == 4 ? 
"4x4" : "8x8";} /// - /// \brief is_slipery - /// \return - /// + /// \brief is_slipery + /// \return + /// bool is_slippery()const noexcept{return is_slippery_;} - protected: - - +protected: /// - /// \brief build the dynamics from response - /// - //virtual dynamics_t build_dynamics_from_response_(const nlohmann::json& response)const override final; - + /// \brief Handle the reset response from the environment server /// - /// \brief Handle the reset response from the environment server - /// virtual time_step_type create_time_step_from_response_(const nlohmann::json& response) const override final; - private: +private: /// - /// \brief is_slipery_ - /// + /// \brief is_slipery_ + /// bool is_slippery_; - }; +}; } } diff --git a/src/bitrl/envs/gymnasium/toy_text/taxi_env.cpp b/src/bitrl/envs/gymnasium/toy_text/taxi_env.cpp index d6a07556..ab7e05c0 100644 --- a/src/bitrl/envs/gymnasium/toy_text/taxi_env.cpp +++ b/src/bitrl/envs/gymnasium/toy_text/taxi_env.cpp @@ -18,26 +18,14 @@ namespace envs::gymnasium const std::string Taxi::name = "Taxi"; const std::string Taxi::URI = "/gymnasium/taxi-env"; - Taxi::Taxi(const RESTApiServerWrapper& api_server) + Taxi::Taxi(network::RESTRLEnvClient& api_server) : - ToyTextEnvBase, 500, 6>(api_server, 0, Taxi::name) + ToyTextEnvBase, 500, 6>(api_server, Taxi::name) { - this ->get_api_server().register_if_not(Taxi::name,Taxi::URI); + this -> get_api_server().register_if_not(Taxi::name,Taxi::URI); } - Taxi::Taxi(const RESTApiServerWrapper& api_server, const uint_t cidx) - : - ToyTextEnvBase, 500, 6>(api_server, cidx,Taxi::name) - { - this ->get_api_server().register_if_not(Taxi::name,Taxi::URI); - } - Taxi::Taxi(const Taxi& other) - : - ToyTextEnvBase, 500, 6>(other) - {} - - Taxi::time_step_type Taxi::create_time_step_from_response_(const nlohmann::json& response)const{ @@ -53,19 +41,20 @@ namespace envs::gymnasium void Taxi::make(const std::string& version, - const std::unordered_map& /*options*/){ + const std::unordered_map& 
options, + const std::unordered_map& reset_options){ if(this->is_created()){ return; } - this -> get_api_server().make(this->env_name(), - this->cidx(), - version, - nlohmann::json()); + auto response = this -> get_api_server().make(this->env_name(),version, + nlohmann::json()); - this->set_version_(version); - this->make_created_(); + auto idx = response["idx"]; + this -> set_idx_(idx); + this -> base_type::make(version, options, reset_options); + this -> make_created_(); } @@ -77,27 +66,18 @@ namespace envs::gymnasium #endif if(this->get_current_time_step_().last()){ - return this->reset(42, std::unordered_map()); + return this->reset(); } auto response = this -> get_api_server().step(this->env_name(), - this->cidx(), + this->idx(), action); this->get_current_time_step_() = this->create_time_step_from_response_(response); return this->get_current_time_step_(); } - Taxi - Taxi::make_copy(uint_t cidx)const{ - Taxi copy(this -> get_api_server(), cidx); - std::unordered_map ops; - auto version = this -> version(); - copy.make(version, ops); - return copy; - - } } } diff --git a/src/bitrl/envs/gymnasium/toy_text/taxi_env.h b/src/bitrl/envs/gymnasium/toy_text/taxi_env.h index 2e8d0264..033283ea 100644 --- a/src/bitrl/envs/gymnasium/toy_text/taxi_env.h +++ b/src/bitrl/envs/gymnasium/toy_text/taxi_env.h @@ -5,7 +5,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/envs/gymnasium/toy_text/toy_text_base.h" #include "bitrl/envs/time_step.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/extern/nlohmann/json/json.hpp" #include @@ -19,35 +19,35 @@ namespace envs::gymnasium /// /// \brief The Taxi class /// - class Taxi final: public ToyTextEnvBase, 500, 6> - { - public: +class Taxi final: public ToyTextEnvBase, 500, 6> +{ +public: - /// + /// /// \brief name /// - static const std::string name; + static const std::string name; - /// + /// /// \brief The URI for accessing the environment /// - static const 
std::string URI; + static const std::string URI; - /// + /// /// \brief The base type /// - typedef typename ToyTextEnvBase, 500, 6>::base_type base_type; + typedef typename ToyTextEnvBase, 500, 6>::base_type base_type; - /// + /// /// \brief The time step type we return every time a step in the /// environment is performed /// - typedef typename base_type::time_step_type time_step_type; + typedef typename base_type::time_step_type time_step_type; - /// + /// /// \brief The type describing the state space for the environment /// - typedef typename base_type::state_space_type state_space_type; + typedef typename base_type::state_space_type state_space_type; /// /// \brief The type of the action space for the environment @@ -67,13 +67,8 @@ namespace envs::gymnasium /// /// \brief Taxi /// - Taxi(const RESTApiServerWrapper& api_server); + Taxi(network::RESTRLEnvClient& api_server); - /// - /// \brief Constructor - /// - Taxi(const RESTApiServerWrapper& api_server, - const uint_t cidx); /// /// \brief copy constructor @@ -83,35 +78,23 @@ namespace envs::gymnasium /// /// \brief ~FrozenLake. Destructor. /// - ~Taxi()=default; + ~Taxi() override =default; /// /// \brief make. Builds the environment. 
Optionally we can choose if the /// environment will be slippery /// virtual void make(const std::string& version, - const std::unordered_map& /*options*/) override final; - + const std::unordered_map& /*options*/, + const std::unordered_map& /*reset_options*/) override final; /// /// \brief step /// virtual time_step_type step(const action_type& action) override final; - /// - /// \brief Create a new copy of the environment with the given - /// copy index - /// - Taxi make_copy(uint_t cidx)const; - - protected: - - - /// - /// \brief build the dynamics from response - /// - //virtual dynamics_t build_dynamics_from_response_(const nlohmann::json& response)const override final; +protected: /// /// \brief Handle the reset response from the environment server @@ -120,7 +103,7 @@ namespace envs::gymnasium - }; +}; } } diff --git a/src/bitrl/envs/gymnasium/toy_text/toy_text_base.h b/src/bitrl/envs/gymnasium/toy_text/toy_text_base.h index cfa8d081..b9bcea92 100644 --- a/src/bitrl/envs/gymnasium/toy_text/toy_text_base.h +++ b/src/bitrl/envs/gymnasium/toy_text/toy_text_base.h @@ -5,7 +5,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/extern/nlohmann/json/json.hpp" #include "bitrl/bitrl_config.h" -#include "bitrl/envs/api_server/apiserver.h" +#include "bitrl/network/rest_rl_env_client.h" #include "bitrl/envs/env_types.h" #include @@ -22,111 +22,105 @@ namespace bitrl{ namespace envs::gymnasium { - /// +/// /// \brief ToyTextEnvBase class. Base class /// for toy environments from Gymnasium. 
These environments /// have a discrete action and state spaces - template - class ToyTextEnvBase: public GymnasiumEnvBase >{ - public: - +template +class ToyTextEnvBase: public GymnasiumEnvBase >{ +public: /// /// \brief The base_type /// - typedef typename GymnasiumEnvBase >::base_type base_type; - /// + /// /// \brief The time step type we return every time a step in the /// environment is performed /// - typedef typename base_type::time_step_type time_step_type; + typedef typename base_type::time_step_type time_step_type; - /// + /// /// \brief The type describing the state space for the environment /// - typedef typename base_type::state_space_type state_space_type; + typedef typename base_type::state_space_type state_space_type; - /// + /// /// \brief The type of the action space for the environment /// - typedef typename base_type::action_space_type action_space_type; + typedef typename base_type::action_space_type action_space_type; - /// + /// /// \brief The type of the action to be undertaken in the environment /// - typedef typename base_type::action_type action_type; + typedef typename base_type::action_type action_type; /// /// \brief The type of the state /// - typedef typename base_type::state_type state_type; + typedef typename base_type::state_type state_type; /// /// \brief dynamics_t /// - typedef std::vector> dynamics_t; + typedef std::vector> dynamics_t; - /// + /// /// \brief ~FrozenLake. Destructor. /// - virtual ~ToyTextEnvBase()=default; + virtual ~ToyTextEnvBase()=default; - /// + /// /// \brief P /// \param sidx /// \param aidx /// - dynamics_t p(uint_t sidx, uint_t aidx)const; + dynamics_t p(uint_t sidx, uint_t aidx)const; - /// + /// /// \brief n_actions. Returns the number of actions /// - uint_t n_actions()const noexcept{return action_space_type::size;} + uint_t n_actions()const noexcept{return action_space_type::size;} - /// + /// /// \brief Number of states. 
/// - uint_t n_states()const noexcept{return state_space_type::size;} + uint_t n_states()const noexcept{return state_space_type::size;} - protected: +protected: - /// + /// /// \brief Constructor /// - ToyTextEnvBase(const RESTApiServerWrapper& api_server, - const uint_t cidx, - const std::string& name); + ToyTextEnvBase(network::RESTRLEnvClient& api_server, const std::string& name); - /// + /// /// \brief Copy constructor /// - ToyTextEnvBase(const ToyTextEnvBase& other); + ToyTextEnvBase(const ToyTextEnvBase& other); - /// + /// /// \brief build the dynamics from response /// - virtual dynamics_t build_dynamics_from_response_(const nlohmann::json& response)const; + virtual dynamics_t build_dynamics_from_response_(const nlohmann::json& response)const; - }; +}; - template - ToyTextEnvBase::ToyTextEnvBase(const RESTApiServerWrapper& api_server, - const uint_t cidx, - const std::string& name) +template +ToyTextEnvBase::ToyTextEnvBase(network::RESTRLEnvClient& api_server,const std::string& name) : GymnasiumEnvBase>(api_server, - cidx, name) {} - template - ToyTextEnvBase +ToyTextEnvBase::ToyTextEnvBase(const ToyTextEnvBase& other) @@ -145,7 +139,7 @@ namespace envs::gymnasium #endif auto response = this -> get_api_server().dynamics(this->env_name(), - this->cidx(), + this->idx(), sidx, aidx); return build_dynamics_from_response_(response); } diff --git a/src/bitrl/envs/multi_armed_bandits/multi_armed_banditis.cpp b/src/bitrl/envs/multi_armed_bandits/multi_armed_banditis.cpp index e773f013..11d274c2 100644 --- a/src/bitrl/envs/multi_armed_bandits/multi_armed_banditis.cpp +++ b/src/bitrl/envs/multi_armed_bandits/multi_armed_banditis.cpp @@ -21,7 +21,8 @@ bandits_() void MultiArmedBandits::make(const std::string& version, - const std::unordered_map& options){ + const std::unordered_map& options, + const std::unordered_map& reset_options){ auto p_itr = options.find("p"); @@ -60,9 +61,8 @@ MultiArmedBandits::make(const std::string& version, } 
MultiArmedBandits::time_step_type -MultiArmedBandits::reset(uint_t seed, - const std::unordered_map& /*options*/){ - seed_ = seed; +MultiArmedBandits::reset(){ + seed_ = 42; static auto res = [](auto& bernoulli){ bernoulli.reset(); diff --git a/src/bitrl/envs/multi_armed_bandits/multi_armed_bandits.h b/src/bitrl/envs/multi_armed_bandits/multi_armed_bandits.h index 53876223..2ec83c40 100644 --- a/src/bitrl/envs/multi_armed_bandits/multi_armed_bandits.h +++ b/src/bitrl/envs/multi_armed_bandits/multi_armed_bandits.h @@ -91,8 +91,6 @@ class MultiArmedBandits final: public EnvBase, MultiArmedBanditsS /// typedef typename base_type::state_type state_type; - using base_type::reset; - /// /// \brief MultiArmedBandits Constructor /// @@ -105,7 +103,8 @@ class MultiArmedBandits final: public EnvBase, MultiArmedBanditsS /// Concrete classes may choose to hold a copy /// virtual void make(const std::string& version, - const std::unordered_map& options)override final; + const std::unordered_map& options, + const std::unordered_map& reset_options)override final; /// /// \brief close the environment @@ -117,8 +116,7 @@ class MultiArmedBandits final: public EnvBase, MultiArmedBanditsS /// \param seed. The seed to use for resetting the environment /// \param options. Options to use for resetting the environment. 
/// - virtual time_step_type reset(uint_t seed, - const std::unordered_map& options)override final; + virtual time_step_type reset()override final; /// diff --git a/src/bitrl/envs/with_rest_api_mixin.h b/src/bitrl/envs/with_rest_api_mixin.h deleted file mode 100644 index a2dc2888..00000000 --- a/src/bitrl/envs/with_rest_api_mixin.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef WITH_REST_API_MIXIN_H -#define WITH_REST_API_MIXIN_H - -#include "rlenvs/extern/HTTPRequest.hpp" -#include - -namespace rlenvscpp{ -namespace envs{ - - -/// -/// \brief class synchronized_env_mixin -/// -template -class with_rest_api_mixin -{ -public: - - typedef TimeStepType time_step_type; - - /// - /// \brief Constructor - /// - with_rest_api_mixin(const std::string& api_url, const std::string& resource_path); - - /// - /// \brief Copy ctor - /// - with_rest_api_mixin(const with_rest_api_mixin& other); - - /// - /// \brief destructor - /// - virtual ~with_rest_api_mixin()=default; - - /// - /// \brief build the time step from the server response - /// - virtual time_step_type create_time_step_from_response_(const http::Response& response)const=0; - - /// - /// \brief Query the environment server is the environment has been created - /// - virtual bool is_alive()const=0; - - /// - /// \brief Returns the url the environment is using - /// - std::string get_url()const noexcept{return api_url_ + resource_path_;} - - /// - /// \brief Returns the url the environment is using - /// - std::string get_api_url()const noexcept{return api_url_;} - - /// - /// \brief The resource path - /// - std::string get_resource_path()const noexcept {return resource_path_;} - - -private: - - std::string api_url_; - std::string resource_path_; - - -}; - -template -with_rest_api_mixin::with_rest_api_mixin(const std::string& api_url, const std::string& resource_path) -: -api_url_(api_url), -resource_path_(resource_path) -{} - -template -with_rest_api_mixin::with_rest_api_mixin(const with_rest_api_mixin& other) -: 
-api_url_(other.api_url_), -resource_path_(other.resource_path_) -{} - -} -} - -#endif \ No newline at end of file diff --git a/src/bitrl/envs/api_server/apiserver.cpp b/src/bitrl/network/rest_rl_env_client.cpp similarity index 73% rename from src/bitrl/envs/api_server/apiserver.cpp rename to src/bitrl/network/rest_rl_env_client.cpp index 1aad5486..2938f58f 100644 --- a/src/bitrl/envs/api_server/apiserver.cpp +++ b/src/bitrl/network/rest_rl_env_client.cpp @@ -1,4 +1,4 @@ -#include "bitrl/envs/api_server/apiserver.h" +#include "rest_rl_env_client.h" #include "bitrl/bitrl_consts.h" #include "bitrl/extern/HTTPRequest.hpp" #include "bitrl/extern/nlohmann/json/json.hpp" @@ -7,9 +7,9 @@ #include namespace bitrl{ -namespace envs{ +namespace network{ -RESTApiServerWrapper::RESTApiServerWrapper(const std::string& url, const bool initialize) +RESTRLEnvClient::RESTRLEnvClient(const std::string& url, const bool initialize) : url_(url), is_init_(false), @@ -21,7 +21,7 @@ envs_() } void -RESTApiServerWrapper::init_(){ +RESTRLEnvClient::init_(){ envs_["FrozenLake"] = "/gymnasium/frozen-lake-env"; envs_["Taxi"] = "/gymnasium/taxi-env"; @@ -37,7 +37,7 @@ RESTApiServerWrapper::init_(){ } void -RESTApiServerWrapper::register_new(const std::string& name, const std::string& uri){ +RESTRLEnvClient::register_new(const std::string& name, const std::string& uri){ auto env_itr = envs_.find(name); @@ -51,7 +51,7 @@ RESTApiServerWrapper::register_new(const std::string& name, const std::string& u } void -RESTApiServerWrapper::register_if_not(const std::string& name, +RESTRLEnvClient::register_if_not(const std::string& name, const std::string& uri){ try{ @@ -64,7 +64,7 @@ RESTApiServerWrapper::register_if_not(const std::string& name, } std::string -RESTApiServerWrapper::get_uri(const std::string& name)const noexcept{ +RESTRLEnvClient::get_uri(const std::string& name)const noexcept{ auto env_itr = envs_.find(name); @@ -76,7 +76,7 @@ RESTApiServerWrapper::get_uri(const std::string& name)const 
noexcept{ } std::string -RESTApiServerWrapper::get_env_url(const std::string& name)const noexcept{ +RESTRLEnvClient::get_env_url(const std::string& name)const noexcept{ auto uri_ = get_uri(name); @@ -88,8 +88,7 @@ RESTApiServerWrapper::get_env_url(const std::string& name)const noexcept{ } nlohmann::json -RESTApiServerWrapper::is_alive(const std::string& env_name, - const uint_t cidx)const{ +RESTRLEnvClient::is_alive(const std::string& env_name, const std::string& idx)const{ // find the source auto url_ = get_env_url(env_name); @@ -97,11 +96,8 @@ RESTApiServerWrapper::is_alive(const std::string& env_name, if(url_ == bitrl::consts::INVALID_STR){ throw std::logic_error("Environment: " + env_name + " is not registered"); } - - - auto copy_idx_str = std::to_string(cidx); - - http::Request request{url_ + "/is-alive?cidx="+copy_idx_str}; + + http::Request request{url_ + "/" + idx + "/is-alive"}; const auto response = request.send("GET"); auto str_response = std::string(response.body.begin(), response.body.end()); @@ -111,8 +107,7 @@ RESTApiServerWrapper::is_alive(const std::string& env_name, } nlohmann::json -RESTApiServerWrapper::close(const std::string& env_name, - const uint_t cidx)const{ +RESTRLEnvClient::close(const std::string& env_name, const std::string& idx)const{ // find the source auto url_ = get_env_url(env_name); @@ -121,7 +116,7 @@ RESTApiServerWrapper::close(const std::string& env_name, throw std::logic_error("Environment: " + env_name + " is not registered"); } - http::Request request{url_ + "/close?cidx="+std::to_string(cidx)}; + http::Request request{url_ + "/" + idx + "/close"}; const auto response = request.send("POST"); @@ -135,10 +130,8 @@ RESTApiServerWrapper::close(const std::string& env_name, } nlohmann::json -RESTApiServerWrapper::reset(const std::string& env_name, - const uint_t cidx, - const uint_t seed, - const nlohmann::json& options)const{ +RESTRLEnvClient::reset(const std::string& env_name, const std::string& idx, + const uint_t seed, const 
nlohmann::json& options)const{ // find the source @@ -148,12 +141,11 @@ RESTApiServerWrapper::reset(const std::string& env_name, throw std::logic_error("Environment: " + env_name + " is not registered"); } - const auto request_url = url_ + "/reset"; + const auto request_url = url_ + "/" + idx + "/reset"; http::Request request{request_url}; nlohmann::json request_body; request_body["seed"] = seed; - request_body["cidx"] = cidx; request_body["options"] = options; const auto response = request.send("POST", request_body.dump()); @@ -170,10 +162,9 @@ RESTApiServerWrapper::reset(const std::string& env_name, nlohmann::json -RESTApiServerWrapper::make(const std::string& env_name, - const uint_t cidx, - const std::string& version, - const nlohmann::json& options)const{ +RESTRLEnvClient::make(const std::string& env_name, + const std::string& version, + const nlohmann::json& options)const{ // find the source auto url_ = get_env_url(env_name); @@ -187,7 +178,6 @@ RESTApiServerWrapper::make(const std::string& env_name, nlohmann::json request_body; request_body["version"] = version; - request_body["cidx"] = cidx; request_body["options"] = options; const auto response = request.send("POST", request_body.dump()); @@ -203,10 +193,8 @@ RESTApiServerWrapper::make(const std::string& env_name, } nlohmann::json -RESTApiServerWrapper::dynamics(const std::string& env_name, - const uint_t cidx, - const uint_t sidx, - const uint_t aidx)const{ +RESTRLEnvClient::dynamics(const std::string& env_name, const std::string& idx, + const uint_t sidx, const uint_t aidx)const{ // find the source auto url_ = get_env_url(env_name); @@ -215,8 +203,7 @@ RESTApiServerWrapper::dynamics(const std::string& env_name, throw std::logic_error("Environment: " + env_name + " is not registered"); } - const auto request_url = url_ + "/dynamics?cidx="+std::to_string(cidx) - +"&stateId="+std::to_string(sidx) + const auto request_url = url_ + "/" + idx + "/dynamics?stateId="+std::to_string(sidx) 
+"&actionId="+std::to_string(aidx); http::Request request{request_url}; const auto response = request.send("GET"); @@ -227,7 +214,7 @@ RESTApiServerWrapper::dynamics(const std::string& env_name, } bool -RESTApiServerWrapper::has_gymnasium()const{ +RESTRLEnvClient::has_gymnasium()const{ const auto request_url = url_ + "/api-info/gymnasium"; http::Request request{request_url}; @@ -238,7 +225,7 @@ RESTApiServerWrapper::has_gymnasium()const{ } std::vector -RESTApiServerWrapper::gymnasium_envs()const{ +RESTRLEnvClient::gymnasium_envs()const{ const auto request_url = url_ + "/api-info/gymnasium/envs"; http::Request request{request_url}; diff --git a/src/bitrl/envs/api_server/apiserver.h b/src/bitrl/network/rest_rl_env_client.h similarity index 84% rename from src/bitrl/envs/api_server/apiserver.h rename to src/bitrl/network/rest_rl_env_client.h index d1c14fd5..5dd3343a 100644 --- a/src/bitrl/envs/api_server/apiserver.h +++ b/src/bitrl/network/rest_rl_env_client.h @@ -1,7 +1,7 @@ -#ifndef APISERVER_H -#define APISERVER_H +#ifndef REST_RL_ENV_CLIENT_H +#define REST_RL_ENV_CLIENT_H #include "bitrl/bitrl_types.h" #include "bitrl/bitrl_consts.h" @@ -18,22 +18,22 @@ /// /// namespace bitrl{ -namespace envs{ +namespace network{ /// /// \brief Utility class to facilitate /// HTTP requests between the environments REST API /// and C++ drivers /// -class RESTApiServerWrapper +class RESTRLEnvClient { public: /// /// \brief Constructor /// - explicit RESTApiServerWrapper(const std::string& url="http://0.0.0.0:8001/api", - const bool initialize=true); + explicit RESTRLEnvClient(const std::string& url="http://0.0.0.0:8001/api", + const bool initialize=true); /// /// \brief Returns true if the server is initialised @@ -76,7 +76,7 @@ class RESTApiServerWrapper /// Throws std::logic_error is the environment is not registered /// nlohmann::json is_alive(const std::string& env_name, - const uint_t cidx)const; + const std::string& idx)const; /// /// \brief Close the environment with the 
given name. @@ -84,7 +84,7 @@ class RESTApiServerWrapper /// Throws std::runtime_error if the server response is not 201 /// nlohmann::json close(const std::string& env_name, - const uint_t cidx)const; + const std::string& idx)const; /// /// \brief Step in the environment with the given name @@ -95,7 +95,7 @@ class RESTApiServerWrapper /// template nlohmann::json step(const std::string& env_name, - const uint_t cidx, + const std::string& idx, const ActionType& action)const; /// @@ -105,7 +105,7 @@ class RESTApiServerWrapper /// Throws std::runtime_error if the server response is not 202 /// nlohmann::json reset(const std::string& env_name, - const uint_t cidx, + const std::string& idx, const uint_t seed, const nlohmann::json& options)const; @@ -116,7 +116,6 @@ class RESTApiServerWrapper /// Throws std::runtime_error if the server response is not 202 /// nlohmann::json make(const std::string& env_name, - const uint_t cidx, const std::string& version, const nlohmann::json& options)const; @@ -126,17 +125,9 @@ class RESTApiServerWrapper /// does not expose such an endpoint it returns 404 /// nlohmann::json dynamics(const std::string& env_name, - const uint_t cidx, + const std::string& idx, const uint_t sidx, const uint_t aidx)const; - - /// - /// \brief Make the cidx copy of the environment - /// - nlohmann::json copy(const std::string& env_name, - const uint_t cidx, - const std::string& version, - const nlohmann::json& options)const{return make(env_name, cidx, version, options);} /// /// @@ -175,7 +166,7 @@ class RESTApiServerWrapper template nlohmann::json -RESTApiServerWrapper::step(const std::string& env_name, const uint_t cidx, +RESTRLEnvClient::step(const std::string& env_name, const std::string& idx, const ActionType& action)const{ @@ -186,11 +177,10 @@ RESTApiServerWrapper::step(const std::string& env_name, const uint_t cidx, throw std::logic_error("Environment: " + env_name + " is not registered"); } - const auto request_url = url_ + "/step"; + const auto 
request_url = url_ + "/" + idx + "/step"; http::Request request{request_url}; nlohmann::json body; - body["cidx"] = cidx; body["action"] = action; const auto response = request.send("POST", body.dump()); diff --git a/src/bitrl/sensors/ekf_sensor_fusion.h b/src/bitrl/sensors/ekf_sensor_fusion.h index 32d0117e..3c563814 100644 --- a/src/bitrl/sensors/ekf_sensor_fusion.h +++ b/src/bitrl/sensors/ekf_sensor_fusion.h @@ -7,6 +7,7 @@ #include "bitrl/bitrl_types.h" #include "bitrl/sensors/sensor_type_enum.h" +#include "bitrl/network/mqtt_subscriber.h" #include "Eigen/Dense" #include @@ -19,12 +20,6 @@ namespace bitrl { - namespace network - { - // forward declaration - class MqttSubscriber; - } - namespace sensors { template diff --git a/src/bitrl/utils/std_map_utils.h b/src/bitrl/utils/std_map_utils.h index 3c476f3e..afdc8067 100644 --- a/src/bitrl/utils/std_map_utils.h +++ b/src/bitrl/utils/std_map_utils.h @@ -9,17 +9,17 @@ #include #include +#include #include namespace bitrl{ namespace utils{ - -/// -/// \fn resolve -/// \brief Given the name of the argument return std::any_cast(itr->second) -/// where itr is itr = input.find(name) -/// + +/** + * Given the name of the argument return std::any_cast(itr->second) + * where itr is itr = input.find(name) + */ template OutT resolve(const std::string& name, @@ -35,8 +35,32 @@ resolve(const std::string& name, } - } + /** + * Given the name of the argument return std::any_cast(itr->second) + * where itr is itr = input.find(name) + */ + template + OutT + resolve(const std::string& name, + const std::unordered_map& input) +{ + auto itr = input.find(name); + + if (itr == input.end()) { + throw std::logic_error("Property: " + name + " not in input"); + } + + try { + return std::any_cast(itr->second); + } + catch (const std::bad_any_cast&) { + throw std::logic_error( + "Property: " + name + " has unexpected type" + ); + } } + } +} #endif \ No newline at end of file diff --git a/src/bitrl/utils/utils.h b/src/bitrl/utils/utils.h new 
file mode 100644 index 00000000..6e3bdb13 --- /dev/null +++ b/src/bitrl/utils/utils.h @@ -0,0 +1,31 @@ +// +// Created by alex on 12/13/25. +// + +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include +#include + +namespace bitrl +{ + namespace utils + { + /** + * Generate UUID4 strings using Boost::uuids. + * For more info on UUIDs see: https://www.ibm.com/docs/en/cobol-zos/6.3.0?topic=functions-uuid4 + * @return + */ + inline + std::string uuid4() + { + return boost::uuids::to_string(boost::uuids::random_generator()()); + } + + } +} + +#endif //UTILS_H