diff --git a/.gitignore b/.gitignore
index 4fffd9c..be4bd47 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@ logs/
 
 # Dependency directories
 venv/
+.venv/
 env/
 env.bak/
 env.tmp/
@@ -46,4 +47,6 @@ env.production.local/
 Thumbs.db
 
 # QASM files
-*.qasm
\ No newline at end of file
+*.qasm
+
+results/*
\ No newline at end of file
diff --git a/env_creator.py b/env_creator.py
index 61af8b9..2ab1e51 100644
--- a/env_creator.py
+++ b/env_creator.py
@@ -1,6 +1,6 @@
 from gymenv_qsimpy import QSimPyEnv
-from env_wrapper import ScaleQSimPyEnv
-from gymnasium.experimental.wrappers import RescaleObservationV0, DtypeObservationV0
+from env_wrapper import ScaleQSimPyEnv , SerializableEnvWrapper
+from gymnasium.wrappers import RescaleObservation, DtypeObservation
 import numpy as np
 
 
@@ -10,20 +10,25 @@ def qsimpy_env_creator(env_config):
     config = config if config is not None else {}
     if dataset is None:
         raise ValueError("Dataset is not specified")
+    
     env = QSimPyEnv(dataset=dataset, config=config)
+    env = SerializableEnvWrapper(env)
+    
     obs_filter = env_config.pop("obs_filter", None)
     reward_filter = env_config.pop("reward_filter", None)
 
     if obs_filter is not None:
         if obs_filter == "rescale_-1_1":
-            env = RescaleObservationV0(
+            env = RescaleObservation(
                 env=env,
                 min_obs=np.ones((env.obs_dim,), dtype=np.float32) * -1,
                 max_obs=np.ones((env.obs_dim,), dtype=np.float32) * 1,
             )
-            env = DtypeObservationV0(env, dtype=np.float32)
+            env = DtypeObservation(env, dtype=np.float32)
 
     if reward_filter is not None:
         if reward_filter == "scale_2x":
             env = ScaleQSimPyEnv(env, scale=env_config.pop("reward_scale", 2))
+            
+    # for i in range(10) : print(type(env))
     return env
\ No newline at end of file
diff --git a/env_wrapper.py b/env_wrapper.py
index bb5bd2d..07663e4 100644
--- a/env_wrapper.py
+++ b/env_wrapper.py
@@ -1,8 +1,9 @@
 import gymnasium as gym
 from gymnasium.core import Env
-from gymnasium.wrappers.normalize import NormalizeObservation, NormalizeReward
+# NOTE(review): default_rng and simpy are not used in the visible parts of this module; confirm they are needed.
+from numpy.random import default_rng
+import simpy
 import numpy as np
-from gymnasium.spaces import Box
 
 
 class ScaleQSimPyEnv(gym.RewardWrapper):
@@ -13,12 +14,62 @@ def __init__(self, env: Env, scale: float):
     def reward(self, reward):
         reward *= self.scaling_factor
         return reward
 
 
-class GymNormalizeObservation(NormalizeObservation):
-    def __init__(self, env: Env, *args, **kwargs):
-        super().__init__(env, *args, **kwargs)
-        self.observation_space = Box(
-            low=np.ones((self.env.obs_dim,)) * -np.inf,
-            high=np.ones((self.env.obs_dim,)) * np.inf,
-        )
+class SerializableEnvWrapper(gym.Wrapper):
+    """Wrapper that makes a wrapped ``QSimPyEnv`` picklable.
+
+    Attribute lookups that fail on the wrapper are forwarded to the wrapped
+    env. When pickled, the env object is replaced by its state plus its
+    ``dataset_path``; when unpickled, a fresh ``QSimPyEnv`` is built from that
+    path and the saved state is applied to it.
+    """
+
+    def __getattr__(self, name):
+        # Only reached when normal lookup fails. "env" must not be forwarded:
+        # if it is not set yet (e.g. on a half-built instance), ``self.env``
+        # would re-enter this method and recurse until RecursionError.
+        if name == "env":
+            raise AttributeError(name)
+        return getattr(self.env, name)
+
+    def __getstate__(self):
+        """Return the wrapper's ``__dict__`` with the env swapped for its state."""
+        state = self.__dict__.copy()
+
+        # Don't pickle the actual env object; store its state instead.
+        state.pop("env", None)
+        if hasattr(self.env, "__getstate__"):
+            state["env_state"] = self.env.__getstate__()
+        else:
+            state["env_state"] = self.env.__dict__.copy()
+
+        # Drop iterable values that are not one of the plain containers below.
+        for k, v in list(state.items()):
+            if hasattr(v, "__iter__") and not isinstance(v, (list, tuple, dict, str, bytes, np.ndarray)):
+                print(f"[WRAPPER-PICKLE] Removing generator-like object at key '{k}' ({type(v)})")
+                del state[k]
+
+        # Preserve dataset path for reconstruction
+        state["_dataset_path"] = getattr(self.env, "dataset_path", None)
+        return state
+
+    def __setstate__(self, state):
+        """Rebuild the wrapped env from the saved dataset path and state.
+
+        Raises:
+            ValueError: if ``state`` carries no dataset path.
+        """
+        from gymenv_qsimpy import QSimPyEnv
+
+        dataset_path = state.pop("_dataset_path", None)
+        if not dataset_path:
+            raise ValueError("Missing dataset path for deserialization")
+
+        new_env = QSimPyEnv(dataset=dataset_path)
+        env_state = state.pop("env_state", None)
+        if env_state is not None:
+            new_env.__setstate__(env_state)
+
+        super().__init__(new_env)
+        self.__dict__.update(state)
diff --git a/evaluator_greedy.py b/evaluator_greedy.py
index 2c64ede..f5397dc 100644
--- a/evaluator_greedy.py
+++ b/evaluator_greedy.py
@@ -16,7 +16,7 @@
 
 
 def enhanced_greedy_policy(env, skipped_list):
-    current_obs = env.current_obs
+    current_obs = env.unwrapped.current_obs
     qnode_start_index = 4  # Adjust based on the actual qtask observation length
     qnode_obs_length = 3  # Number of values per qnode in the observation
 
diff --git a/gymenv_qsimpy.py b/gymenv_qsimpy.py
index af5720e..0f75d47 100644
--- a/gymenv_qsimpy.py
+++ b/gymenv_qsimpy.py
@@ -54,6 +54,10 @@ def __init__(
         """
         
         super().__init__()
+        if dataset is None:
+            raise ValueError("Dataset is not specified")
+        
+        self.dataset_path = dataset
 
         # OBSERVATION SPACE
         # Each observation is a dict of qtask_attributes and qnode_attributes
@@ -63,6 +67,7 @@ def __init__(
         self.n_qnodes = 5  # number of qnodes
         self.qtasks = []
         self.qnodes = []
+        self.serviced_qtasks = []
         self.mode = mode
         self.obs_dim = 4 + self.n_qnodes * 3
         self.observation_space = Box(
@@ -79,37 +84,36 @@ def __init__(
 
         # Assuming the observation consists of [arrival_time, qubit_number, circuit_layers] for tasks
         # and [qubit_number, clops, next_available_time] for each node
-        task_obs_low = np.array([0, 0, 0, 0], dtype=np.float64)
+        task_obs_low = np.array([0, 0, 0, 0], dtype=np.float32)
         task_obs_high = np.array(
-            [max_time, max_qubits, max_layers, max_rescheduling_count], dtype=np.float64
+            [max_time, max_qubits, max_layers, max_rescheduling_count], dtype=np.float32
         )
-        node_obs_low = np.array([0, 0, -1] * self.n_qnodes, dtype=np.float64)
+        node_obs_low = np.array([0, 0, -1] * self.n_qnodes, dtype=np.float32)
         node_obs_high = np.array(
-            [max_qubits, max_clops, max_time] * self.n_qnodes, dtype=np.float64
+            [max_qubits, max_clops, max_time] * self.n_qnodes, dtype=np.float32
         )
 
         # Combine to form the complete observation space
-        obs_low = np.concatenate([task_obs_low, node_obs_low]).astype(np.float64)
-        obs_high = np.concatenate([task_obs_high, node_obs_high]).astype(np.float64)
+        obs_low = np.concatenate([task_obs_low, node_obs_low]).astype(np.float32)
+        obs_high = np.concatenate([task_obs_high, node_obs_high]).astype(np.float32)
 
-        self.observation_space = Box(low=obs_low, high=obs_high, dtype=np.float64)
+        self.observation_space = Box(low=obs_low, high=obs_high, dtype=np.float32)
         self.current_obs = None
 
         # ACTION SPACE
         self.action_space = Discrete(self.n_qnodes)
 
         # Load QTasks dataset
-        if dataset is None:
-            raise ValueError("Dataset is not specified")
-        self.qtask_dataset = Dataset(dataset)
-        self.rng = default_rng(seed=22)
+        self.qtask_dataset = Dataset(self.dataset_path)
+        self.seed = 22
+        self.rng = default_rng(seed=self.seed)
+        
         # QSimPy environment
         self.qsp_env = simpy.Environment()
         self.setup_quantum_resources()
 
         # Round
         self.round = 1
-        self.seed = 22
         self.round_robin_index = 0
         self.results = [] 
         
@@ -117,10 +121,47 @@ def __init__(
         self.rescheduling_time = 0.01
 
         # Check if evaluation is set
+        if config is None:
+            config = {}
         self.evaluation = config.get("evaluation", False)
         self.policy = config.get("policy", "UnknownPolicy")
         
 
+    def __getstate__(self):
+        """
+        This method prepares the object for serialization (pickling).
+        We remove all objects that cannot be pickled.
+        """
+        state = self.__dict__.copy()
+        
+        # List of attributes to remove before pickling
+        unpicklable_attributes = [
+            'qsp_env', 
+            'broker', 
+            'qnodes', 
+            'rng', 
+            'qtask_dataset'
+        ]
+        
+        for attr in unpicklable_attributes:
+            if attr in state:
+                del state[attr]
+                
+        return state
+
+    def __setstate__(self, state):
+        """
+        This method restores the object after serialization.
+        We re-initialize the objects that we removed in __getstate__.
+        """
+        self.__dict__.update(state)
+        
+        # Re-initialize the unpicklable attributes
+        self.qtask_dataset = Dataset(self.dataset_path)
+        self.rng = default_rng(seed=self.seed)
+        self.qsp_env = simpy.Environment()
+        self.setup_quantum_resources()
+    
     def _get_obs(self):
         """
         Get the current observation of the environment.
@@ -130,7 +171,7 @@ def _get_obs(self):
         """
         # Get the current observation of quantum task
         if self.current_qtask is None:
-            self.qtask_obs = np.array([0, 0, 0, 0], dtype=np.float64)
+            self.qtask_obs = np.array([0, 0, 0, 0], dtype=np.float32)
         else:
             self.qtask_obs = np.array(
                 [
@@ -139,7 +180,7 @@ def _get_obs(self):
                     self.current_qtask.circuit_layers,
                     self.current_qtask.rescheduling_count,
                 ],
-                dtype=np.float64,
+                dtype=np.float32,
             )
 
         # Get the current observation of quantum nodes
@@ -151,14 +192,14 @@ def _get_obs(self):
                     qnode.clops,
                     qnode.next_available_time,
                 ],
-                dtype=np.float64,
+                dtype=np.float32,
             )
             self.qnode_obs.append(qnode_obs)
 
         # Flatten the qnode observations and concatenate with qtask observations
-        qnode_obs_flat = np.concatenate(self.qnode_obs).astype(np.float64)
+        qnode_obs_flat = np.concatenate(self.qnode_obs).astype(np.float32)
         self.current_obs = np.concatenate(
-            (self.qtask_obs, qnode_obs_flat), dtype=np.float64
+            (self.qtask_obs, qnode_obs_flat), dtype=np.float32
         )
         return self.current_obs
 
@@ -177,6 +218,10 @@ def setup_quantum_resources(self):
             for qid, qname in zip(qnode_ids, qnode_names)
         ]
 
+        # Manually assign the name attribute to each node after creation
+        for node, name in zip(self.qnodes, qnode_names):
+            node.name = name
+        
         # Create a Broker
         self.broker = Broker(self.qsp_env, self.qnodes, self.mode)
 
@@ -221,22 +266,22 @@ def generate_qtasks(self):
         self.round += 1
 
     def submit_task_to_qnode(self, qtask, qnode_id=None):
-        reward = 0
         if qnode_id is None:
             qnode_id = self.round_robin_index % self.n_qnodes
             self.round_robin_index += 1
+
+        # Check for task validity against the chosen node
         qtask, waiting_time, execution_time = self.broker.preprocess_qtask(
             qtask, self.qnodes[qnode_id]
         )
+
         if qtask.status == TaskStatus.ERROR:
-            # Apply large penalty to the reward if QTask constraints are not satisfied
-            # Beside, this task need to be rescheduled to another QNode until it can be executed
-            # Put this task back to the queue
+            # Handle infeasible scheduling
             qtask.status = TaskStatus.QUEUED
             qtask.QNode = None
             qtask.rescheduling_count += 1
-            qtask.arrival_time += 1
-            # Find the index to insert the qtask based on arrival_time
+            qtask.arrival_time = self.qsp_env.now + self.rescheduling_time
+            
             index = 0
             while (
                 index < len(self.qtasks)
@@ -244,29 +289,64 @@ def submit_task_to_qnode(self, qtask, qnode_id=None):
             ):
                 index += 1
             self.qtasks.insert(index, qtask)
+            
+            self.qsp_env.run(until=self.qsp_env.now + self.rescheduling_time)
             return -0.1, qtask.rescheduling_count
-        # Submit the qtask to the qnode following the action
+        
+        # If valid, schedule the task
         qtask_execution = self.broker.submit_qtask_to_qnode(
             qtask, self.qnodes[qnode_id]
         )
         self.qsp_env.process(qtask_execution)
-        # Delay time is the time from initial arrival time to the time the task started to be placed in the QNode
-        delay_time = qtask.arrival_time - qtask.init_arrival_time
         
-        # print(f"Estimated waiting time: {waiting_time}")
-        # print(f"Estimated execution time: {execution_time}")
+        # Calculate all timing info BEFORE running the simulation, from the
+        # waiting/execution values returned by preprocess_qtask.
+        
+        # The task will start after the current time + its waiting time.
+        start_time = self.qsp_env.now + waiting_time
+        
+        # We can store this on the object now for logging purposes.
+        qtask.start_time = start_time
+        
+        # Calculate the exact time the simulation needs to run until.
+        completion_time = start_time + execution_time
+        
+        # Delay from the task's very first arrival until it starts executing;
+        # this already includes waiting_time.
+        delay_time = start_time - qtask.init_arrival_time
+
+        # NOW, run the simulation until this specific task is done
+        self.qsp_env.run(until=completion_time)
+        
         self.results.append({
             'qtask_id': qtask.id,
             'qnode_id': qnode_id,
             'waiting_time': waiting_time,
             'execution_time': execution_time,
-            'rescheduling_count': qtask.rescheduling_count,  # Store the actual count from the task
+            'rescheduling_count': qtask.rescheduling_count,
         })
-        reward = delay_time + waiting_time + execution_time
-        return reward, qtask.rescheduling_count
+        
+        self.serviced_qtasks.append(qtask)
+        
+        # Total time in the system = first arrival -> completion. delay_time
+        # already contains waiting_time, so it must not be added a second time.
+        total_time_in_system = delay_time + execution_time
+
+        return total_time_in_system, qtask.rescheduling_count
 
     def reset(self, *, seed=None, options=None):
-        super().reset(seed=22)
+        super().reset(seed=seed)
+        if seed is not None:
+            self.seed = seed
+        
+        # Re-initialize the entire simulation environment
+        self.qsp_env = simpy.Environment()
+        self.rng = default_rng(seed=self.seed)
+        self.setup_quantum_resources()
+        self.results = []
+        self.round = 1
+        self.serviced_qtasks = [] 
+        
         self.generate_qtasks()
         self.current_obs = self._get_obs().astype(np.float32)
         info = {}
@@ -298,29 +378,36 @@ def collect_results(self):
         return summary
 
     def step(self, action):
-        # Submit the current qtask to the selected qnode
-        # action is qnode_id
-        # Intermediately reward is the inverse of completion time
-        # Sample Objective: Minimize the total completion time of all qtasks
+        # Submit the current qtask to the selected qnode and run the simulation
+        # The returned `time_reward` is the total time the task spent in the system
         time_reward, _ = self.submit_task_to_qnode(
             self.current_qtask, action
         )
-        reward = 1/time_reward
+
+        # A reward of -0.1 indicates a penalty for an invalid action
+        if time_reward == -0.1:
+            reward = -0.1
+        else:
+            # The objective is to MINIMIZE completion time, so the reward should be inverse
+            # Add a small epsilon to avoid division by zero
+            reward = 1.0 / (time_reward + 1e-6)
 
         scheduled_qtask = self.current_qtask
 
         # Get the next observation
-        # Check if there are more qtasks, if yes, get the next qtask, otherwise set terminated to True
         if len(self.qtasks) > 0:
             self.current_qtask = self.qtasks.pop(0)
+            # Advance simulation time to the arrival of the next task if needed
+            if self.qsp_env.now < self.current_qtask.arrival_time:
+                self.qsp_env.run(until=self.current_qtask.arrival_time)
             terminated = False
         else:
             self.current_qtask = None
             terminated = True
 
         self.current_obs = self._get_obs()
-
-        return self.current_obs, reward, terminated, False, {"scheduled_qtask": scheduled_qtask}
+        info = {"scheduled_qtask_id": scheduled_qtask.id}
+        return self.current_obs, reward, terminated, False, info
 
     def close(self):
         # If the evaluation is set, run the environment and export the results
diff --git a/ray_train_dqn.py b/ray_train_dqn.py
index 8ccab2f..85dd3ef 100644
--- a/ray_train_dqn.py
+++ b/ray_train_dqn.py
@@ -1,18 +1,19 @@
 import argparse
-
+import os
+os.environ["RAY_TRAIN_V2_ENABLED"] = "1"  # set before importing ray: the flag is presumably read at import time -- TODO confirm
 import ray
 from ray import tune, air, train
 from ray.tune.registry import register_env
 from env_creator import qsimpy_env_creator
 from ray.rllib.algorithms.dqn import DQNConfig
+from ray.tune import RunConfig
 from ray.rllib.utils.framework import try_import_tf
 from ray.tune.analysis import ExperimentAnalysis
-import os
 
 tf1, tf, tfv = try_import_tf()
 parser = argparse.ArgumentParser()
 
-parser.add_argument("--num-cpus", type=int, default=0)
+parser.add_argument("--num-cpus", type=int, default=1)
 
 parser.add_argument(
     "--framework",
@@ -35,29 +36,21 @@
 
     register_env("QSimPyEnv", qsimpy_env_creator)
 
-    replay_config = {
-        "type": "MultiAgentPrioritizedReplayBuffer",
-        "capacity": 60000,
-        "prioritized_replay_alpha": 0.5,
-        "prioritized_replay_beta": 0.5,
-        "prioritized_replay_eps": 3e-6,
-    }
+    # replay_config = {
+    #     "type": "MultiAgentPrioritizedReplayBuffer",
+    #     "capacity": 60000,
+    #     "prioritized_replay_alpha": 0.5,
+    #     "prioritized_replay_beta": 0.5,
+    #     "prioritized_replay_eps": 3e-6,
+    # }
 
     config = (
         DQNConfig()
         .framework(framework=args.framework)
-        .environment(
-            env="QSimPyEnv",
-            env_config={
-                "obs_filter": "rescale_-1_1",
-                "reward_filter": None,
-                "dataset": "qdataset/qsimpyds_1000_sub_26.csv",
-            },
-        )
+        .environment(env="QSimPyEnv",env_config={"obs_filter": "rescale_-1_1","reward_filter": None,"dataset": "qdataset/qsimpyds_1000_sub_26.csv",},)
         .training(
             lr=tune.grid_search([0.01]),
             train_batch_size=tune.grid_search([78]),
-            replay_buffer_config=replay_config,
             num_atoms=tune.grid_search(
                 [
                     10
@@ -68,7 +61,7 @@
             v_min=-10.0,
             v_max=10.0,
         )
-        .rollouts(num_rollout_workers=8)
+        .env_runners(num_env_runners=8)
     )
 
     stop_config = {
diff --git a/results/heuristics/greedy.csv b/results/heuristics/greedy.csv
index 3b19e5e..7115573 100644
--- a/results/heuristics/greedy.csv
+++ b/results/heuristics/greedy.csv
@@ -1,101 +1,101 @@
 Episode,Total Completion Time,Rescheduling Count
-0,774.4230420475817,40.0
-1,1046.894424897719,42.0
-2,617.9205737688227,40.0
-3,628.8109224991855,40.0
-4,866.5606607725742,42.0
-5,566.5663152799716,36.0
-6,413.6463392302391,30.0
-7,432.2598716991536,32.0
-8,870.3762464998516,41.0
-9,1394.6824482677005,42.0
-10,363.90939944019135,39.0
-11,383.7768448815786,33.0
-12,207.1562787454552,35.0
-13,621.3609037155244,35.0
-14,742.9834560198984,36.0
-15,408.37357491369073,29.0
-16,849.3653276907853,40.0
-17,589.6526842487319,39.0
-18,690.1527352313494,38.0
-19,463.1828397210162,44.0
-20,749.1483089017328,37.0
-21,361.10023576100224,33.0
-22,1020.6336747898247,40.0
-23,643.4919808072126,39.0
-24,678.9492887209697,36.0
-25,704.0030903378286,40.0
-26,236.71913086329505,24.0
-27,868.0363015343289,40.0
-28,514.4224902263933,35.0
-29,1029.4922803886213,42.0
-30,886.7245346776365,37.0
-31,586.3764599582873,41.0
-32,433.24367449188094,32.0
-33,653.1261897262499,36.0
-34,554.3090769693335,42.0
-35,700.4555073920585,37.0
-36,601.7057329653607,38.0
-37,670.5812418983837,44.0
-38,771.29270222627,34.0
-39,558.4195175017535,40.0
-40,532.6560439087277,38.0
-41,599.5196239369212,44.0
-42,536.9127082928047,38.0
-43,727.4146780551054,40.0
-44,375.8560508023986,35.0
-45,566.307642637826,37.0
-46,460.37506666810236,39.0
-47,757.2286153476097,41.0
-48,545.5224263252145,32.0
-49,478.29268766411326,42.0
-50,435.43733240245353,38.0
-51,695.6963699855635,39.0
-52,482.25652324277115,37.0
-53,869.7229706811498,41.0
-54,546.1983303906775,31.0
-55,590.9020429390708,35.0
-56,755.3380698294844,40.0
-57,558.4616943255035,40.0
-58,545.7684111909076,40.0
-59,425.70812736455537,37.0
-60,633.0101678477301,35.0
-61,521.8836754508253,36.0
-62,973.8842211028855,43.0
-63,412.82137516049215,38.0
-64,987.7760875697948,40.0
-65,642.5279610549509,39.0
-66,582.1721791618904,36.0
-67,693.9500035705355,40.0
-68,789.7683292375004,38.0
-69,684.3098796725943,40.0
-70,604.3977524815559,36.0
-71,802.168098151363,41.0
-72,788.5206318256928,42.0
-73,676.1757941145282,34.0
-74,589.1742946893258,38.0
-75,787.8254076771084,44.0
-76,589.0176430744142,38.0
-77,769.168050375966,42.0
-78,739.4541400399085,40.0
-79,597.4687583949419,33.0
-80,446.0454398494521,32.0
-81,507.43657309680856,31.0
-82,509.46519499947544,31.0
-83,477.6576370957176,39.0
-84,448.7200313033012,38.0
-85,959.3483665188473,44.0
-86,552.6945242207725,30.0
-87,832.3409587334892,40.0
-88,587.9273806639311,35.0
-89,679.4131679478429,37.0
-90,393.8545712065559,31.0
-91,762.0365058522186,38.0
-92,447.8208740424661,38.0
-93,915.649490044282,40.0
-94,768.9448839386108,42.0
-95,388.82369455317746,33.0
-96,1251.7773158818782,42.0
-97,739.013409053475,41.0
-98,760.3428642461873,43.0
-99,789.5407058575811,42.0
+0,1183.9459122412904,36.0
+1,1304.1060486889678,38.0
+2,1107.3483841461803,38.0
+3,1237.9392352300667,38.0
+4,1236.9295710178753,41.0
+5,1210.8028734110198,36.0
+6,1043.9534820251167,40.0
+7,1156.492801015281,36.0
+8,1220.9602955695482,42.0
+9,1011.3446698887095,37.0
+10,1081.2775353421061,39.0
+11,1244.319998800302,38.0
+12,1218.2735617508893,38.0
+13,1019.388821666389,37.0
+14,1229.8966170481547,39.0
+15,1093.6212427601924,38.0
+16,1231.423899488196,38.0
+17,1367.5884718464686,39.0
+18,1120.9125786460893,38.0
+19,1394.0235256291312,41.0
+20,1298.3253944598105,38.0
+21,988.378698479236,39.0
+22,1236.2641099207906,38.0
+23,1049.1392418742864,39.0
+24,1237.766463115403,38.0
+25,1169.4162655674684,38.0
+26,1425.5167191375926,38.0
+27,1190.8823215669206,37.0
+28,1194.590567863168,37.0
+29,1110.6357167097287,41.0
+30,1231.5172977589273,38.0
+31,1048.4470859354412,39.0
+32,1192.7609613262216,41.0
+33,1282.3116189327266,39.0
+34,1036.2638878508128,38.0
+35,1225.198734340374,39.0
+36,1330.3044485649173,38.0
+37,1198.3259132522317,39.0
+38,1271.4388610294975,39.0
+39,1240.5114604013022,38.0
+40,1141.5616815790954,37.0
+41,1274.9352952524102,35.0
+42,1121.9371921773936,37.0
+43,1195.0337454219111,41.0
+44,1242.812993445969,40.0
+45,1283.902709950512,38.0
+46,1241.8853796051471,39.0
+47,1040.5459565277777,39.0
+48,1276.0220852937023,38.0
+49,1181.8744537609898,36.0
+50,1239.2056215332216,38.0
+51,1102.8949106096657,36.0
+52,953.7059610662435,40.0
+53,1101.4305486729659,36.0
+54,1070.445474088991,39.0
+55,1214.79866104364,38.0
+56,1020.4426571535101,37.0
+57,1349.0980442539358,39.0
+58,1133.8641803805442,38.0
+59,1282.639861556527,42.0
+60,1181.2986079255668,41.0
+61,1145.9037738151753,41.0
+62,1240.06598238621,36.0
+63,1189.4294459065386,39.0
+64,1090.9949717103702,36.0
+65,1327.2917867095198,39.0
+66,1227.4314309062956,38.0
+67,1301.3005820184655,39.0
+68,1153.8009942751514,38.0
+69,1111.8386284684382,38.0
+70,1088.046739061963,42.0
+71,1123.7784870438327,38.0
+72,1041.5766966554759,40.0
+73,1066.0858238109768,40.0
+74,1283.3385153928568,41.0
+75,973.6595792404268,37.0
+76,1213.365629465177,36.0
+77,1348.563805964419,38.0
+78,1272.2176877603745,39.0
+79,1234.2354816808408,35.0
+80,1107.6129207490808,39.0
+81,1181.150685564418,38.0
+82,1292.1929208929669,38.0
+83,1319.2524725250626,39.0
+84,1314.4200041548559,38.0
+85,1227.2961509144877,38.0
+86,1150.4419566864112,38.0
+87,1073.774883730797,39.0
+88,1152.4827565837513,38.0
+89,1167.1131917720652,38.0
+90,1223.875985559418,39.0
+91,1389.297687318999,37.0
+92,1318.553506466468,38.0
+93,1114.2891554425914,38.0
+94,1306.2362995090953,38.0
+95,1216.249038236079,37.0
+96,938.5699404450673,37.0
+97,1232.1592333680205,38.0
+98,1287.1247279794693,39.0
+99,1208.4424941191712,42.0
diff --git a/results/heuristics/greedy_error.csv b/results/heuristics/greedy_error.csv
index e5d68a5..4056309 100644
--- a/results/heuristics/greedy_error.csv
+++ b/results/heuristics/greedy_error.csv
@@ -1,101 +1,101 @@
-Episode,Total Completion Time,Rescheduling Count
-0,779.286564033785,44.0
-1,1018.0907645941475,46.0
-2,641.8455081869836,44.0
-3,654.0831900588066,44.0
-4,871.1009895371011,46.0
-5,585.3419575893722,40.0
-6,446.8945558944891,35.0
-7,512.7857438402183,36.0
-8,870.0392874462106,45.0
-9,1403.2538843569118,46.0
-10,437.20049371329264,43.0
-11,387.3112745833579,37.0
-12,241.9880555126875,40.0
-13,626.9781166282103,39.0
-14,780.902382522772,40.0
-15,388.4511046105547,35.0
-16,827.7077435577246,44.0
-17,607.3581004546566,43.0
-18,700.7635993222689,42.0
-19,499.64671161102183,48.0
-20,755.1151848860904,37.0
-21,395.14827467242446,37.0
-22,1118.8852805272445,44.0
-23,672.2494591832836,43.0
-24,752.6005621924559,40.0
-25,721.8238032533495,44.0
-26,207.0930613935666,28.0
-27,888.658573558916,44.0
-28,534.4488956052584,39.0
-29,1005.5085728583823,43.0
-30,853.3132124534151,41.0
-31,590.8656874378183,45.0
-32,461.01304465604215,36.0
-33,686.935225951273,40.0
-34,577.5721509951466,46.0
-35,716.722166899953,41.0
-36,625.5229764371421,41.0
-37,615.3195559790382,48.0
-38,780.3194866897076,38.0
-39,594.9541520086445,44.0
-40,561.3443611413669,43.0
-41,661.7754295462144,48.0
-42,571.6179140077627,42.0
-43,737.876597753784,44.0
-44,398.5770308682531,39.0
-45,593.1565609649705,41.0
-46,448.31888197741904,43.0
-47,739.4550793811899,45.0
-48,566.6025067912151,36.0
-49,488.08591091471266,46.0
-50,473.32301622092916,41.0
-51,702.8283220644104,43.0
-52,501.7041333591779,41.0
-53,859.9557336531625,45.0
-54,572.9886504975027,34.0
-55,682.6790300121812,39.0
-56,784.8824768279756,44.0
-57,524.2425649438379,44.0
-58,560.7080082720844,41.0
-59,470.0018740168153,41.0
-60,685.8348449770591,39.0
-61,578.3072927716473,41.0
-62,1037.7674743484838,47.0
-63,363.07834646054357,41.0
-64,975.0234557794759,44.0
-65,645.9724131814359,43.0
-66,572.6235504259654,37.0
-67,703.0491048629965,44.0
-68,816.9584956357872,42.0
-69,645.0645671546602,44.0
-70,601.470562470287,36.0
-71,792.2956085695879,45.0
-72,794.2921778200525,46.0
-73,687.8876958511382,38.0
-74,563.7197154668082,43.0
-75,810.3360229023112,48.0
-76,630.9707744234905,42.0
-77,808.783449981612,46.0
-78,760.8511106964423,44.0
-79,614.4434092193461,39.0
-80,476.1463679302302,36.0
-81,547.9441818033677,35.0
-82,545.6914809418145,35.0
-83,485.7904843422369,39.0
-84,450.62171141799803,42.0
-85,1073.4248717263085,48.0
-86,551.0077594549593,34.0
-87,825.3505545480989,44.0
-88,610.0023367103439,39.0
-89,697.8224633469039,41.0
-90,412.86652903623803,35.0
-91,769.2821875828371,42.0
-92,430.0895171689559,41.0
-93,955.1218764261479,44.0
-94,774.9226519278552,46.0
-95,410.0365030939497,37.0
-96,1256.3691795545785,46.0
-97,767.2365327297308,45.0
-98,752.0432471902363,47.0
-99,798.6604685050595,46.0
+Episode,Total Completion Time,Rescheduling Count
+0,1239.048764954939,39.0
+1,1447.3809102195498,40.0
+2,1357.8592302626796,41.0
+3,1426.5984717667798,40.0
+4,1394.9996964120105,40.0
+5,1412.4067564950926,40.0
+6,1191.8641597521364,39.0
+7,1369.5599085838317,41.0
+8,1339.3670167557111,44.0
+9,1316.256617779292,40.0
+10,1212.947162058655,40.0
+11,1315.5206810147536,39.0
+12,1246.0090195324747,41.0
+13,1366.1959387382353,40.0
+14,1478.5206967556799,41.0
+15,1355.715657016669,40.0
+16,1398.573522782949,41.0
+17,1394.897703598718,41.0
+18,1241.8491035509005,40.0
+19,1451.7677670922074,41.0
+20,1587.4008554661368,39.0
+21,1376.4154484092592,41.0
+22,1368.617729101173,39.0
+23,1407.958307495498,40.0
+24,1398.8189078951605,40.0
+25,1350.8575797398626,41.0
+26,1392.9980640182523,40.0
+27,1391.7148493963932,41.0
+28,1212.3957010955828,35.0
+29,1410.6336057756723,40.0
+30,1290.5416794481623,39.0
+31,1318.9594541601798,40.0
+32,1431.2819413412565,40.0
+33,1448.984343305557,40.0
+34,1370.4671158191245,40.0
+35,1278.436882343706,38.0
+36,1441.024057680508,40.0
+37,1519.152766779766,41.0
+38,1332.764898867378,40.0
+39,1378.6588217403244,40.0
+40,1405.4835611885142,41.0
+41,1408.1875283395673,40.0
+42,1377.5221767896976,40.0
+43,1405.6932855039502,40.0
+44,1502.6204558223353,40.0
+45,1292.0080638511467,41.0
+46,1461.478976682054,40.0
+47,1173.0722514624342,40.0
+48,1458.11592944274,40.0
+49,1437.109101365366,40.0
+50,1348.263786161681,41.0
+51,1245.4731287417042,41.0
+52,1207.9527096172408,38.0
+53,1370.9602446541264,40.0
+54,1494.6075327702733,40.0
+55,1409.7283273069293,40.0
+56,1382.592160415308,41.0
+57,1417.6208083973188,40.0
+58,1270.4401160749385,41.0
+59,1441.9791558577772,44.0
+60,1360.0366106276126,40.0
+61,1342.5820858907832,40.0
+62,1391.6678384105055,40.0
+63,1374.657497027954,44.0
+64,1414.132261469257,41.0
+65,1239.539215641884,39.0
+66,1287.5161549173463,40.0
+67,1124.2196070761331,44.0
+68,1474.8428768126796,41.0
+69,1111.0591749111454,43.0
+70,1294.0883411166772,44.0
+71,1385.9202239156766,39.0
+72,1263.1937058490112,39.0
+73,1571.2648734549819,40.0
+74,1346.4777982177188,40.0
+75,1341.1920443637373,40.0
+76,1307.8279986720497,41.0
+77,1442.3777391602084,40.0
+78,1109.553424136137,44.0
+79,1405.6549117074155,41.0
+80,1049.6954729488489,40.0
+81,1436.421981938819,40.0
+82,1168.6795780273285,41.0
+83,1411.9838896494286,40.0
+84,1494.123101631842,40.0
+85,1368.5519941081814,41.0
+86,1219.095679234422,39.0
+87,1411.0638647887636,41.0
+88,1337.403006103293,41.0
+89,1375.6374114372045,41.0
+90,1376.9316550241626,40.0
+91,1584.2203210207706,41.0
+92,1298.028778210999,41.0
+93,1318.8314543537363,41.0
+94,1364.6143698879666,39.0
+95,1424.4839910707544,40.0
+96,1435.5224124829674,40.0
+97,1295.3617411987918,41.0
+98,1309.8264641208705,39.0
+99,1470.2992080648744,41.0
diff --git a/results/heuristics/random.csv b/results/heuristics/random.csv
index bbc816a..034f207 100644
--- a/results/heuristics/random.csv
+++ b/results/heuristics/random.csv
@@ -1,101 +1,101 @@
 Episode,Total Completion Time,Rescheduling Count
-0,1478.5757952463066,15.0
-1,1330.0717844035278,30.0
-2,1358.4802022002825,14.0
-3,896.0747618793595,15.0
-4,1593.6304325693502,11.0
-5,1258.9852748962012,17.0
-6,1168.6295077047514,10.0
-7,939.3489087440826,25.0
-8,1317.2328963468426,12.0
-9,1026.1666454641281,5.0
-10,1779.2610736119923,9.0
-11,1065.4091749296842,9.0
-12,1113.8336344676886,10.0
-13,1271.9906573229684,8.0
-14,646.9844940702196,8.0
-15,722.5758079698842,19.0
-16,542.4364262618708,11.0
-17,1930.820470337957,26.0
-18,1532.3709399390386,19.0
-19,911.3854104249705,21.0
-20,1110.4689838317045,16.0
-21,826.8928272374625,13.0
-22,908.3708768892137,10.0
-23,1141.046644951563,12.0
-24,1036.1727285265406,16.0
-25,720.6487746534319,16.0
-26,856.415307628558,13.0
-27,662.4360868326567,7.0
-28,534.3095941428542,13.0
-29,1194.0801396555087,18.0
-30,731.8626413120057,12.0
-31,1048.5611447527035,12.0
-32,792.8744885216804,24.0
-33,1382.1510884776408,16.0
-34,700.6986682655009,10.0
-35,1370.850122338716,5.0
-36,789.7678264415608,14.0
-37,794.0932177655627,7.0
-38,1347.5809260895185,14.0
-39,575.5529914939242,8.0
-40,2789.7382001725864,20.0
-41,1843.8918971378828,13.0
-42,628.7543454347912,15.0
-43,1242.3926080693172,18.0
-44,1591.3038021635002,13.0
-45,733.5976211045506,18.0
-46,697.285717128637,9.0
-47,1004.1860794312714,14.0
-48,971.4523252511981,9.0
-49,782.7027681875801,9.0
-50,1096.0961398848933,15.0
-51,1702.1268987637109,13.0
-52,2911.3100280435533,28.0
-53,632.5023550220482,11.0
-54,1089.277243557256,8.0
-55,995.7650568151922,16.0
-56,1400.03726152878,10.0
-57,628.8026716452497,9.0
-58,976.5280579120243,4.0
-59,1566.4428430697599,16.0
-60,1517.8556731435378,11.0
-61,1286.4812745493296,16.0
-62,1041.0898217439374,19.0
-63,842.2850871374727,15.0
-64,963.8396078186036,15.0
-65,744.1331633393584,10.0
-66,901.5830860058976,9.0
-67,476.9874574733113,17.0
-68,663.6282355387759,8.0
-69,1103.5751434524334,20.0
-70,1390.1220640853858,13.0
-71,1000.9959229584504,10.0
-72,776.2366402936705,4.0
-73,1154.914863402619,9.0
-74,2047.6674411643612,14.0
-75,2667.05460364607,13.0
-76,887.3010218508851,8.0
-77,895.6278939051847,5.0
-78,1156.3472075100271,8.0
-79,678.8417681934935,11.0
-80,778.1908213720845,12.0
-81,1215.5078473397223,10.0
-82,998.1348328298731,11.0
-83,1408.2445364017726,8.0
-84,766.8626439732058,15.0
-85,890.0562179830085,6.0
-86,696.7525782879777,8.0
-87,881.5376001686645,15.0
-88,436.1512366383241,8.0
-89,876.1794201725302,9.0
-90,1018.4002073712784,13.0
-91,646.7646596599425,11.0
-92,2720.438865774572,16.0
-93,1474.6468750711817,9.0
-94,1586.7329733839,18.0
-95,1373.6813442230577,14.0
-96,1724.387722049794,12.0
-97,437.1704602696404,15.0
-98,1572.289318568651,7.0
-99,1032.2216660118052,15.0
+0,5198.593881514102,13.0
+1,5613.302968998986,14.0
+2,4003.990819664853,19.0
+3,8444.450131784446,9.0
+4,4286.1220704825155,12.0
+5,8031.450448715791,10.0
+6,6789.781269221228,11.0
+7,3543.4557720357516,15.0
+8,12273.37572409921,9.0
+9,9646.92642413784,11.0
+10,8813.460771810749,11.0
+11,4553.869421968707,16.0
+12,7904.990971970607,11.0
+13,15807.862130028756,9.0
+14,3380.1283729960246,22.0
+15,4837.782106291301,8.0
+16,3544.7102284757507,22.0
+17,6817.367764301595,12.0
+18,9037.215121279707,9.0
+19,6416.54946419231,19.0
+20,7014.375858135391,13.0
+21,8822.041678422705,7.0
+22,4991.453208830219,9.0
+23,4483.4764827903355,10.0
+24,4005.296517933301,14.0
+25,2277.2058361203003,21.0
+26,4383.8941244667685,18.0
+27,3148.304079331725,11.0
+28,4843.388450419759,11.0
+29,4354.150660936775,22.0
+30,5197.680892335768,8.0
+31,5391.891422966844,16.0
+32,3768.2256284565347,16.0
+33,9351.238154931527,15.0
+34,5471.762399949438,10.0
+35,6075.139230340897,10.0
+36,5720.0022304127,17.0
+37,5863.970166233917,7.0
+38,3892.8799395691417,7.0
+39,8681.737864246154,10.0
+40,5266.070835987855,21.0
+41,4125.918679463659,11.0
+42,2073.4457855385367,20.0
+43,6934.139143525072,10.0
+44,4054.2488036486807,7.0
+45,5651.354335859565,12.0
+46,3814.7668949773306,17.0
+47,6966.617077933323,7.0
+48,8215.29856207512,10.0
+49,8451.562759253464,5.0
+50,9009.005066346415,15.0
+51,3508.626605506074,14.0
+52,6709.753156042707,14.0
+53,4076.025786425259,10.0
+54,5463.946602458646,13.0
+55,9497.415998368384,13.0
+56,3597.1930906058383,15.0
+57,3131.0649049685776,15.0
+58,8327.734181098755,6.0
+59,6004.484850597683,11.0
+60,4228.066800751873,6.0
+61,5239.487452821901,6.0
+62,7177.82395018398,16.0
+63,6525.6778982239575,14.0
+64,4990.597348920271,14.0
+65,3736.1051504266793,12.0
+66,4841.75723597414,6.0
+67,8584.249235020428,17.0
+68,4066.003577074813,17.0
+69,3398.213883587471,10.0
+70,5938.105106204257,9.0
+71,4856.153969281786,11.0
+72,4147.926325279979,17.0
+73,3173.035681716114,14.0
+74,4745.548226057148,13.0
+75,4776.743268000405,21.0
+76,4607.533759486629,14.0
+77,11686.28389430349,9.0
+78,4377.16607447434,20.0
+79,5250.1168560443675,13.0
+80,7136.780606628455,11.0
+81,5395.92032519524,11.0
+82,5548.126131897136,13.0
+83,4505.617100369699,11.0
+84,4187.86187803539,19.0
+85,3963.251725091569,14.0
+86,5332.233057462485,16.0
+87,5681.587673058929,17.0
+88,5728.9023137522245,3.0
+89,7922.29047185589,5.0
+90,5235.859484687564,16.0
+91,8972.26163422719,4.0
+92,4345.741343872391,10.0
+93,1644.578171259856,34.0
+94,2971.0588736881214,22.0
+95,17173.949128379267,5.0
+96,2659.3054093785963,18.0
+97,4269.8547739742835,13.0
+98,8678.250330361365,23.0
+99,5422.7976721694295,8.0
diff --git a/results/heuristics/round_robin.csv b/results/heuristics/round_robin.csv
index beddae4..0555ae3 100644
--- a/results/heuristics/round_robin.csv
+++ b/results/heuristics/round_robin.csv
@@ -1,101 +1,101 @@
 Episode,Total Completion Time,Rescheduling Count
-0,896.6456654046958,12.0
-1,1871.0879022002375,16.0
-2,510.79570315056674,12.0
-3,824.1042787696023,13.0
-4,793.7566353584212,16.0
-5,1195.5185394703121,11.0
-6,1049.0003821311604,11.0
-7,529.3791395821091,7.0
-8,1118.336106206277,9.0
-9,673.4589619263967,13.0
-10,1469.0348603956381,11.0
-11,494.8463140046214,13.0
-12,793.3971136973067,12.0
-13,1100.5065892325117,12.0
-14,1591.2272687430552,13.0
-15,477.39917784322813,12.0
-16,1007.4534663044803,13.0
-17,958.4246494918717,13.0
-18,457.6562821021427,12.0
-19,503.66683242919765,7.0
-20,720.4785259890806,16.0
-21,874.4693058959571,13.0
-22,1044.4341061423006,11.0
-23,637.3702856308968,7.0
-24,1226.0152274772724,13.0
-25,598.0127872691697,16.0
-26,698.4138902971073,11.0
-27,1081.5844919875208,11.0
-28,1166.6331596861705,13.0
-29,756.227117050151,12.0
-30,1002.0314899664113,14.0
-31,775.3366726813233,13.0
-32,1007.9649681354205,12.0
-33,613.8010912606667,9.0
-34,805.781126111259,14.0
-35,1004.9199352993268,12.0
-36,1002.8114451253723,11.0
-37,1083.08481643458,12.0
-38,665.3124446637631,11.0
-39,1108.096700775744,14.0
-40,638.3453855845744,8.0
-41,498.09976703964713,12.0
-42,1015.830552685081,13.0
-43,1138.6822736859224,16.0
-44,633.7455602973097,5.0
-45,944.2120125318447,13.0
-46,828.9142438710427,11.0
-47,799.1685394622021,11.0
-48,1001.5054643858178,16.0
-49,680.4556417750368,7.0
-50,653.8649837420505,11.0
-51,515.6572095931972,13.0
-52,1319.0663130611722,11.0
-53,1013.0111902857732,12.0
-54,1386.6047380820037,16.0
-55,1129.9116464179374,12.0
-56,818.6890571057631,13.0
-57,839.8067544380015,12.0
-58,1379.5509489644949,13.0
-59,649.5536809931372,13.0
-60,956.2737942179783,12.0
-61,1096.2702825791414,11.0
-62,871.9568993703994,12.0
-63,833.2604699493944,12.0
-64,985.3178404348411,13.0
-65,739.4419406882281,13.0
-66,594.903416989284,13.0
-67,629.7017297275405,12.0
-68,859.9884263667459,16.0
-69,741.1079985813102,6.0
-70,1096.235995871185,13.0
-71,1077.06661046078,12.0
-72,744.8403333430768,12.0
-73,581.6590883388335,8.0
-74,723.1321970419509,9.0
-75,583.8902999476568,11.0
-76,863.0820192112726,8.0
-77,630.7861055399047,8.0
-78,780.8301014046206,13.0
-79,522.9615301998499,9.0
-80,330.4524344935783,12.0
-81,1108.5922830379768,11.0
-82,697.7588930036542,8.0
-83,1143.4940273168565,13.0
-84,965.190344518973,11.0
-85,1039.5953489887909,13.0
-86,756.1646445429772,13.0
-87,1233.6372212514466,16.0
-88,1136.2305108205057,12.0
-89,1346.8606566757437,16.0
-90,1247.0016497513018,11.0
-91,371.3166356704254,8.0
-92,779.4272937782516,13.0
-93,761.2030528745827,14.0
-94,860.4854299738633,11.0
-95,1020.7382699857077,12.0
-96,592.0044182627178,8.0
-97,1049.3217613015659,13.0
-98,1099.9249351141862,16.0
-99,656.064437356785,12.0
+0,6506.023895405447,16.0
+1,6610.537996610873,16.0
+2,6846.659553690184,16.0
+3,6758.504057494352,16.0
+4,6935.5544384370705,13.0
+5,7034.135898475587,16.0
+6,6630.794734678966,16.0
+7,6970.078715556613,16.0
+8,7112.18140915512,16.0
+9,6235.4468231614255,13.0
+10,7051.500717607177,16.0
+11,7216.2157048645295,16.0
+12,6589.7897917176415,16.0
+13,6475.80104318339,13.0
+14,6439.4163434859,13.0
+15,6543.866608779393,13.0
+16,7093.465036001735,16.0
+17,6757.44809723448,16.0
+18,6904.551072364078,16.0
+19,6675.350369769198,13.0
+20,7021.342291105484,16.0
+21,6494.906998854868,16.0
+22,6989.683508227692,16.0
+23,7060.342117872161,16.0
+24,7123.209574178231,13.0
+25,7266.365010631792,16.0
+26,7251.445393962951,16.0
+27,6656.330236205487,16.0
+28,6424.429828939011,16.0
+29,7066.849436163771,16.0
+30,7253.369565948225,16.0
+31,6978.442363573118,16.0
+32,6885.145285326886,13.0
+33,6946.9901296425505,16.0
+34,6512.521551042265,16.0
+35,6665.004996531719,16.0
+36,6789.511531020305,16.0
+37,6738.8148863974975,16.0
+38,7320.744961460436,13.0
+39,7017.344196410132,16.0
+40,7264.792748208192,16.0
+41,6955.561964184718,16.0
+42,6781.213979313943,13.0
+43,6427.086250294119,16.0
+44,6683.955901743888,16.0
+45,5366.973379780834,13.0
+46,7004.919659115017,16.0
+47,7079.4644729433685,16.0
+48,7234.318755729135,16.0
+49,6948.173876165056,13.0
+50,6719.570521151857,16.0
+51,6587.768679899307,16.0
+52,7002.334776695339,16.0
+53,6476.882502780452,16.0
+54,6794.359389228334,16.0
+55,7156.545013947016,16.0
+56,6656.635646965103,16.0
+57,6863.35786749546,16.0
+58,6742.683432799005,16.0
+59,7775.248641502792,16.0
+60,6615.35738330774,16.0
+61,6223.759804658276,13.0
+62,6562.7477218648555,16.0
+63,7641.270244264278,16.0
+64,6871.646902155131,16.0
+65,6899.876413500736,16.0
+66,7278.690345044732,16.0
+67,7275.671690408349,16.0
+68,7076.685840245986,16.0
+69,5141.4446956918755,13.0
+70,7328.401230513089,16.0
+71,6863.790012754111,16.0
+72,7026.412884135474,16.0
+73,7301.651117051579,16.0
+74,6409.227403219812,13.0
+75,6567.213961979742,13.0
+76,6663.508475995368,16.0
+77,7578.953888858407,16.0
+78,7155.890994996379,16.0
+79,6937.780072897207,16.0
+80,6710.903428646368,16.0
+81,6820.6525235469435,16.0
+82,6697.00836153118,16.0
+83,7292.794307084686,16.0
+84,7545.835694224122,16.0
+85,6921.265195182353,16.0
+86,6736.327830387691,16.0
+87,6842.6715263192555,16.0
+88,6508.873920798973,16.0
+89,6626.557140609311,16.0
+90,6666.8699271853,16.0
+91,6695.19649852262,16.0
+92,6930.479641577,16.0
+93,6623.221475487163,16.0
+94,6989.950575953237,16.0
+95,7093.563345244263,16.0
+96,6950.564904384792,16.0
+97,6908.206776726623,16.0
+98,7110.7916567199545,16.0
+99,7518.08562269474,16.0
diff --git a/train_heuristics.py b/train_heuristics.py
index 9d79d0d..2d0e54b 100644
--- a/train_heuristics.py
+++ b/train_heuristics.py
@@ -1,51 +1,68 @@
+from multiprocessing import Process
 from env_creator import qsimpy_env_creator
 import os
+import sys
 import csv
 import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.ticker as mticker
+from qsimpy import TaskStatus 
+import numpy as np 
+import random 
 
 class HeuristicSolutions:
     def __init__(self, env, num_episodes=100):
-
-        # Initialize the environment
         self.env = env
         self.num_episodes = num_episodes
-
-        # Initialize the results of heuristic solutions
         self.results = []
-        # Round Robin index for the QNodes. Example: [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, ...]
         self.rr_index = 0
-        # Priority index of Greedy solution after sorting the QNodes based on the waiting time
         self.greedy_index = 0
+        
+        # --- NEW: Attributes for WRR ---
+        self.wrr_list = []
+        self.wrr_index = 0
+        # --- END NEW ---
+
+    def _initialize_episode_strategies(self):
+        """Reset per-episode strategy state by rebuilding the Weighted Round Robin (WRR) schedule."""
+        qnodes = self.env.unwrapped.qnodes
+        self.wrr_list = []
+        # The smallest positive CLOPS is the weight unit, so weights are whole repetitions.
+        # default=None: min() over an empty sequence raises ValueError, which would abort
+        # the run whenever no node reports a positive CLOPS.
+        min_clops = min((node.clops for node in qnodes if node.clops > 0), default=None)
+
+        for i, node in enumerate(qnodes):
+            # The weight is how many times a node's index appears in the list.
+            weight = int(round(node.clops / min_clops)) if min_clops is not None else 0
+            self.wrr_list.extend([i] * weight)
+
+        random.shuffle(self.wrr_list)  # break up long runs of the same node index
+        self.wrr_index = 0
 
     def run(self, control):
         """
         Run the heuristic solutions for the given algorithm (control).
         Args:
-            - control (str): The heuristic algorithm to use. Options: "greedy", "random", "round_robin", "greedy_error"
+            - control (str): The heuristic algorithm to use.
         """
-
         self.results = []
-        # Reset the subset of QTasks 
-        self.env.round = 1
-
-        for _ in range(self.num_episodes):
+        self.env.unwrapped.round = 1
 
-            # Initialize the temporary array to store the results of the QTasks execution for each episode
-            arr_temp = {
-                "total_completion_time": 0.0,
-                "rescheduling_count": 0.0
-            }
+        for episode_num in range(self.num_episodes):
+            arr_temp = {"total_completion_time": 0.0, "rescheduling_count": 0.0}
             terminated = False
 
-            # Reset the environment and setup the quantum resources
-            self.env.reset()
-            self.env.setup_quantum_resources()
+            self.env.reset(seed=episode_num)
+            self.env.unwrapped.setup_quantum_resources()
             self.rr_index = 0
+            self.greedy_index = 0
+            
+            # --- NEW: Initialize strategies for the episode ---
+            self._initialize_episode_strategies()
+            # --- END NEW ---
 
             while not terminated:
-                # Get the action with the given control
                 if control == "greedy":
                     action = self.greedy(self.greedy_index)
                 elif control == "random":
@@ -54,143 +71,264 @@ def run(self, control):
                     action = self.round_robin()
                 elif control == "greedy_error":
                     action = self.greedy_error(self.greedy_index)
+                elif control == "ect":
+                    action = self.earliest_completion_time()
+                elif control == "sjf":
+                    action = self.shortest_job_first()
+                # --- NEW ALGORITHMS ---
+                elif control == "luf":
+                    action = self.lowest_utilization_first()
+                elif control == "wrr":
+                    action = self.weighted_round_robin()
+                elif control == "rbn":
+                    action = self.random_best_n(n=3)
+                # --- END NEW ALGORITHMS ---
                 
+                task_to_be_scheduled = self.env.unwrapped.current_qtask
                 obs, reward, terminated, done, info = self.env.step(action)
                 
-                # If the QNode is busy or not satisfied, move to the next priority QNode
-                self.greedy_index += 1
+                if reward < 0: 
+                    self.greedy_index += 1
                 if reward > 0:
-                    """Get the results of the QTask execution
-
-                    Values:
-                        - Total Completion Time: waiting_time + execution_time
-                        - Rescheduling Count: rescheduling_count
-                    """
-                    # Reset priority index of Greedy solution if QTasks are satisfied
                     self.greedy_index = 0
+                    arr_temp["total_completion_time"] += task_to_be_scheduled.waiting_time + task_to_be_scheduled.execution_time
+                    arr_temp["rescheduling_count"] += task_to_be_scheduled.rescheduling_count
 
-                    arr_temp["total_completion_time"] += info["scheduled_qtask"].waiting_time + info["scheduled_qtask"].execution_time
-                    arr_temp["rescheduling_count"] += info["scheduled_qtask"].rescheduling_count
-            self.env.qsp_env.run()
-            # Final results of the episode
+            sys.stdout.write("\033[F\033[K")
+            print(f"[{control}] progress: {episode_num + 1}/{self.num_episodes}")
+            self.env.unwrapped.qsp_env.run()
             self.results.append(arr_temp)
+            
+            if episode_num == self.num_episodes - 1:
+                self._collect_and_summarize_metrics(control)
 
-        # Save the results to a CSV file
         self._save_to_csv(control)
-                
+        
     def greedy(self, greedy_index):
-        # Sort the QNodes based on the next available time (or waiting time) and select the QNode with the smallest waiting time
-        greedy_strategy = sorted(self.env.qnodes, key=lambda x: x.next_available_time)
-        return self.env.qnodes.index(greedy_strategy[greedy_index])
+        greedy_strategy = sorted(self.env.unwrapped.qnodes, key=lambda x: x.next_available_time)
+        safe_index = min(greedy_index, len(greedy_strategy) - 1)
+        return self.env.unwrapped.qnodes.index(greedy_strategy[safe_index])
 
     def random(self):
-        # Randomly select a QNode
-        action = self.env.action_space.sample()
-        return action
-    
+        return self.env.action_space.sample()
+
     def round_robin(self):
-        # Select the QNode based on the Round Robin index
-        action = self.rr_index % self.env.n_qnodes
+        action = self.rr_index % self.env.unwrapped.n_qnodes
         self.rr_index += 1
         return action
-    
+
     def greedy_error(self, greedy_index, g_error="Readout_assignment_error"):
-        # Sort the QNodes based on the next available time (or waiting time) and select the QNode with the 
-        # smallest waiting time and smallest error (default is readout_error) in the qnode
-    
-        greedy_strategy = sorted(self.env.qnodes, key=lambda x: (x.next_available_time, x.error[g_error]))
-        return self.env.qnodes.index(greedy_strategy[greedy_index])
+        greedy_strategy = sorted(self.env.unwrapped.qnodes, key=lambda x: (x.next_available_time, x.error[g_error]))
+        safe_index = min(greedy_index, len(greedy_strategy) - 1)
+        return self.env.unwrapped.qnodes.index(greedy_strategy[safe_index])
 
-    def _save_to_csv(self, control) -> None:
+    def earliest_completion_time(self):
+        """Return the index of the QNode with the smallest estimated completion time for the current QTask."""
+        sim = self.env.unwrapped
+        qtask, broker = sim.current_qtask, sim.broker
+        estimates = []
+        for qnode in sim.qnodes:
+            outcome, wait, duration = broker.preprocess_qtask(qtask, qnode)
+            if outcome.status != TaskStatus.ERROR:
+                estimates.append(sim.qsp_env.now + wait + duration)
+            else:
+                estimates.append(float('inf'))  # ERROR nodes sort last
+        # Every candidate is infinite (or there are none): use the random policy.
+        if not any(value != float('inf') for value in estimates):
+            return self.random()
+        return np.argmin(estimates)
+
+    def shortest_job_first(self):
+        """Return the index of the QNode on which the current QTask has the shortest execution time."""
+        sim = self.env.unwrapped
+        qtask, broker = sim.current_qtask, sim.broker
+        durations = []
+        for qnode in sim.qnodes:
+            outcome, _, duration = broker.preprocess_qtask(qtask, qnode)
+            # Nodes whose preprocessing ends in ERROR are ranked last via infinity.
+            failed = outcome.status == TaskStatus.ERROR
+            durations.append(float('inf') if failed else duration)
+        # With no finite candidate, fall back to the random policy.
+        if not any(value != float('inf') for value in durations):
+            return self.random()
+        return np.argmin(durations)
+
+    def lowest_utilization_first(self):
         """
-        Save values and episodes to a CSV file.
+        Assigns the task to the node that has been historically the least busy.
+        Utilization = Total Busy Time of a Node / Total Simulation Time
         """
+        qnodes = self.env.unwrapped.qnodes
+        # We use getattr to safely get total_busy_time, defaulting to 0 if not present
+        busy_times = [getattr(node, 'total_busy_time', 0.0) for node in qnodes]
+        
+        # Check for invalid tasks on each node
+        valid_nodes = []
+        for i, node in enumerate(qnodes):
+            # A simple validity check without full preprocessing
+            if self.env.unwrapped.current_qtask.qubit_number <= node.qubit_number:
+                valid_nodes.append(i)
+        
+        if not valid_nodes:
+            return self.random() # All nodes are invalid for this task
 
-        file_name = "./results/heuristics/" 
+        # Find the node with the minimum busy time *among the valid ones*
+        min_busy_time = float('inf')
+        action = -1
+        for i in valid_nodes:
+            if busy_times[i] < min_busy_time:
+                min_busy_time = busy_times[i]
+                action = i
+
+        return action
+
+    def weighted_round_robin(self):
+        """
+        Return the next entry of the pre-built ``wrr_list`` schedule, cycling
+        back to its start once the end is reached.
+        """
+        schedule = self.wrr_list
+        if not schedule:  # nothing scheduled: defer to the random policy
+            return self.random()
+        slot = self.wrr_index % len(schedule)
+        self.wrr_index = self.wrr_index + 1
+        return schedule[slot]
 
-        if not os.path.exists(file_name):
-            os.makedirs(file_name)
+    def random_best_n(self, n=3):
+        """
+        Sample up to ``n`` distinct QNodes at random and return the index of the
+        sampled node with the earliest estimated completion time.
+
+        Falls back to ``self.random()`` when no sampled node is usable.
+        """
+        sim = self.env.unwrapped
+        qnodes = sim.qnodes
+        num_qnodes = len(qnodes)
+
+        # Never ask for more nodes than exist.
+        sample_size = min(n, num_qnodes)
+
+        # Draw the candidate indices without replacement.
+        candidates = random.sample(list(range(num_qnodes)), sample_size)
+
+        qtask = sim.current_qtask
+        broker = sim.broker
+
+        # Track the best (lowest) estimate seen so far; -1 means "none yet".
+        best_completion_time = float('inf')
+        action = -1
+
+        for index in candidates:
+            outcome, wait, duration = broker.preprocess_qtask(qtask, qnodes[index])
+            # Nodes whose preprocessing ends in ERROR are skipped.
+            if outcome.status == TaskStatus.ERROR:
+                continue
+            estimate = sim.qsp_env.now + wait + duration
+            if estimate < best_completion_time:
+                best_completion_time = estimate
+                action = index
+
+        # No sampled node beat the initial infinite estimate: use the random policy.
+        return self.random() if action == -1 else action
 
+    def _save_to_csv(self, control) -> None:
+        file_name = "./results/heuristics/" 
+        if not os.path.exists(file_name): os.makedirs(file_name)
         file_name += control + ".csv"
-        # Open the CSV file in write mode
         with open(file_name, mode='w', newline='') as file:
             writer = csv.writer(file)
-            
-            # Write the header
             writer.writerow(['Episode', 'Total Completion Time', 'Rescheduling Count'])
-            
-            # Write the data
             for i in range(len(self.results)):
                 writer.writerow([i, self.results[i]['total_completion_time'], self.results[i]['rescheduling_count']])
         print("CSV file saved to " + file_name)
 
     def _plot_results(self, paths) -> None:
-        """
-        Plot the results of the episodes.
-        """
+        plt.figure(figsize=(14, 8))
         for path in paths:
             df1 = pd.read_csv(path['path'])
-
             plt.plot(df1['Episode'], df1['Total Completion Time'], ".-", color=path['color'], label=path['label'])
-
             self._summarize_results(df1, path['label'])
-        
         plt.ylabel('Total Completion Time')
         plt.xlabel('Evaluation Episode')
-        plt.legend(loc=2)
+        plt.title('Heuristic Method Performance Comparison')
+        plt.legend(loc='best')
+        plt.grid(True)
         plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(10))
+        if not os.path.exists("./results/heuristics/"): os.makedirs("./results/heuristics/")
+        plt.savefig("./results/heuristics/performance_comparison.png")
         plt.show()
 
     def _summarize_results(self, values, label) -> None:
-        """
-        Summarize the results of the episodes.
-        """
-        print("Results Summary for" + label + "solution:")
+        print(f"\n--- Results Summary for {label} solution ---")
         print(f"Number of Episodes: {self.num_episodes}")
-        print(f"Total Completion Time: {sum(values['Total Completion Time'])}")
-        print(f"Average Rescheduling Count: {sum(values['Rescheduling Count']) / self.num_episodes}")
+        print(f"Average Total Completion Time: {sum(values['Total Completion Time']) / self.num_episodes:.2f}")
+        print(f"Average Rescheduling Count: {sum(values['Rescheduling Count']) / self.num_episodes:.2f}")
+        print("------------------------------------------")
+    
+    def _collect_and_summarize_metrics(self, control):
+        """Print wait-time, throughput, success-rate and per-node utilization metrics for ``control``."""
+        serviced_tasks = self.env.unwrapped.serviced_qtasks  # task list, not the env wrapper itself
+        qnodes = self.env.unwrapped.qnodes
+        total_sim_time = self.env.unwrapped.qsp_env.now
+        if not serviced_tasks:
+            print(f"\n--- No tasks were serviced for {control}. Cannot generate metrics. ---")
+            return
+        total_qtasks = len(serviced_tasks)
+        avg_wait_time = sum(task.waiting_time for task in serviced_tasks) / total_qtasks
+        throughput = total_qtasks / total_sim_time if total_sim_time > 0 else 0
+        failure_threshold = 5
+        failed_tasks = sum(1 for task in serviced_tasks if task.rescheduling_count >= failure_threshold)
+        success_rate = (total_qtasks - failed_tasks) / total_qtasks * 100
+        print(f"\n--- Detailed Metrics Summary for '{control}' ---")
+        print(f"Total Simulation Time: {total_sim_time:.2f}s")
+        print(f"Throughput: {throughput:.4f} tasks/sec")
+        print(f"Average Wait Time per Task: {avg_wait_time:.4f}s")
+        print(f"Success Rate (rescheduled < {failure_threshold} times): {success_rate:.2f}%")
+        print(f"Number of Failed Tasks: {failed_tasks}")
+        print("\n--- Quantum Computer Utilization ---")
+        for node in qnodes:
+            busy_time = getattr(node, 'total_busy_time', 0.0)
+            utilization = (busy_time / total_sim_time) * 100 if total_sim_time > 0 else 0
+            print(f"  - {node.name} (ID: {node.id}): {utilization:.2f}% utilization")
+        print("------------------------------------------")
 
 
 if __name__ == "__main__":
-
-    # Create the QSimPy environment
-    env_config={
-                "obs_filter": "rescale_-1_1",
-                "reward_filter": None,
-                "dataset": "qdataset/qsimpyds_1000_sub_26.csv",
-            }
-
+    env_config = {
+        "obs_filter": "rescale_-1_1",
+        "reward_filter": None,
+        "dataset": "qdataset/qsimpyds_1000_sub_26.csv",
+    }
     env = qsimpy_env_creator(env_config)
-
-    # Run the heuristic solutions
+    
+    print("\n\n\n")
+    
     heuristics = HeuristicSolutions(env, num_episodes=100)
-    heuristics.run("greedy")
-    heuristics.run("random")
-    heuristics.run("round_robin")
-    heuristics.run("greedy_error")
+    heuristics.run("sjf")
+    # methods = [
+    #     'greedy', 'random', 'round_robin', 'greedy_error', 
+    #     'ect', 'sjf', 'luf', 'wrr', 'rbn'
+    # ]
 
-    # Plot the results
-    paths = [
-        {
-            "label": "random",
-            "path": "./results/heuristics/random.csv",
-            "color": "red"
-        },
-        {
-            "label": "round robin",
-            "path": "./results/heuristics/round_robin.csv",
-            "color": "blue"
-        },
-        {
-            "label": "greedy",
-            "path": "./results/heuristics/greedy.csv",
-            "color": "black"
-        },
-        {
-            "label": "greedy_error",
-            "path": "./results/heuristics/greedy_error.csv",
-            "color": "green"
-        },
 
+    # processes = [Process(target=heuristics.run, args=(m,)) for m in methods]
+    # for p in processes:
+    #     p.start()
+    # for p in processes:
+    #     p.join()
+    print("All Processes are done!!")
+
+    # --- UPDATE PATHS FOR PLOTTING ---
+    paths = [
+        {"label": "Random", "path": "./results/heuristics/random.csv", "color": "red"},
+        {"label": "Round Robin", "path": "./results/heuristics/round_robin.csv", "color": "blue"},
+        {"label": "Greedy (Earliest Available)", "path": "./results/heuristics/greedy.csv", "color": "black"},
+        {"label": "Greedy + Error", "path": "./results/heuristics/greedy_error.csv", "color": "green"},
+        {"label": "Earliest Completion Time (ECT)", "path": "./results/heuristics/ect.csv", "color": "purple"},
+        {"label": "Shortest Job First (SJF)", "path": "./results/heuristics/sjf.csv", "color": "orange"},
+        {"label": "Lowest Utilization First (LUF)", "path": "./results/heuristics/luf.csv", "color": "cyan"},
+        {"label": "Weighted Round Robin (WRR)", "path": "./results/heuristics/wrr.csv", "color": "magenta"},
+        {"label": "Random Best-3 (RBN)", "path": "./results/heuristics/rbn.csv", "color": "brown"},
     ]
-    heuristics._plot_results(paths)
+    # --- END UPDATE ---
+    heuristics._plot_results(paths)
\ No newline at end of file
diff --git a/zolution/TASK.jpg b/zolution/TASK.jpg
new file mode 100644
index 0000000..5fdfd64
Binary files /dev/null and b/zolution/TASK.jpg differ
diff --git a/zolution/optimization_solution.txt b/zolution/optimization_solution.txt
new file mode 100644
index 0000000..257d9a8
--- /dev/null
+++ b/zolution/optimization_solution.txt
@@ -0,0 +1,78 @@
+└──Optimization problems
+    |
+    └── Generalized Assignment Problem (GAP)
+
+✅✅
+
+└──Objective Functions:
+
+    These are the values your algorithm actively 
+    tries to minimize or maximize when making a decision. 
+
+    ├──Time-Based Objectives
+    |   ├──Makespan: The total time needed to finish the entire batch of tasks ("How quickly can we finish this entire batch of tasks?").
+    |   ├──Total Completion Time (Flow Time): The sum of the time each task spends in the system from arrival to completion.
+    |   └──Total Tardiness: Used when tasks have deadlines. It measures how late tasks are, ignoring those that finish on time.
+    |
+    ├──Quality & Cost-Based Objectives
+    |   ├──Total Error / Failure Probability: Minimizes the sum of errors for all assignments
+    |   └──Total Execution Cost: Applies when running on different QCs has a monetary cost; minimizes the total cost of all assignments.
+    |
+    └──Multi-Objective Functions
+        └──Weighted Sum: Used when you care about both time and quality.
+                A multi-objective function combines these using weights to reflect your priorities.
+
+                a * (normalized time metric) + b * (normalized error metric)
+
+
+
+└──solutions:
+
+
+└── Algorithms
+    │
+    ├── Classification by GUARANTEE
+    │   ├── Exact Algorithm (Perfect solution)
+    │   ├── Approximation Algorithm (Provably good solution, e.g. at most 10% worse than optimal)
+    │   └── Heuristic (A 'good enough' shortcut with no guarantee)
+    │       ├── Simple Heuristics (Human-designed rules)
+    │       ├── Metaheuristics (Strategies to guide simple heuristics)
+    │       └── Machine Learning Models (Learned, data-driven heuristics)
+    |
+    |
+    └── Classification by DESIGN PARADIGM
+        ├── Brute-force (Often used for Exact algorithms)
+        ├── Greedy (Can be Exact for some problems, a Heuristic for others)
+        ├── Divide and Conquer (A powerful paradigm for creating Exact algorithms)
+        ├── Dynamic Programming (Another powerful paradigm for Exact optimization)
+        ├── Backtracking
+        └── Randomized Algorithms (Often used in advanced Heuristics)
+
+
+
+
+
+Evaluation Metrics:
+    These are the Key Performance Indicators (KPIs) you use after a simulation
+    or run to analyze and compare the performance of different dispatching strategies.
+
+    ├──System Performance Metrics
+    |   ├──Average Wait Time: The average time tasks spent in the queue before execution.
+    |   ├──Average Turnaround Time: The average time from a task's submission to its completion
+    |   ├──Throughput: The number of tasks completed per unit of time (e.g., tasks per hour)
+    |   └──Makespan: Also used as an evaluation metric to see how long the whole process took.
+    |
+    ├──System Utilization Metrics
+    |   ├──Average QC Utilization: The percentage of time that the quantum computers are busy 
+    |   └──Queue Length Statistics
+    |       ├──Average Queue Length
+    |       ├──Maximum Queue Length
+    |       └──Variance in Queue Length
+    |
+    └──Quality and Fairness Metrics
+        ├──Success Rate: The percentage of executed tasks that were expected to succeed based on fidelity.
+        ├──Number of Failed Tasks: A direct count of tasks that were assigned to a QC but failed.
+        ├──Starvation: The number of tasks that had to wait longer than a certain threshold.
+        └──Jain's Fairness Index: Measures whether resources are distributed equitably among different users or task types.
+     
+
diff --git a/zolution/resultheuristic.png b/zolution/resultheuristic.png
new file mode 100644
index 0000000..513dd5c
Binary files /dev/null and b/zolution/resultheuristic.png differ
diff --git a/zolution/shahrivar.present.odt b/zolution/shahrivar.present.odt
new file mode 100644
index 0000000..c389a52
Binary files /dev/null and b/zolution/shahrivar.present.odt differ