Source code for palaestrai.environment.dummy_environment

"""
This module contains the class :class:`DummyEnvironment`. It can be
used as a reference environment in experiments.
"""

import numpy as np
import random
from typing import List, Callable

from palaestrai.agent import SensorInformation, ActuatorInformation
from palaestrai.types import Discrete, Box
from .environment import Environment
from ..agent.reward_information import RewardInformation


class DummyEnvironment(Environment):
    """
    A dummy environment with a fixed number of sensors and actuators.

    The environment reports itself as terminal after a fixed number of
    updates.

    Parameters
    ----------
    uid : str
        A universal id for the environment.
    broker_uri : str
        The URI which is used to connect to the simulation broker. It is
        used to communicate with the simulation controller.
    seed : int
        Seed for recreation.
    discrete : bool, optional
        If set to True, the environment will only use discrete spaces.
        Otherwise, the spaces are continuous. Default is `True`.
    """

    # Number of sensors (and, equally, of actuators) the environment exposes.
    _NUM_DEVICES = 10
    # Number of updates after which `update` reports a terminal state.
    _MAX_UPDATES = 10
    # Size of the discrete space the reward value lives in.
    _REWARD_SPACE_SIZE = 9000

    def __init__(
        self, uid: str, broker_uri: str, seed: int, discrete: bool = True
    ):
        super().__init__(uid, broker_uri, seed)
        # Counts the updates since the last call to `start_environment`.
        self.iter: int = 0
        self.discrete: bool = discrete

    def start_environment(self):
        """
        Reset the update counter and create all sensors and actuators.

        This method is called when an `EnvironmentStartRequest` message is
        received.

        The dummy environment is represented by 10 sensors and 10
        actuators, each identified by its index rendered as a string.

        * In discrete mode every sensor has the space ``Discrete(100)``
          and the value ``iter % 100``; every actuator has the space
          ``Discrete(100)`` and the value 0.
        * Otherwise every sensor has the space ``Box(0, 2, shape=())`` and
          a random value of either 0 or 1; every actuator has the space
          ``Box(0, 10, shape=())`` and the value 0.

        The created lists are also stored in ``self.sensors`` and
        ``self.actuators``.

        Returns
        -------
        tuple :
            A list containing the `SensorInformation` for each of the 10
            sensors and a list containing the `ActuatorInformation` for
            each of the 10 actuators.
        """
        self.iter = 0
        self.sensors = [
            self._create_sensor(num) for num in range(self._NUM_DEVICES)
        ]
        self.actuators = [
            self._create_actuator(num) for num in range(self._NUM_DEVICES)
        ]
        return self.sensors, self.actuators

    def update(self, actuators):
        """
        Advance the dummy environment by one step.

        This method is called when an `EnvironmentUpdateRequest` message
        is received. While values of the actuators would manipulate an
        actual environment, in here those values have no impact on the
        behavior of the dummy environment.

        A fresh list of 10 `SensorInformation` objects is created from the
        current update counter, then the counter is incremented and the
        reward is derived from the new counter value (see
        :meth:`create_reward`). From the 10th update on, `is_terminal` is
        True.

        Parameters
        ----------
        actuators : list[`ActuatorInformation`]
            A list of `ActuatorInformation` to interact with the
            environment. Ignored by this environment.

        Returns
        -------
        tuple :
            A list of `SensorInformation` representing the 10 sensors,
            a list with one `RewardInformation`, and the boolean
            `is_terminal`.
        """
        # Sensors must be created *before* the counter is incremented,
        # because discrete sensor values are derived from the counter.
        sensors = [
            self._create_sensor(num) for num in range(self._NUM_DEVICES)
        ]
        self.iter += 1
        is_terminal = self.iter >= self._MAX_UPDATES
        return sensors, self.create_reward(), is_terminal

    def _create_actuator(self, actuator_id):
        """
        Create the `ActuatorInformation` with the given id.

        The value is always 0; the space is ``Discrete(100)`` in discrete
        mode and ``Box(0, 10, shape=())`` otherwise.
        """
        space = Discrete(100) if self.discrete else Box(0, 10, shape=())
        return ActuatorInformation(
            space=space,
            value=0,
            uid=f"{actuator_id}",
        )

    def create_reward(self):
        """
        Create the reward for the current update counter.

        Returns
        -------
        list :
            A list with a single `RewardInformation` named "Reward" whose
            value is ``(iter - 1) % 9000`` in the space ``Discrete(9000)``.
        """
        # The parentheses matter: `%` binds tighter than `-`, so without
        # them only the literal 1 would be reduced modulo the space size
        # and the reward would never wrap around.
        value = (self.iter - 1) % self._REWARD_SPACE_SIZE
        return [
            RewardInformation(
                value, Discrete(self._REWARD_SPACE_SIZE), "Reward"
            )
        ]

    def _create_sensor(self, sensor_id):
        """
        Create the `SensorInformation` with the given id.

        In discrete mode the value is ``iter % 100`` in the space
        ``Discrete(100)``; otherwise it is a random integer of either 0
        or 1 in the space ``Box(0, 2, shape=())``.
        """
        if self.discrete:
            return SensorInformation(
                value=self.iter % 100,
                space=Discrete(100),
                uid=f"{sensor_id}",
            )
        return SensorInformation(
            value=random.randint(0, 1),
            space=Box(0, 2, shape=()),
            uid=f"{sensor_id}",
        )