# Source code for palaestrai.agent.muscle

from __future__ import annotations
from typing import TYPE_CHECKING, Any, List, Tuple, Dict

import uuid
import logging
from abc import ABC, abstractmethod

from palaestrai.types import Mode
from .memory import Memory
from .brain_dumper import BrainDumper

if TYPE_CHECKING:
    from palaestrai.agent import (
        SensorInformation,
        ActuatorInformation,
    )

LOG = logging.getLogger(__name__)


class Muscle(ABC):
    """An acting entity in an environment.

    Each Muscle is an acting entity in an environment: Given a sensor input,
    it proposes actions. Thus, Muscles implement input-to-action mappings.
    A muscle does, however, not learn by itself; for that, it needs a
    :class:`Brain`. Every time a Muscle acts, it sends the following inputs
    to a :class:`Brain`:

    * Sensor inputs it received
    * actuator set points it provided
    * reward received from the proposed action.

    When implementing an algorithm, you have to derive from the Muscle ABC
    and provide the following methods:

    #. :func:`~propose_actions`, which implements the input-to-action
       mapping
    #. :func:`~update`, which handles how updates from the :class:`Brain`
       are incorporated into the muscle.
    """

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        """Initialize the Muscle's internal state.

        Positional and keyword arguments are accepted for the benefit of
        subclasses, but are not used by this base class.
        """
        # Placeholder ID built from the last six characters of a random UUID.
        self._uid: str = f"Muscle-{str(uuid.uuid4())[-6:]}"
        self._mode: Mode = Mode.TRAIN
        self._memory: Memory = Memory()
        # Passed to BrainDumper.load_brain_dump() by Muscle.load().
        self._model_loaders: List[BrainDumper] = []
        # Per-step metrics; filled by add_statistics(), emptied by
        # pop_statistics().
        self._statistics: Dict[str, Any] = {}

    @property
    def uid(self) -> str:
        """Unique user-defined ID of this Muscle

        This is the name of the agent, i.e., what has been defined by a user
        in an :class:`ExperimentRun` file.

        The constructor initializes it to a random ``Muscle-xxxxxx``
        placeholder; presumably it is replaced with the configured agent
        name elsewhere (not visible in this class).

        Returns
        -------
        uid: str
            The user-defined name of the Muscle
        """
        return self._uid

    @property
    def mode(self) -> Mode:
        """Internal mode of operations

        Usually, an agent operates under the assumption of a certain modus
        operandi. This can be, for example, the distinction between
        training (:attr:`Mode.TRAIN`) and testing (:attr:`Mode.TEST`).

        Returns
        -------
        :class:`Mode`
            The agent's operations mode
        """
        return self._mode

    @property
    def memory(self) -> Memory:
        """Muscle :class:`Memory`.

        Each Muscle can have its own, personal :class:`Memory`.
        Internally, the memory stores sensor readings, actuator setpoints
        provided by the Muscle, as well as rewards from the environment and
        the result of the Muscle's (i.e., Agent's) objective function.

        Returns
        -------
        Memory
            The Muscle :class:`Memory`.
        """
        # Internal invariant (not input validation): the memory is created
        # in the constructor and must not have been unset.
        assert self._memory is not None
        return self._memory

    def setup(self):
        """Generic setup method, called just before :meth:`~Muscle.run`

        This method is called just before the main loop in
        :meth:`~Muscle.run` commences. It can be used for any setup tasks.
        The method is guaranteed to be called in the same process as the
        main loop. Also, the communications link to the brain will already
        be established. However, there is no information about the
        environment available yet.

        There is no need to load the muscle's inference model here;
        refer to :meth:`~Muscle.prepare_model` for this.
        """

    @abstractmethod
    def propose_actions(
        self,
        sensors: List[SensorInformation],
        actuators_available: List[ActuatorInformation],
    ) -> Tuple[List[ActuatorInformation], Any]:
        """Process new sensor information and produce actuator setpoints.

        This method provides the essential inference task of the Muscle:
        It takes current sensor information and is expected to produce a
        list of actuator setpoints that can be applied in the
        :class:`Environment`. How the actuator values are produced and how
        the sensor information are processed is up to the developer.

        This is the essential abstract method that needs to be implemented
        by every Muscle.

        Sensor readings and the list of available actuators are valid for
        the current time. Previous sensor readings, rewards, and objective
        value can be retrieved from the Muscle's :class:`Memory`, which is
        accessible through the :attr:`Muscle.memory` property.

        Parameters
        ----------
        sensors : list of SensorInformation
            List of new SensorInformation for all available sensors
        actuators_available : list of ActuatorInformation
            List of all actuators that are *currently* available to the
            agent

        Returns
        -------
        tuple of two elements
            A Tuple containing: (1) The actual setpoints (a list of
            :class:`ActuatorInformation` objects), for which it is allowed
            to simply use the objects that are passed as parameters,
            deep-copying is not necessary; (2) any other data that should
            be sent to the Muscle's :class:`Brain`.
        """

    def update(self, update: Any):
        """Update the Muscle.

        This method is called if the brain sends an update. What is to be
        updated is up to the specific implementation. However, this method
        should update all necessary components.

        There might be implementations of :class:`Brain` and Muscles where
        updates do not happen. Simple, static bots never learn, and,
        therefore, do not need a mechanism for updates. Therefore, the
        default implementation of this method is simply to not do anything.

        Parameters
        ----------
        update: any
            Any data that a :class:`Brain` would send to its Muscles upon
            an update. Implementation-specific.
        """

    def reset(self):
        """Called in order to reset the Muscle.

        There is a number of occasions in which the Muscle should stay
        active, but reset. For example, when a new episode of the same
        experiment run phase is started. Then, the Muscle is allowed (or
        better, encouraged) to keep its state, but acknowledge that a reset
        has occurred and the Muscle does not expect the seamless
        continuation of an episode. Implementing this method is optional;
        if it is not implemented, nothing will happen on reset and the
        Muscle will also be kept as-is.
        """

    def load(self, tag: str) -> Any:
        """Load the stored brain dump identified by ``tag``.

        Delegates to :meth:`BrainDumper.load_brain_dump` with this Muscle's
        model loaders and then tries to rewind the result with ``seek(0)``.
        Rewinding is a convenience only: if it fails, the object is
        returned as it is.

        Parameters
        ----------
        tag : str
            Name of the (sub-)model to load; passed on unchanged to
            :meth:`BrainDumper.load_brain_dump`.

        Returns
        -------
        Any
            Whatever :meth:`BrainDumper.load_brain_dump` returned
            (presumably a binary file-like object), or ``None`` if it
            returned ``None``.
        """
        bio = BrainDumper.load_brain_dump(self._model_loaders, tag)
        if bio is None:
            return None
        try:
            bio.seek(0)
        except Exception as err:
            # We just want to be nice and serviceable. If rewinding isn't
            # possible, don't fret, no harm done; leave a trace for
            # debugging only.
            LOG.debug(
                "%s could not rewind brain dump for tag %r: %s",
                self,
                tag,
                err,
            )
        return bio

    def prepare_model(self):
        """Loading a trained model for testing

        This method loads dumped brain states from a given previous phase,
        or even experiment run. For details, see the documentation on
        experiment run files (the ``load`` key).

        This method is called whenever the current state of a muscle model
        should be restored. How a particular model is deserialized is up to
        the concrete implementation. Also, brains may be divided into
        sub-models (e.g., actor and critic), whose separate storage is
        realized via tags. Implementing this method allows for a versatile
        implementation of this.

        It is advisable to use the storage facilities of palaestrAI. These
        are available through :meth:`Muscle.load`. The model location has
        then been pre-set from the experiment run file.
        """

    def add_statistics(
        self, key: str, value: Any, allow_overwrite: bool = False
    ) -> None:
        """Statistics dict

        Each Muscle can have its own statistic metrics, that are calculated
        with each step, i.e., after each call of :func:`propose_actions`.

        The :class:`Brain` can provide occasionally calculated statistics
        via an update to the Muscle. The Muscle then can choose to update
        its statistics for storing.

        Parameters
        ----------
        key : str
            Name of the metric.
        value : Any
            Value to store for ``key``.
        allow_overwrite : bool, default: False
            If ``True``, an already stored value for ``key`` is replaced;
            otherwise, storing an existing key is an error.

        Raises
        ------
        AssertionError
            If the internal statistics container is not a dict, if ``key``
            is not a ``str``, or if ``key`` is already present and
            ``allow_overwrite`` is not set.
        """
        # The checks raise AssertionError explicitly instead of using
        # ``assert`` statements: the exception type stays the same for
        # existing callers, but the checks are no longer stripped when
        # Python runs with ``-O``.
        if not isinstance(self._statistics, dict):
            raise AssertionError(
                "Invalid internal variable Muscle._statistics format:"
                f" {type(self._statistics)}, expected non-None 'Dict'"
            )
        if not isinstance(key, str):
            raise AssertionError(
                f"Invalid key format: {type(key)}, expected 'str'"
            )
        if not allow_overwrite and key in self._statistics:
            raise AssertionError(
                f"Tried to overwrite statistics for {key}. "
                f"Use 'allow_overwrite' to replace values."
            )
        self._statistics[key] = value

    def pop_statistics(self) -> Dict[str, Any]:
        """Returning current statistics and resetting it

        This method returns the statistics dict and clears it afterwards.

        Because the statistics dict should contain metrics that refer to
        one step, it is stored and cleared after each one.

        Returns
        -------
        Dict
            The dict contains a mapping of metric keys to values.
            This dynamically allows various implementation-dependent
            statistics metrics.
        """
        # Hand out the current dict itself and start over with a fresh one,
        # so the caller's copy is not affected by later additions.
        statistics = self._statistics
        self._statistics = {}
        return statistics

    def __str__(self) -> str:
        """Return a description with class, object address, and UID."""
        return f"{self.__class__}(id=0x{id(self):x}, uid={self.uid})"