Source code for palaestrai.agent.muscle

from __future__ import annotations
from typing import TYPE_CHECKING, Any, List, Tuple, Dict

import uuid
import logging
from abc import ABC, abstractmethod

from palaestrai.types import Mode
from .memory import Memory
from .brain_dumper import BrainDumper

if TYPE_CHECKING:
    from palaestrai.agent import (
        SensorInformation,
        ActuatorInformation,
    )

LOG = logging.getLogger(__name__)



[docs]
class Muscle(ABC):
    """Base class for the acting part of an agent.

    A Muscle maps sensor inputs to proposed actions in an environment.
    It does not learn on its own; learning is the responsibility of a
    :class:`Brain`. Whenever a Muscle acts, it sends the following data to
    its :class:`Brain`:

    * the sensor inputs it received,
    * the actuator set points it provided, and
    * the reward received from the proposed action.

    To implement an algorithm, derive from this ABC and provide:

    #. :func:`~propose_actions`, which implements the input-to-action
       mapping
    #. :func:`~update`, which handles how updates from the :class:`Brain`
       are incorporated into the Muscle.
    """

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        # Positional and keyword arguments are accepted, but not evaluated
        # by this base-class constructor.
        random_suffix = str(uuid.uuid4())[-6:]
        # Default identifier: "Muscle-" plus the last six characters of a
        # freshly generated UUID4.
        self._uid: str = "Muscle-" + random_suffix
        self._mode: Mode = Mode.TRAIN
        self._memory: Memory = Memory()
        # Passed to BrainDumper.load_brain_dump by Muscle.load.
        self._model_loaders: List[BrainDumper] = []
        # Filled by add_statistics, handed out and reset by pop_statistics.
        self._statistics: Dict[str, Any] = {}

    @property
    def uid(self):
        """Unique user-defined ID of this Muscle

        This is the name of the agent, i.e., what has been defined by a
        user in an :class:`ExperimentRun` file.

        Returns
        -------
        uid: str
            The user-defined name of the Muscle
        """
        return self._uid

    @property
    def mode(self) -> Mode:
        """Internal mode of operations

        Usually, an agent operates under the assumption of a certain modus
        operandi.
        This can be, for example, the distinction between training (
        ::`Mode.TRAIN`) and testing (::`Mode.TEST`).

        Returns
        -------
        ::`Mode`
            The agent's operations mode
        """
        return self._mode

    @property
    def memory(self) -> Memory:
        """Muscle :class:`Memory`.

        Each Muscle can have its own, personal :class:`Memory`.
        Internally, the memory stores sensor readings, actuator setpoints
        provided by the Muscle, as well as rewards from the environment and
        the result of the Muscle's (i.e., Agent's) objective function.

        Return
        ------
        Memory
            The Muscle :class:`Memory`.
        """
        assert self._memory is not None
        return self._memory


[docs]
    def setup(self):
        """Generic setup method, called just before ::`~Muscle.run`

        This method is called just before the main loop in ::`~Muscle.run`
        commences. It can be used for any setup tasks. The method is
        guranteed to be called in the same process as the main loop. Also, the
        communications link to the brain will already be established.
        However, there are no information about the environment available yet.

        There is no need to load the muscle's inference model here;
        refer to ::`~Muscle.prepare_model` for this.
        """
        pass



[docs]
    @abstractmethod
    def propose_actions(
        self,
        sensors: List[SensorInformation],
        actuators_available: List[ActuatorInformation],
    ) -> Tuple[List[ActuatorInformation], Any]:
        """Turn current sensor information into actuator setpoints.

        This is the core inference task of a Muscle, and the one abstract
        method every Muscle has to implement: Given the current sensor
        information, produce a list of actuator setpoints that can be
        applied in the ::`Environment`.
        How sensor information is processed and how actuator values are
        derived is entirely up to the developer.

        Both the sensor readings and the list of available actuators are
        valid for the current time.
        Earlier sensor readings, rewards, and objective values can be
        obtained from the Muscle's ::`Memory`, which is reachable via the
        ::`Muscle.memory` property.

        Parameters
        ----------
        sensors : list of SensorInformation
            New SensorInformation objects for all available sensors
        actuators_available : list of ActuatorInformation
            All actuators that are *currently* available to the agent

        Returns
        -------
        tuple of two elements
            (1) The actual setpoints as a list of ::`ActuatorInformation`
            objects. Re-using the objects passed in as parameters is
            allowed; deep-copying is not necessary.
            (2) Any other data that should be sent to the Muscle's
            ::`Brain`.
        """
        ...



[docs]
    def update(self, update: Any):
        """Incorporate an update sent by the brain.

        Called whenever the brain sends an update. Which components are
        updated depends on the specific implementation, but an
        implementation should update everything that needs updating.

        Not every combination of :class:`Brain` and Muscle performs
        updates: Simple, static bots never learn and therefore need no
        update mechanism. For that reason, the default implementation
        does nothing.

        Parameters
        ----------
        update: any
            Implementation-specific data that a :class:`Brain` sends to
            its Muscles upon an update.
        """
        return None



[docs]
    def reset(self):
        """Hook that is called to reset the Muscle.

        On a number of occasions, the Muscle stays active, but is reset;
        for example, when a new episode of the same experiment run phase
        begins.
        In that case, the Muscle is allowed (or, better, encouraged) to
        keep its state, but should acknowledge that a reset has occurred
        and must not expect the seamless continuation of an episode.

        Implementing this method is optional. The default implementation
        does nothing, so the Muscle is simply kept as-is on reset.
        """
        return None


    def load(self, tag: str) -> Any:
        """Load a dumped brain state for a given tag.

        Delegates to ::`BrainDumper.load_brain_dump`, passing the model
        loaders configured for this Muscle. If a dump is returned, it is
        rewound to its beginning as a convenience for the caller. Rewinding
        is best-effort only: a failure is logged at debug level and the
        object is returned as it is.

        Parameters
        ----------
        tag : str
            Tag of the (sub-)model to load; handed through unchanged to
            ::`BrainDumper.load_brain_dump`.

        Returns
        -------
        Any
            Whatever ::`BrainDumper.load_brain_dump` returned. Presumably
            a seekable binary stream, or ``None`` if no dump is
            available (assumption; confirm against ``BrainDumper``).
        """
        bio = BrainDumper.load_brain_dump(self._model_loaders, tag)
        if bio is None:
            return None
        try:
            bio.seek(0)
        except Exception:
            # We just want to be nice and serviceable. If it isn't possible,
            #   don't fret, no harm done. Deliberately broad: the returned
            #   object's type is not known here.
            LOG.debug(
                "Could not rewind brain dump for tag %r; returning it as-is.",
                tag,
                exc_info=True,
            )
        return bio


[docs]
    def prepare_model(self):
        """Hook for loading a trained model, e.g., for testing

        Dumped brain states from a previous phase, or even from another
        experiment run, are loaded here. See the documentation on
        experiment run files (the ``load`` key) for details.

        The method is called whenever the current state of a muscle model
        should be restored. How a particular model is deserialized is left
        to the concrete implementation. Brains may also be split into
        sub-models (e.g., actor and critic) that are stored separately by
        means of tags; implementing this method allows for handling that in
        a versatile way.

        Using the storage facilities of palaestrAI is advisable. They are
        available through ::`Muscle.load`; the model location has then
        already been pre-set from the experiment run file.

        The default implementation does nothing.
        """
        return None



[docs]
    def add_statistics(self, key: str, value: Any, allow_overwrite=False):
        """Statistics dict

        Each Muscle can have its own statistic metrics, that are calculated
        with each step, i.e., after each call of :func:`propose_actions`.
        The :class:`Brain` can provide occasionally calculated statistics via
        an update to the Muscle. The Muscle then can choose to update its
        statistics for storing.
        """
        assert self._statistics is not None and isinstance(
            self._statistics, Dict
        ), (
            f"Invalid internal variable Muscle._statistics format:"
            f" {type(self._statistics)}, expected non-None 'Dict'"
        )
        assert key is not None and isinstance(
            key, str
        ), f"Invalid key format: {type(key)}, expected 'str'"

        assert allow_overwrite or key not in self._statistics, (
            f"Tried to overwrite statistics for {key}. "
            f"Use 'allow_overwrite' to replace values."
        )
        self._statistics[key] = value



[docs]
    def pop_statistics(self) -> Dict[str, Any]:
        """Returning current statistics and resetting it

        This method returns the statistics dict and clears it afterwards.

        Because the statistics dict should contain metrics that refer
        to one step, it is stored and cleared after each one.

        Returns
        -------
        Dict
            The dict contains a mapping of metric keys to values. This
            dynamically allows various implementation-dependent statistics
            metrics.
        """
        statistics = self._statistics
        self._statistics = {}

        return statistics


    def __str__(self):
        return f"{self.__class__}(id=0x{id(self):x}, uid={self.uid})"