# Source code for palaestrai.experiment.termination_condition

from __future__ import annotations
from typing import TYPE_CHECKING, Optional, Union, Any, Tuple

from abc import ABC
from palaestrai.types import SimulationFlowControl

if TYPE_CHECKING:
    import palaestrai.environment
    from palaestrai.agent import Brain
    from palaestrai.experiment import RunGovernor
    from palaestrai.environment import Environment
    from palaestrai.core.protocol import (
        SimulationControllerTerminationRequest,
        MuscleUpdateRequest,
    )



# [docs]  (Sphinx viewcode link marker; not part of the module source)
class TerminationCondition(ABC):
    """Control execution flow of simulations.

    Termination conditions control the flow of the simulation execution. For
    every :class:`palaestrai.environment.Environment` update
    and every :class:`palaestrai.agent.Brain` update,
    the configured termination conditions are queried.
    They then return a flow control indicator
    (:class:`SimulationFlowControl`).

    This base class offers default implementations for three situations:

    * :meth:`TerminationCondition.brain_flow_control`
      is called after an agent's :class:`Brain` has received a
      :class:`Muscle` update and had time to think about it.
    * :meth:`TerminationCondition.environment_flow_control`
      is called after an environment update.
    * :meth:`TerminationCondition.phase_flow_control`
      is called in the :class:`RunGovernor` to control a whole simulation
      phase.

    The :class:`SimulationFlowControl` enum defines a number of constants.
    They are ordered, i.e., ``SimulationFlowControl.CONTINUE`` has the lowest
    priority, whereas ``SimulationFlowControl.STOP`` has the highest. The
    indicator with the highest priority wins overall, i.e., if one agent
    indicates that the simulation should stop, then it will terminate the
    current experiment run phase.
    """

    def brain_flow_control(
        self, brain: Brain, message: MuscleUpdateRequest
    ) -> Tuple[SimulationFlowControl, Any]:
        """Allows a learning process to control the simulation flow.

        A learner can control the simulation, e.g., by indicating that the
        simulation should be reset or can end when it has become good enough.
        Descendant classes can reimplement this method. They will receive
        access to the respective agent's :class:`Brain`, which contains all
        the necessary information (e.g., its memory, training success, etc.)

        The default implementation inspects neither argument and always lets
        the simulation continue.

        Parameters
        ----------
        brain : :class:`Brain`
            The :class:`Brain` of the current agent, which can be used to
            query information about the agent's current performance.
        message : :class:`MuscleUpdateRequest`
            The message that triggered evaluation of the termination
            condition, which can be used, e.g., to retrieve the UID of the
            current rollout worker.

        Returns
        -------
        Tuple of :class:`SimulationFlowControl` and Any:
            An indicator for simulation control: The flow control indicator
            with the highest priority (i.e., highest value number in the
            enum) wins.
            The second element of the tuple this method returns indicates
            additional data to pass. This can be useful to, e.g., make
            data available from the :meth:`.brain_flow_control` method to
            the :meth:`.phase_flow_control` method.
        """
        return SimulationFlowControl.CONTINUE, None

    def environment_flow_control(
        self, environment: palaestrai.environment.Environment
    ) -> Tuple[SimulationFlowControl, Any]:
        """Allows an environment to control the simulation flow.

        The logic is the same as for :meth:`.brain_flow_control`, except that
        an environment is now checked.
        The default implementation is to reset the run when the environment
        is done (:attr:`palaestrai.environment.Environment.done`).

        Parameters
        ----------
        environment : :class:`palaestrai.environment.Environment`
            The environment that was just updated; only its ``done``
            attribute is read by the default implementation.

        Returns
        -------
        Tuple of :class:`SimulationFlowControl` and Any:
            Same logic as for the :meth:`.brain_flow_control` method. The
            default implementation never passes additional data, i.e., the
            second element is always ``None``.
        """
        # A finished environment asks for a restart of the run; in every
        # other case the simulation simply carries on.
        if environment.done:
            return SimulationFlowControl.RESTART, None
        return SimulationFlowControl.CONTINUE, None

    def phase_flow_control(
        self,
        run_governor: RunGovernor,
        message: SimulationControllerTerminationRequest,
    ) -> Tuple[SimulationFlowControl, Any]:
        """Allows overall control of a simulation phase via the RunGovernor.

        The logic is similar to that of :meth:`.brain_flow_control`, with the
        exception that this function is called in the :class:`RunGovernor`.

        The default implementation inspects neither argument and always lets
        the phase continue.

        Parameters
        ----------
        run_governor : :class:`RunGovernor`
            The run governor in which this method is called.
        message : :class:`SimulationControllerTerminationRequest`
            The request passed along with the call; descendant classes may
            inspect it.

        Returns
        -------
        Tuple of :class:`SimulationFlowControl` and Any:
            Same logic as for the :meth:`.brain_flow_control` method
        """
        return SimulationFlowControl.CONTINUE, None

    def check_termination(
        self, message: Any, component: Optional[Any] = None
    ) -> bool:
        """Indicate whether the given message should end the simulation.

        The default implementation ignores both arguments and never requests
        termination.
        NOTE(review): this looks like a hook kept alongside the
        ``*_flow_control`` methods for older callers; confirm before
        removing or extending it.

        Parameters
        ----------
        message : Any
            The message to evaluate; unused by the default implementation.
        component : Any, optional
            The component the message relates to; unused by the default
            implementation.

        Returns
        -------
        bool
            Always ``False``.
        """
        return False