Creating a Gym Environment
In this step, we define a gym.Env subclass that serves as an interface between a Reinforcement Learning algorithm and the Godot application.
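To make this interface concrete, the sketch below shows the loop an RL algorithm typically drives through the Gymnasium API. The run_episode helper is purely illustrative (it is not part of godot_gym_api), and the randomly sampled actions stand in for a real policy:

import gymnasium as gym

def run_episode(env: gym.Env) -> float:
    """Drive one episode through the standard Gymnasium interface.

    An RL algorithm talks to the Godot application only through reset() and
    step(); random actions stand in for the algorithm's policy here.
    """
    observation, info = env.reset()
    total_reward = 0.0
    terminated = truncated = False
    while not (terminated or truncated):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        total_reward += float(reward)
    return total_reward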
Note
Any environment you develop should inherit from the godot_gym_api.GodotEnvironment class.
In your project directory, create a file apple_seeker_env.py. Import the required packages as follows:
from typing import Any, Dict, Tuple

import numpy as np
from gymnasium import spaces

from godot_gym_api import GodotEnvironment

# Protobuf message we have created earlier
import message_pb2 as protobuf_message_module
Create a class that inherits from godot_gym_api.GodotEnvironment:

class AppleSeekerEnv(GodotEnvironment):
    def __init__(
        self,
        engine_address: Tuple[str, int] = ("127.0.0.1", 9090),
        engine_chunk_size: int = 65536,
        episode_length: int = 500,
    ):
        super().__init__(protobuf_message_module, engine_address, engine_chunk_size)
        self._episode_length = episode_length

        # Define the observation space in accordance with the agent in the Godot app.
        self._max_distance = 5
        self.observation_space = spaces.Dict(
            spaces={
                "distances_to_obstacle": spaces.Box(
                    low=0,
                    high=self._max_distance,
                    shape=[16,],
                    dtype=np.float32,
                ),
                "distances_to_target": spaces.Box(
                    low=0,
                    high=self._max_distance,
                    shape=[16,],
                    dtype=np.float32,
                ),
            },
        )
        # The example Godot app returns all of its observations for the agent and
        # the world, so there is no need to specify them explicitly.
        self._requested_observation = {self.AGENT_KEY: [], self.WORLD_KEY: []}

        # Define the action space in accordance with the agent in the Godot app.
        self.action_space = spaces.Discrete(4)

        # Set during reset
        self._step_counter = None

    def _observe(self, state: Dict[str, Dict[str, Any]]) -> Dict:
        observation = {
            "distances_to_obstacle": np.asarray(state[self.AGENT_KEY].distances_to_obstacle),
            "distances_to_target": np.asarray(state[self.AGENT_KEY].distances_to_target),
        }
        return observation

    def _is_terminated(self, state) -> bool:
        episode_steps_limit_reached = self._step_counter == self._episode_length
        apple_caught = state[self.WORLD_KEY].apple_caught
        return episode_steps_limit_reached or apple_caught

    def _reward_function(self, state):
        if state[self.WORLD_KEY].apple_caught:
            return 100
        else:
            return -min(np.asarray(state[self.AGENT_KEY].distances_to_target)) / self._max_distance

    def step(self, action: np.ndarray) -> Tuple[Dict, float, bool, bool, dict]:
        state = self._godot_step(action.item())
        self._step_counter += 1

        observation = self._observe(state)
        terminated = self._is_terminated(state)
        reward = self._reward_function(state)
        return observation, reward, terminated, False, {}

    def reset(self, *args, **kwargs) -> Tuple[Dict, dict]:
        self._step_counter = 0
        state = self._godot_reset()
        observation = self._observe(state)
        return observation, {}

    def seed(self, *args, **kwargs):
        pass

    def render(self):
        pass

    def close(self):
        pass
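With the class in place, the environment can be driven like any other Gymnasium environment. The sketch below is illustrative only: it assumes the example Godot application is already running and listening on the default address 127.0.0.1:9090, and it reuses the run_episode helper sketched earlier.

from apple_seeker_env import AppleSeekerEnv

# Assumes the example Godot application is already running and listening on
# 127.0.0.1:9090 (the default engine_address above).
env = AppleSeekerEnv()
for episode in range(3):
    total_reward = run_episode(env)  # random-action rollout from the earlier sketch
    print(f"episode {episode}: total reward = {total_reward:.2f}")
env.close()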