Skip to main content

2.2 - Code - The Reusable SC2GymEnv Wrapper

This file provides the complete, reusable implementation of the SC2GymEnv class. This class is the architectural cornerstone of our project, acting as the adapter that allows the synchronous stable-baselines3 library to communicate with the asynchronous python-sc2 game engine.

Save this code as sc2_gym_env.py. The SC2GymEnv class it defines will serve as the base class for all specific RL environments we create.

Class Responsibilities

  • API Adherence: Implements the standard gymnasium.Env interface.
  • Process Management: Manages the lifecycle of the python-sc2 game process, including startup and cleanup.
  • Communication Bridge: Orchestrates the flow of actions and observations between the agent (training) process and the game process using multiprocessing queues.

sc2_gym_env.py

import multiprocessing as mp
import queue
from typing import Optional, Type, Tuple, Any

import gymnasium as gym
from sc2 import maps
from sc2.bot_ai import BotAI
from sc2.data import Race, Difficulty
from sc2.main import run_game
from sc2.player import Bot, Computer


# Type alias for the items that travel through the observation queue
# (game process -> agent process). Each item is a 5-tuple laid out as:
#   (observation, reward, terminated, truncated, info)
# The observation is typed as Any because its concrete type is chosen by the
# bot that produces it.
ObservationQueueItem = Tuple[Any, float, bool, bool, dict]

class SC2GymEnv(gym.Env):
    """
    A generic Gymnasium wrapper for the StarCraft II environment.

    This class acts as a bridge between a synchronous RL agent (stable-baselines3)
    and the asynchronous python-sc2 library. It runs the game in a separate
    process and facilitates communication via multiprocessing queues:

    * ``action_queue``: agent process -> game process (actions).
    * ``obs_queue``: game process -> agent process
      (``(observation, reward, terminated, truncated, info)`` tuples).

    NOTE(review): this class does not define ``observation_space`` or
    ``action_space``; presumably concrete subclasses are expected to set them.
    """

    # Seconds to block on the observation queue before re-checking that the
    # game process is still alive. Keeps a crashed game from hanging the agent.
    _POLL_INTERVAL: float = 1.0

    # Seconds to wait for a terminated game process to exit before killing it.
    _SHUTDOWN_TIMEOUT: float = 5.0

    def __init__(
        self,
        bot_class: Type[BotAI],
        map_name: str,
        bot_race: Race,
        opponent_race: Race = Race.Random,
        difficulty: Difficulty = Difficulty.Easy
    ):
        """
        Initializes the SC2 Gymnasium environment.

        No game process is started here; the first call to ``reset()`` does that.

        Args:
            bot_class: The user-defined BotAI class to be run in the game process.
                It is instantiated as
                ``bot_class(action_queue=..., obs_queue=...)``.
            map_name: The name of the StarCraft II map to be used.
            bot_race: The race of the agent's bot.
            opponent_race: The race of the computer opponent.
            difficulty: The difficulty of the computer opponent.
        """
        super().__init__()
        self.bot_class = bot_class
        self.map_name = map_name
        self.bot_race = bot_race
        self.opponent_race = opponent_race
        self.difficulty = difficulty

        # Queues for inter-process communication. They are replaced with fresh
        # instances on every reset() so that episodes never share queue state.
        # action_queue carries actions; obs_queue carries ObservationQueueItem.
        self.action_queue: mp.Queue = mp.Queue()
        self.obs_queue: mp.Queue = mp.Queue()

        self.game_process: Optional[mp.Process] = None

    def reset(
        self, *, seed: Optional[int] = None, options: Optional[dict] = None
    ) -> Tuple[Any, dict]:
        """
        Resets the environment to start a new episode.

        Stops any existing game process, starts a fresh one with fresh queues,
        and waits for the initial observation from the game.

        Args:
            seed: Forwarded to ``gym.Env.reset`` for seeding.
            options: Accepted for API compatibility; currently unused.

        Returns:
            ``(initial_observation, info)`` taken from the first item the game
            process puts on the observation queue.

        Raises:
            RuntimeError: If the game process exits before producing the
                initial observation.
        """
        # It's required to call the superclass's reset method for proper seeding
        super().reset(seed=seed)

        self._stop_game_process()

        # Use brand-new queues for every episode: leftover actions/observations
        # from the previous episode must not leak into this one, and a queue
        # that was in use by a terminated process may be corrupted.
        self.action_queue = mp.Queue()
        self.obs_queue = mp.Queue()

        process = mp.Process(
            target=self._run_game_process,
            args=(
                self.bot_class,
                self.map_name,
                self.bot_race,
                self.opponent_race,
                self.difficulty,
                self.action_queue,
                self.obs_queue,
            ),
        )
        process.start()
        # Only record the process once it has actually started, so cleanup
        # never tries to join a process that was never launched.
        self.game_process = process

        # Wait for the initial observation from the new game process.
        initial_obs, _, _, _, info = self._wait_for_observation()
        return initial_obs, info

    def step(self, action: Any) -> ObservationQueueItem:
        """
        Executes one time step within the environment.

        The agent's action is sent to the game process, and the wrapper
        waits for the resulting state information to be returned.

        Args:
            action: The action to forward to the bot via the action queue.

        Returns:
            The ``(observation, reward, terminated, truncated, info)`` tuple
            produced by the game process.

        Raises:
            RuntimeError: If no game process has been started (``reset()`` was
                not called) or the game process exits without answering.
        """
        if self.game_process is None:
            raise RuntimeError(
                "step() called with no running game process; call reset() first."
            )
        self.action_queue.put(action)
        return self._wait_for_observation()

    def close(self):
        """Performs any necessary cleanup. Terminates the game process."""
        self._stop_game_process()

    def _wait_for_observation(self) -> ObservationQueueItem:
        """Return the next observation-queue item, failing fast if the game died."""
        process = self.game_process
        if process is None:
            raise RuntimeError(
                "No game process is running; call reset() first."
            )

        # Poll with a timeout instead of blocking indefinitely so that a
        # crashed game process surfaces as an error rather than a deadlock.
        while True:
            try:
                return self.obs_queue.get(timeout=self._POLL_INTERVAL)
            except queue.Empty:
                if process.is_alive():
                    continue
                break

        # The process has exited. It may have queued a final item just before
        # exiting, so make one last attempt before giving up.
        try:
            return self.obs_queue.get(timeout=self._POLL_INTERVAL)
        except queue.Empty:
            raise RuntimeError(
                "The game process exited without sending an observation "
                f"(exit code: {process.exitcode})."
            ) from None

    def _stop_game_process(self) -> None:
        """Terminate the current game process (if any) and reap it."""
        process = self.game_process
        if process is None:
            return

        if process.is_alive():
            process.terminate()
            process.join(timeout=self._SHUTDOWN_TIMEOUT)
            if process.is_alive():
                # terminate() was not honoured in time; force the process down.
                process.kill()
        # Reap the child so it does not linger as a zombie.
        process.join()
        self.game_process = None

    @staticmethod
    def _run_game_process(
        bot_class: Type[BotAI],
        map_name: str,
        bot_race: Race,
        opponent_race: Race,
        difficulty: Difficulty,
        action_queue: mp.Queue,
        obs_queue: mp.Queue
    ) -> None:
        """
        The target function for the separate game process. Do not call directly.

        Args:
            bot_class: BotAI subclass to instantiate inside the game process.
            map_name: Name of the map, resolved with ``maps.get``.
            bot_race: Race played by the bot.
            opponent_race: Race of the computer opponent.
            difficulty: Difficulty of the computer opponent.
            action_queue: Queue handed to the bot for receiving actions.
            obs_queue: Queue handed to the bot for sending
                ``ObservationQueueItem`` tuples back.
        """
        # NOTE: the queue parameters are annotated as plain ``mp.Queue``.
        # ``mp.Queue`` is a factory method, not a class, so subscripting it in
        # a signature raises TypeError when the annotation is evaluated.

        # The BotAI class is instantiated here, inside the new process.
        # It is passed the queues for communication.
        bot_instance = bot_class(action_queue=action_queue, obs_queue=obs_queue)

        # This is a blocking call that runs the main python-sc2 game loop.
        run_game(
            maps.get(map_name),
            [
                Bot(bot_race, bot_instance),
                Computer(opponent_race, difficulty),
            ],
            realtime=False,
        )