
4.3 - Code - The Macro Bot Environment

This file contains the complete implementation of the MacroEnv. It builds directly upon the concepts from the WorkerBot but expands the agent's capabilities to include managing supply, forcing it to learn a more complex economic policy.

This code translates the design specification from the preceding sections into a functional gymnasium environment.

Implementation Checklist

  • MacroBot (BotAI subclass):
    • Handles a new action (action == 2) for building Supply Depots.
    • Implements the event-based penalty for becoming supply-blocked by tracking the previous step's supply state.
    • Provides the new, expanded 4-feature observation vector.
    • Updates the termination condition to a higher worker count (30).
  • MacroEnv (gymnasium.Env subclass):
    • Defines the expanded action_space as Discrete(3).
    • Defines the expanded observation_space as a Box with a shape of (4,).

macro_bot.py

import numpy as np
import gymnasium as gym
from gymnasium.spaces import Box, Discrete
import multiprocessing as mp
from queue import Empty

from sc2.bot_ai import BotAI
from sc2.ids.unit_typeid import UnitTypeId
from sc2_gym_env import SC2GymEnv

# --- The BotAI Implementation (Runs in the Game Process) ---

class MacroBot(BotAI):
    """
    The BotAI actor for learning a simple macro trade-off:
    when to build a worker vs. when to build a supply depot.
    """

    def __init__(self, action_queue: mp.Queue, obs_queue: mp.Queue):
        super().__init__()
        self.action_queue = action_queue
        self.obs_queue = obs_queue
        # Track the previous step's supply so we can penalize the exact step
        # on which the agent becomes supply-blocked.
        self.last_supply_left = 15

    async def _handle_action(self, action: int) -> float:
        """
        Executes the agent's action and calculates the sparse reward.
        Returns the reward for the action.
        """
        # Action 1: Build SCV
        if action == 1 and self.can_afford(UnitTypeId.SCV) and self.townhalls.idle.exists:
            self.train(UnitTypeId.SCV)
            return 1.0

        # Action 2: Build Supply Depot
        elif action == 2 and self.can_afford(UnitTypeId.SUPPLYDEPOT) and not self.already_pending(UnitTypeId.SUPPLYDEPOT):
            # We need to find a valid location to build the depot.
            build_location = await self.find_placement(UnitTypeId.SUPPLYDEPOT, near=self.start_location, placement_step=5)
            if build_location:
                await self.build(UnitTypeId.SUPPLYDEPOT, build_location)
                # Slightly higher reward to incentivize this crucial action.
                return 2.0

        # Return 0 reward if the action was "Do Nothing" or was impossible.
        return 0.0

    async def on_step(self, iteration: int):
        """
        The main game loop, throttled to interact with the agent every 8 steps.
        """
        if iteration % 8 != 0:
            return

        try:
            action = self.action_queue.get(timeout=1)

            # Get the sparse reward from executing the action.
            reward = await self._handle_action(action)

            # Add an event-based penalty for getting supply blocked.
            # This is a strong signal to teach the agent to avoid this state.
            if self.supply_left == 0 and self.last_supply_left > 0:
                reward -= 10.0
            self.last_supply_left = self.supply_left

            # Get the observation for the next state.
            observation = np.array([
                min(self.minerals, 1000) / 1000.0,
                self.workers.amount / 50.0,
                self.supply_used / 200.0,
                self.supply_cap / 200.0
            ], dtype=np.float32)

            # Define the termination condition.
            terminated = self.workers.amount >= 30

            # Send the data to the agent.
            self.obs_queue.put((observation, reward, terminated, False, {}))

            if terminated:
                await self.client.leave()

        except Empty:
            print("Action queue was empty. Assuming training has ended.")
            await self.client.leave()
            return

# --- The Gymnasium Environment (Runs in the Main Process) ---

class MacroEnv(SC2GymEnv):
    """
    The Gymnasium Wrapper for the MacroBot.
    """

    def __init__(self):
        super().__init__(bot_class=MacroBot, map_name="AcropolisLE")

        # Action space: 0=Nothing, 1=Build SCV, 2=Build Supply Depot
        self.action_space = Discrete(3)

        # Observation space: A 1D array with 4 normalized float values
        self.observation_space = Box(
            low=0.0,
            high=1.0,
            shape=(4,),
            dtype=np.float32
        )
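
With both classes in place, the environment can be exercised like any other Gymnasium environment. Below is a minimal smoke-test sketch that is not part of the original file: it assumes the SC2GymEnv base class from the earlier sections provides the standard reset()/step()/close() plumbing and launches the game process itself, and the file name run_macro_env.py is only illustrative. A short random-action rollout like this is a quick way to confirm the queue plumbing works before handing the environment to an RL library such as Stable-Baselines3.

run_macro_env.py

# A hypothetical smoke test for MacroEnv (not part of the original code).
# Assumes SC2GymEnv implements the standard Gymnasium reset/step/close API.
from macro_bot import MacroEnv


def main():
    env = MacroEnv()
    obs, info = env.reset()
    total_reward = 0.0

    for _ in range(200):
        # Sample random actions just to verify the action/observation plumbing.
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        if terminated or truncated:
            break

    env.close()
    print(f"Rollout finished with total reward {total_reward:.1f}")


if __name__ == "__main__":
    # The environment spawns the game in a separate process, so keep the
    # entry-point guard to avoid re-importing this module in child processes.
    main()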