4.3 - Code - The Macro Bot Environment
This file contains the complete implementation of the `MacroEnv`. It builds directly upon the concepts from the `WorkerBot` but expands the agent's capabilities to include managing supply, forcing it to learn a more complex economic policy.
This code translates the design specification from the preceding sections into a functional `gymnasium` environment.
Implementation Checklist
- `MacroBot` (`BotAI` subclass):
  - Handles a new action (`action == 2`) for building Supply Depots.
  - Implements the event-based penalty for becoming supply-blocked by tracking the previous step's supply state.
  - Provides the new, expanded 4-feature observation vector.
  - Updates the termination condition to a higher worker count (30).
- `MacroEnv` (`gymnasium.Env` subclass):
  - Defines the expanded `action_space` as `Discrete(3)`.
  - Defines the expanded `observation_space` as a `Box` with a shape of `(4,)`.
macro_bot.py
import numpy as np
import gymnasium as gym
from gymnasium.spaces import Box, Discrete
import multiprocessing as mp
from queue import Empty
from sc2.bot_ai import BotAI
from sc2.race import Race
from sc2.ids.unit_typeid import UnitTypeId
from sc2_gym_env import SC2GymEnv
# --- The BotAI Implementation (Runs in the Game Process) ---
class MacroBot(BotAI):
    """
    The BotAI actor for learning a simple macro trade-off:
    when to build a worker vs. when to build a supply depot.

    Actions are read from ``action_queue``. After each action is applied, a
    5-tuple ``(observation, reward, terminated, False, {})`` is written to
    ``obs_queue`` (the last two slots presumably map to Gymnasium's
    ``truncated`` and ``info`` -- confirm against the consuming environment).
    """

    # Number of game iterations between two consecutive agent interactions.
    _STEP_INTERVAL = 8
    # Reward deducted on the step where free supply first drops to zero.
    _SUPPLY_BLOCK_PENALTY = 10.0
    # The episode terminates once at least this many workers exist.
    _TARGET_WORKER_COUNT = 30

    def __init__(self, action_queue: mp.Queue, obs_queue: mp.Queue):
        """
        Store the communication queues and initialise supply tracking.

        Args:
            action_queue: Queue from which integer actions are read.
            obs_queue: Queue onto which step-result tuples are written.

        The annotations are deliberately un-subscripted: ``multiprocessing.Queue``
        is a factory method of the default context rather than a class, so
        ``mp.Queue[tuple]`` raises ``TypeError`` as soon as the ``def`` statement
        is executed on interpreters that evaluate annotations eagerly.
        """
        super().__init__()
        self.action_queue = action_queue
        self.obs_queue = obs_queue
        # We need to track the last step's supply to penalize the frame we get blocked.
        self.last_supply_left = 15

    async def _handle_action(self, action: int) -> float:
        """
        Execute the agent's action and calculate the sparse reward.

        Args:
            action: 1 requests an SCV, 2 requests a Supply Depot; any other
                value is treated as "do nothing".

        Returns:
            1.0 when an SCV is queued, 2.0 when a Supply Depot build is issued,
            and 0.0 when nothing was done or the requested action was not
            possible.
        """
        # Action 1: Build SCV (requires resources and an idle townhall).
        if action == 1 and self.can_afford(UnitTypeId.SCV) and self.townhalls.idle.exists:
            self.train(UnitTypeId.SCV)
            return 1.0

        # Action 2: Build Supply Depot (at most one pending at a time).
        elif (
            action == 2
            and self.can_afford(UnitTypeId.SUPPLYDEPOT)
            and not self.already_pending(UnitTypeId.SUPPLYDEPOT)
        ):
            # We need to find a valid location to build the depot.
            build_location = await self.find_placement(
                UnitTypeId.SUPPLYDEPOT, near=self.start_location, placement_step=5
            )
            if build_location:
                # NOTE(review): the reward is granted without inspecting the
                # result of ``self.build`` -- confirm whether a failed build
                # should still be rewarded.
                await self.build(UnitTypeId.SUPPLYDEPOT, build_location)
                # Slightly higher reward to incentivize this crucial action.
                return 2.0

        # Return 0 reward if the action was "Do Nothing" or was impossible.
        return 0.0

    def _make_observation(self) -> np.ndarray:
        """Return the 4-feature float32 observation built from the current game state."""
        return np.array(
            [
                min(self.minerals, 1000) / 1000.0,  # minerals, capped at 1000
                self.workers.amount / 50.0,
                self.supply_used / 200.0,
                self.supply_cap / 200.0,
            ],
            dtype=np.float32,
        )

    async def on_step(self, iteration: int) -> None:
        """
        The main game loop, throttled to interact with the agent every 8 steps.

        On an interaction step this waits up to one second for an action,
        applies it, and publishes the resulting step tuple. If no action
        arrives in time the bot leaves the game.

        Args:
            iteration: Index of the current game step, as passed by the caller.
        """
        if iteration % self._STEP_INTERVAL != 0:
            return

        # ``Empty`` is the signal raised by the timed queue read, so that is
        # the only statement kept inside the ``try``.
        try:
            action = self.action_queue.get(timeout=1)
        except Empty:
            print("Action queue was empty. Assuming training has ended.")
            await self.client.leave()
            return

        # Get the sparse reward from executing the action.
        reward = await self._handle_action(action)

        # Add an event-based penalty for getting supply blocked.
        # It fires only on the transition from "some supply left" to "none
        # left", which is why the previous step's value is remembered.
        if self.supply_left == 0 and self.last_supply_left > 0:
            reward -= self._SUPPLY_BLOCK_PENALTY
        self.last_supply_left = self.supply_left

        # Get the observation for the next state.
        observation = self._make_observation()

        # Define the termination condition.
        terminated = self.workers.amount >= self._TARGET_WORKER_COUNT

        # Send the data to the agent.
        self.obs_queue.put((observation, reward, terminated, False, {}))

        if terminated:
            await self.client.leave()
# --- The Gymnasium Environment (Runs in the Main Process) ---
class MacroEnv(SC2GymEnv):
    """
    Gymnasium-side wrapper that pairs the generic SC2 environment with MacroBot.
    """

    def __init__(self):
        """Configure the base environment and declare the action/observation spaces."""
        super().__init__(bot_class=MacroBot, map_name="AcropolisLE")

        # Three discrete choices: 0 = do nothing, 1 = build an SCV,
        # 2 = build a Supply Depot.
        n_actions = 3
        # Observations are a flat vector of four float32 values in [0, 1].
        n_features = 4

        self.action_space = Discrete(n_actions)
        self.observation_space = Box(
            low=0.0, high=1.0, shape=(n_features,), dtype=np.float32
        )