Unityではじめる機械学習・強化学習 Unity ML-Agents実践ゲームプログラミングその3

CHAPTER 3 はじめての学習環境の作成

この章では迷路を通して機械学習を学んでいきます。また、「Academy」「Brain」「Agent」の三つの要素を理解し、それぞれの役割を学んでいきます。

f:id:yamakasa3:20180813215030p:plain

~~続きは今度。~~

ML-AgentsのフォルダをAssetsに保存して、TensorFlowSharpプラグインをimportします。

Academyの設定

Academy は学習環境の管理をするオブジェクトであり、ステップ数やレンダリングの設定などを行います。

f:id:yamakasa3:20180814231449p:plain

Brainの設定

Brain はAgentが観測した状態に応じて行動を決定するオブジェクトです。TensorFlowを使用することもありますが、人間の入力で決定することもできます。

f:id:yamakasa3:20180814232640p:plain

Agentの設定

迷路に対応したAgentのスクリプトを用意する必要があります。

報酬は $[ -1, 1 ]$ の範囲で正規化するのが一般的です。AddReward()メソッドで報酬を与えています。また、ステップ毎に $-0.01$ の報酬が与えられているのは、より短手数でゴールに到達することが評価されるためです。

MazeAgent.cs

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using MLAgents;

/// <summary>
/// ML-Agents agent that walks a robot over a small grid maze.
/// The agent observes its normalized grid coordinates, receives one discrete
/// action per step (0: up, 1: down, 2: left, 3: right) and is rewarded
/// +1 for reaching the treasure, -1 for stepping on the trap and -0.01 for
/// every other step. The episode ends on the step after a terminal cell
/// (treasure or trap) has been reached.
/// </summary>
public class MazeAgent : Agent {

    // Cell codes stored in MAP (0 is plain floor).
    const int Treasure = 1;
    const int Trap = 2;
    const int Wall = 9;

    // Rewards handed out after each move.
    const float TreasureReward = 1.0f;
    const float TrapReward = -1.0f;
    const float StepPenalty = -0.01f;

    // Layout numbers used to convert grid coordinates to a world position.
    // NOTE(review): presumably a 960x720 canvas with 240-unit cells — confirm against the scene.
    const int CellSize = 240;
    const int LayoutWidth = 960;
    const int LayoutHeight = 720;

    // Grid cell the robot is placed on at every reset.
    const int StartX = 0;
    const int StartY = 1;

    // Map, indexed as MAP[y, x] (0: floor, 1: treasure, 2: trap, 9: wall).
    static readonly int[,] MAP = {
        {0, 0, 0, 1},
        {0, 9, 0, 0},
        {0, 0, 0, 2}
    };

    // Grid dimensions derived from MAP so that bounds checks and observation
    // scaling stay correct if the map is edited. Declared after MAP because
    // static field initializers run in textual order.
    static readonly int Width = MAP.GetLength(1);
    static readonly int Height = MAP.GetLength(0);

    GameObject robo;    // Robot object moved around the scene
    int x;  // Current grid X coordinate (column)
    int y;  // Current grid Y coordinate (row)

    // Collects the state: the grid position scaled by the map dimensions.
    public override void CollectObservations() {
        AddVectorObs(this.x / (float)Width);
        AddVectorObs(this.y / (float)Height);
    }

    // Applies one action: moves the robot and assigns the reward for the new cell.
    public override void AgentAction(float[] vectorAction, string textAction) {
        // Show the cumulative reward through Monitor.
        Monitor.verticalOffset = 80;
        Monitor.Log("Reward", "" + this.GetCumulativeReward(), this.gameObject.transform);

        // The robot already stands on a terminal cell (treasure or trap):
        // finish the episode without taking another action.
        int current = MAP[this.y, this.x];
        if (current == Treasure || current == Trap) {
            Done();
            return;
        }

        // Without an action value there is nothing to do this step.
        if (vectorAction == null || vectorAction.Length == 0) {
            return;
        }

        // Read the action; negative values are ignored (no move, no reward).
        int action = (int)vectorAction[0];
        if (action < 0) {
            return;
        }

        // Move the robot.
        MoveRobo(action);

        // Reward depends on the cell the robot ended up on.
        switch (MAP[this.y, this.x]) {
            case Treasure:
                AddReward(TreasureReward);
                break;
            case Trap:
                AddReward(TrapReward);
                break;
            default:
                // Small per-step penalty.
                AddReward(StepPenalty);
                break;
        }
    }

    // Called on reset: puts the robot back on the start cell.
    public override void AgentReset() {
        // Look the robot up only when no live reference is cached, instead of
        // searching the scene on every reset.
        if (this.robo == null) {
            this.robo = GameObject.Find("robo");
            if (this.robo == null) {
                Debug.LogError("MazeAgent: GameObject \"robo\" was not found in the scene.");
            }
        }

        // Place the robot on the start cell.
        SetRoboPosition(StartX, StartY);
    }

    // Moves the robot one cell in the direction given by action
    // (0: up, 1: down, 2: left, 3: right). Any other value leaves it in place.
    void MoveRobo(int action) {
        // Compute the destination cell.
        int dx = this.x;
        int dy = this.y;
        switch (action) {
            case 0: dy--; break; // up
            case 1: dy++; break; // down
            case 2: dx--; break; // left
            case 3: dx++; break; // right
        }

        // Stay put when the destination is outside the map or is a wall.
        // The bounds checks come first so MAP is never indexed out of range.
        if (dx < 0 || dx >= Width || dy < 0 || dy >= Height || MAP[dy, dx] == Wall) {
            return;
        }

        // Commit the move.
        SetRoboPosition(dx, dy);
    }

    // Stores the grid position and moves the robot object to the matching
    // world position.
    void SetRoboPosition(int x, int y) {
        this.x = x;
        this.y = y;

        // Keep the logical state usable even when the robot object is missing.
        if (this.robo == null) {
            return;
        }

        // Kept as Vector2 like the original code; per Unity's Vector2/Vector3
        // conversions, assigning it back sets the z component to 0.
        Vector2 pos = this.robo.transform.position;
        pos.x = (CellSize - LayoutWidth) / 2 + x * CellSize;
        pos.y = -(CellSize - LayoutHeight) / 2 - y * CellSize;
        this.robo.transform.position = pos;
    }

    // GUI handling.
    void OnGUI() {
        // Text color of GUI labels (used for the Monitor output).
        GUI.skin.label.normal.textColor = Color.blue;
    }
}