[Unity] 퍼포 더 코기 (ML-Agents) 강화학습 공부

퍼포 더 코기 : Unity ML-에이전트 툴킷으로 탄생한 재롱둥이 – Unity Blog

게임 제작은 게임 컨셉 및 로직 정의, 에셋 및 애니메이션 제작, NPC 동작 지정, 난이도 및 밸런스 조정, 출시 전 실제 플레이어를 통한 게임 테스트 등 여러 까다로운 작업이 수반되는 창조적 과정입니다. 유니티는 이러한 전체 제작 과정에서 머신러닝을 활용하는 것이 가능하다고...

blogs.unity3d.com

유니티 블로그에 ML-Agents 예제로 올라온 퍼포 더 코기에 대해서 정리를 해봤다.

코기가 타깃인 나무막대를 물어오는 예제인데 재미있는 점은 코기가 움직이는 방식

걷기, 달리기, 점프, 물어오기와 같은 행동까지 학습을 했다는 점이었다.

유니티 블로그에서 퍼포 더 코기 유니티 프로젝트를 다운로드하고

기존 유니티 ML-Agents SDK를 다운로드한 곳에서 새로 프로젝트를 만들었다.

기존 퍼포 더 코기의 프로젝트에서 Training Platform을 가지고 왔다.

유니티 블로그에 자세하게 나타나 있는 것처럼 코기는 래그돌(ragdoll)과 조인트 모터로 구성되어 있다.

아래는 코기의 학습을 수행하기 위한 Agent 코드이다.

using System.Collections;
using System.Collections.Generic;
using MLAgents;
using UnityEngine;

/// <summary>
/// ML-Agents agent for a four-legged ragdoll dog. Each decision sets the target
/// rotation and strength of the eight leg joints and a body-turn strength; every
/// physics step the agent is rewarded for body velocity along the direction to
/// <see cref="target"/> and penalised for elapsed time and for turning effort.
/// </summary>
public class DogAgent : Agent
{
    /// <summary>
    /// Target the dog moves toward. Hidden in the inspector and never assigned
    /// inside this class, so it must be set from another script before the first
    /// FixedUpdate.
    /// </summary>
    [HideInInspector]
    public Transform target;

    // These items should be set in the inspector
    [Header("Body Parts")]
    public Transform mouthPosition;
    public Transform body;
    public Transform leg0_upper;
    public Transform leg1_upper;
    public Transform leg2_upper;
    public Transform leg3_upper;
    public Transform leg0_lower;
    public Transform leg1_lower;
    public Transform leg2_lower;
    public Transform leg3_lower;

    [Header("Body Rotation")]
    // Upper bound of the turning force magnitude (reached when the turn action is 1).
    public float maxTurnSpeed;
    // ForceMode passed to Rigidbody.AddForceAtPosition when turning the body.
    public ForceMode turningForceMode;

    // After a decision is requested the counter is reset to this value and then
    // decremented once per FixedUpdate, i.e. one decision every (value + 1) steps.
    const int StepsBetweenDecisions = 3;

    JointDriveController jdController;

    /// <summary>
    /// Un-normalised vector from the body rigidbody to <see cref="target"/>;
    /// refreshed by <see cref="UpdateDirToTarget"/>.
    /// </summary>
    [HideInInspector]
    public Vector3 dirToTarget;

    // Latest body-turn action, stored already clamped to [0, 1] by AgentAction.
    float rotateBodyActionValue;

    // Counts the number of steps until the next agent's decision will be made
    int decisionCounter;

    // Neither flag is read or written inside this class.
    public bool runningToItem;
    public bool returningItem;

    /// <summary>
    /// Caches the JointDriveController on this GameObject and registers the body
    /// and all eight leg segments with it.
    /// </summary>
    private void Awake()
    {
        //Joint Drive Setup
        jdController = GetComponent<JointDriveController>();
        jdController.SetupBodyPart(body);
        jdController.SetupBodyPart(leg0_upper);
        jdController.SetupBodyPart(leg0_lower);
        jdController.SetupBodyPart(leg1_upper);
        jdController.SetupBodyPart(leg1_lower);
        jdController.SetupBodyPart(leg2_upper);
        jdController.SetupBodyPart(leg2_lower);
        jdController.SetupBodyPart(leg3_upper);
        jdController.SetupBodyPart(leg3_lower);
    }

    /// <summary>
    /// Turns the body toward the target by applying a pair of opposite forces.
    /// </summary>
    /// <param name="act">Turn strength; clamped to [0, 1] and scaled to [0, maxTurnSpeed].</param>
    void RotateBody(float act)
    {
        float speed = Mathf.Lerp(0, maxTurnSpeed, Mathf.Clamp01(act));

        // Only the horizontal part of the direction is used for steering.
        Vector3 rotDir = dirToTarget;
        rotDir.y = 0;

        Rigidbody bodyRb = jdController.bodyPartsDict[body].rb;
        Vector3 turnForce = rotDir.normalized * speed * Time.deltaTime;

        // The two forces are equal and opposite and are applied at body.forward and
        // -body.forward, so their linear contributions cancel and only a turning
        // moment remains.
        bodyRb.AddForceAtPosition(turnForce, body.forward, turningForceMode);
        bodyRb.AddForceAtPosition(-turnForce, -body.forward, turningForceMode);
    }

    /// <summary>
    /// Adds the observations of one body part: a ground-contact flag and, for every
    /// part except the body itself, its three normalised joint rotations and its
    /// joint strength divided by the controller's maxJointForceLimit.
    /// </summary>
    /// <param name="bp">Body part to observe.</param>
    public void CollectObservationBodyPart(BodyPart bp)
    {
        AddVectorObs(bp.groundContact.touchingGround ? 1 : 0);

        // The body gets only the ground-contact flag; joint data is added for the legs.
        if (bp.rb.transform != body)
        {
            AddVectorObs(bp.currentXNormalizedRot);
            AddVectorObs(bp.currentYNormalizedRot);
            AddVectorObs(bp.currentZNormalizedRot);
            AddVectorObs(bp.currentStrength / jdController.maxJointForceLimit);
        }
    }

    /// <summary>
    /// Builds the observation vector: direction to target, body local position,
    /// body linear and angular velocity, body forward and up vectors, followed by
    /// the per-part observations of every registered body part.
    /// </summary>
    public override void CollectObservations()
    {
        Rigidbody bodyRb = jdController.bodyPartsDict[body].rb;

        AddVectorObs(dirToTarget.normalized);
        AddVectorObs(body.localPosition);
        AddVectorObs(bodyRb.velocity);        // linear velocity of the body
        AddVectorObs(bodyRb.angularVelocity); // angular velocity of the body
        AddVectorObs(body.forward);           // forward direction of the body
        AddVectorObs(body.up);                // up direction of the body

        foreach (var bodyPart in jdController.bodyPartsDict.Values)
        {
            CollectObservationBodyPart(bodyPart);
        }
    }

    /// <summary>
    /// Applies one decision to the joints.
    /// Action layout: [0..7] x/y target rotation of the four upper legs,
    /// [8..11] x target rotation of the four lower legs, [12..19] joint strength
    /// of the upper then lower legs, [20] body-turn strength.
    /// </summary>
    /// <param name="vectorAction">Continuous action vector; indices 0 through 20 are read.</param>
    /// <param name="textAction">Unused.</param>
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        var bpDict = jdController.bodyPartsDict;

        // Update joint drive target rotation
        bpDict[leg0_upper].SetJointTargetRotation(vectorAction[0], vectorAction[1], 0);
        bpDict[leg1_upper].SetJointTargetRotation(vectorAction[2], vectorAction[3], 0);
        bpDict[leg2_upper].SetJointTargetRotation(vectorAction[4], vectorAction[5], 0);
        bpDict[leg3_upper].SetJointTargetRotation(vectorAction[6], vectorAction[7], 0);
        bpDict[leg0_lower].SetJointTargetRotation(vectorAction[8], 0, 0);
        bpDict[leg1_lower].SetJointTargetRotation(vectorAction[9], 0, 0);
        bpDict[leg2_lower].SetJointTargetRotation(vectorAction[10], 0, 0);
        bpDict[leg3_lower].SetJointTargetRotation(vectorAction[11], 0, 0);

        // Update joint drive strength
        bpDict[leg0_upper].SetJointStrength(vectorAction[12]);
        bpDict[leg1_upper].SetJointStrength(vectorAction[13]);
        bpDict[leg2_upper].SetJointStrength(vectorAction[14]);
        bpDict[leg3_upper].SetJointStrength(vectorAction[15]);
        bpDict[leg0_lower].SetJointStrength(vectorAction[16]);
        bpDict[leg1_lower].SetJointStrength(vectorAction[17]);
        bpDict[leg2_lower].SetJointStrength(vectorAction[18]);
        bpDict[leg3_lower].SetJointStrength(vectorAction[19]);

        // Store the turn action already clamped to the range RotateBody applies.
        // Otherwise a negative action would turn nothing yet make the
        // "rotation penalty" in FixedUpdate a positive reward.
        rotateBodyActionValue = Mathf.Clamp01(vectorAction[20]);
    }

    /// <summary>
    /// Rewards the component of the body velocity that points at the target
    /// (negative when the body moves away from it).
    /// </summary>
    void RewardFunctionMovingTowards()
    {
        float movingTowardsDot = Vector3.Dot(
            jdController.bodyPartsDict[body].rb.velocity, dirToTarget.normalized);
        AddReward(0.01f * movingTowardsDot);
    }

    /// <summary>
    /// Small constant penalty applied every physics step.
    /// </summary>
    void RewardFunctionTimePenalty()
    {
        AddReward(-0.001f);  //-0.001f chosen by experimentation.
    }

    /// <summary>
    /// Recomputes <see cref="dirToTarget"/> from the current target position and
    /// the body rigidbody position.
    /// </summary>
    public void UpdateDirToTarget()
    {
        dirToTarget = target.position - jdController.bodyPartsDict[body].rb.position;
    }

    /// <summary>
    /// Per-physics-step loop: refresh the target direction, request a decision when
    /// the counter reaches zero, apply the current turn action and hand out the
    /// per-step rewards.
    /// </summary>
    private void FixedUpdate()
    {
        UpdateDirToTarget();

        if (decisionCounter == 0)
        {
            decisionCounter = StepsBetweenDecisions;
            RequestDecision();
        }
        else
        {
            decisionCounter--;
        }

        RotateBody(rotateBodyActionValue);

        // Penalise turning effort; the stored action is in [0, 1], so this is never positive.
        var bodyRotationPenalty = -0.001f * rotateBodyActionValue;
        AddReward(bodyRotationPenalty);

        RewardFunctionMovingTowards();
        RewardFunctionTimePenalty();
    }

}

에이전트에서 수행하는 동작은 다음과 같다.

코기의 다리 부분의 조인트 모터에 값 전달

(코기가 직접 달리거나 뛸 수 있도록 한다.)

코기의 몸의 방향 설정

(목표로 하는 막대기로 향하도록 하고, 몸이 뒤집힌 경우 다시 일어나서 움직일 수 있도록 한다.)

그 외에도 목표물을 향해 빠르게 다가가는 경우 보상,

시간이 지나는 경우 페널티 부과

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Training target for a <see cref="DogAgent"/>. When the agent's direction vector
/// to its target becomes shorter than 1, the agent receives a reward of 1, this
/// object is moved to a new random position over the spawn area, and the agent's
/// episode is marked done.
/// </summary>
public class TrainingTarget : MonoBehaviour
{
    [Header("General")]
    // Agent that is rewarded for reaching this object.
    public DogAgent trainee;
    // Object whose Collider bounds define where this target may be placed.
    public Transform spawnArea;

    // Bounds of the spawn area's collider, captured once in Start.
    private Bounds spawnAreaBounds;

    /// <summary>
    /// Caches the spawn area bounds and places the target for the first time.
    /// </summary>
    void Start()
    {
        spawnAreaBounds = spawnArea.GetComponent<Collider>().bounds;
        SpawnItemTraining();
    }

    /// <summary>
    /// Runs every physics step and checks whether the agent has reached its target.
    /// </summary>
    void FixedUpdate()
    {
        // Without a target the agent's direction vector is meaningless (it stays at
        // its default of zero, which would otherwise count as "reached").
        if (trainee.target == null)
        {
            return;
        }

        // Refresh the direction before reading it. Unity does not guarantee that the
        // agent's FixedUpdate has already run this step, so the stored vector may be
        // the zero default on the first step, or left over from before the last respawn.
        trainee.UpdateDirToTarget();

        if (trainee.dirToTarget.magnitude < 1)
        {
            TouchedTargetTraining();
        }
    }

    /// <summary>
    /// Use the spawn area's bounds to pick a random spawn position: a random X/Z
    /// offset within the bounds' extents, 1 unit above the spawn area's position.
    /// </summary>
    public void SpawnItemTraining()
    {
        Vector3 extents = spawnAreaBounds.extents;
        float randomPosX = Random.Range(-extents.x, extents.x);
        float randomPosZ = Random.Range(-extents.z, extents.z);
        transform.position = spawnArea.position + new Vector3(randomPosX, 1f, randomPosZ);
    }

    /// <summary>
    /// Agent touched the target: reward it, respawn the target, end the episode.
    /// </summary>
    public void TouchedTargetTraining()
    {
        trainee.AddReward(1); //Dog Fetch
        SpawnItemTraining();
        trainee.Done();
    }

}

코기의 에이전트와 상호작용하는 타깃은 위와 같이 동작한다.

코기가 가까이 다가오는 경우 코기의 에이전트에 리워드 지급

랜덤한 위치에 타깃 재생성

유니티 블로그에서 제공하고 있는 코기 샘플만큼은 아니지만

의도대로 동작하는 모습을 확인했다.

코기 샘플을 이용하여 다음에는 응용한 다른 프로젝트를 진행해보려고 한다.

코기와 같은 방식으로 움직이는 인간형 모델을 생각해보면

아마 좀비가 가장 가깝지 않을까 하는 생각이 든다.

'Unity' 카테고리의 다른 글

[Unity] ml-agents 설치하기 및 테스트 (0)	2020.01.19
[Unity] ML-Agents로 간단한 강화학습 예제 만들어보기 (2)	2019.05.16

퍼즐잎의 기술블로그

[Unity] 퍼포 더 코기 (ML-Agents) 강화학습 공부

'Unity' 카테고리의 다른 글

티스토리툴바

[Unity] 퍼포 더 코기 (ML-Agents) 강화학습 공부

'Unity' 카테고리의 다른 글

'Unity' Related Articles

티스토리툴바