Stable baselines in child script

Typically: "How do I... ", "How can I... " questions
Post Reply
Adersh M
Posts: 16
Joined: 30 Nov 2022, 06:52

Stable baselines in child script

Post by Adersh M »

I have created a gym environment for the Kondo humanoid robot in a non-threaded script. I want to sue SB3 to run RL in this environment. I have tested the gym environment, and it works well, but SB3 doesn't seem to work, nor does it give any output. Please have a look

Code: Select all

#python
import gymnasium as gym
from gymnasium import spaces
import math
import numpy as np
import time
from stable_baselines3 import PPO
import sys


class KondoEnv(gym.Env):
    """Custom Environment for Kondo in CoppeliaSim"""

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, sim, joint_names):
        super(KondoEnv, self).__init__()
        self.sim = sim
        self.joint_names = joint_names
        
        # Define action and observation space
        self.action_space = spaces.Box(low=-20, high=20, shape=(len(joint_names),), dtype=np.float32)
        self.observation_space = spaces.Box(low=np.array([-100, 0]), high=np.array([100, 2]), dtype=np.float32)

        # Initialize objects
        self.KondoMarker = sim.getObject('/base_link_respondable')
        self.Goal = sim.getObject('/Goal')
        self.Worldref = sim.getObject('/GlobalFrame')

    def step(self, action):
        # Execute one time step within the environment
        for idx, joint_name in enumerate(self.joint_names):
            self.sim.setJointTargetVelocity(globals()[joint_name[1:].replace('/', '')], action[idx])

        # Calculate observation
        distance_to_goal = np.linalg.norm(np.array(self.sim.getObjectPosition(self.KondoMarker, self.Worldref)) - np.array(self.sim.getObjectPosition(self.Goal, self.Worldref)))
        height = self.sim.getObjectPosition(self.KondoMarker, self.Worldref)[2]
        self.observation = np.array([distance_to_goal, height])

        # Calculate reward
        self.reward = -distance_to_goal - 0.2 * abs(0.6824 - height)

        # Set termination conditions
        if distance_to_goal < 0.4:
            self.terminated = True
            self.reward = 100
        elif height < 0.5:
            self.terminated = True
            self.reward = -100
        else:
            self.terminated = False

        self.truncated = False
        self.info = {}
        return self.observation, self.reward, self.terminated, self.truncated, self.info

    def reset(self, seed=None):
        # Reset environment
        self.terminated = False

        # Reset observation space
        distance_to_goal = np.linalg.norm(np.array(self.sim.getObjectPosition(self.KondoMarker, self.Worldref)) - np.array(self.sim.getObjectPosition(self.Goal, self.Worldref)))
        height = self.sim.getObjectPosition(self.KondoMarker, self.Worldref)[2]
        self.observation = np.array([distance_to_goal, height])

        # Reset Kondo position and orientation
        self.sim.setObjectPosition(self.KondoMarker, self.Worldref, [0, 0, 0.6824])
        self.sim.setObjectOrientation(self.KondoMarker, self.Worldref, [0, 0, 0])

        # Reset joint angles and velocities
        for joint_name in self.joint_names:
            self.sim.setObjectInt32Param(globals()[joint_name[1:].replace('/', '')], dynamic_control, position_control_mode)
            self.sim.setJointTargetPosition(globals()[joint_name[1:].replace('/', '')], 0)
            self.sim.setObjectInt32Param(globals()[joint_name[1:].replace('/', '')], dynamic_control, velocity_control_mode)

        return self.observation


def sysCall_init():
    # Importing required modules
    sim = require('sim')

    # Global variables
    global dynamic_control
    global velocity_control_mode
    global position_control_mode
    global KondoMarker
    global Goal
    global Worldref
    global joint_names
    global init_vel
    global loop_count
    global env
    global model  # Add this line to access the global variable

    # Creating a list of joint names
    joint_names = ['/Waist', '/RightFemurHead1', '/RightFemurHead2', '/RightFemur', '/RightCrus', '/RightAnkle', '/RightFoot', '/LeftFemurHead1', '/LeftFemurHead2', '/LeftFemur', '/LeftCrus', '/LeftAnkle', '/LeftFoot', '/RightShoulder', '/RightArm', '/RightElbow', '/RightForearm', '/LeftShoulder', '/LeftArm', '/LeftElbow', '/LeftForearm', '/Head']
   
    # Setting handles for the Kondo robot and other objects
    KondoMarker = sim.getObject('/base_link_respondable')
    Goal = sim.getObject('/Goal')
    Worldref = sim.getObject('/GlobalFrame')
    

    # Initializing parameters
    dynamic_control = sim.jointintparam_dynctrlmode
    velocity_control_mode = sim.jointdynctrl_velocity
    position_control_mode = sim.jointdynctrl_position
    init_vel = 0.2
    loop_count = 0  

    # Create joint handles and set control modes and initial velocities
    for joint_name in joint_names:
        # Creating a global variable for each joint
        globals()[joint_name[1:].replace('/', '')] = sim.getObject(joint_name)
        
        # Setting the control mode and initial velocity for each joint
        sim.setObjectInt32Param(globals()[joint_name[1:].replace('/', '')], dynamic_control, velocity_control_mode)
        sim.setJointTargetVelocity(globals()[joint_name[1:].replace('/', '')], init_vel)
    
    env = KondoEnv(sim, joint_names)
    # Create a SB3 model
    model = PPO("MlpPolicy", env, verbose=True)
    model.learn(total_timesteps=1000)

    # Redirect the training output to CoppeliaSim terminal
    sys.stdout = sim.auxiliaryConsoleOpen("Training Output", 200, 4)
    
def sysCall_actuation():
    global loop_count  # Add this line to access the global variable
    loop_count = loop_count + 1

    # Execute one time step within the environment
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    # # Reset the simulation after 1000 steps
   # Reset Kondo position and orientation
    if terminated or loop_count > 1000:
        loop_count = 0
        env.reset()
    pass

def sysCall_sensing():
    # put your sensing code here
    pass

def sysCall_cleanup():
    # do some clean-up here
    pass

coppelia
Site Admin
Posts: 10375
Joined: 14 Dec 2012, 00:25

Re: Stable baselines in child script

Post by coppelia »

Hello,

I didn't understand what the exact problem was, when you say that it doesn't seem to work, nor does it give any output... does that mean the CoppeliaSim hangs? Or that the script stops running eventually? Until what point is the script executed?

Cheers

Post Reply