Code: Select all
#python
import gymnasium as gym
from gymnasium import spaces
import math
import numpy as np
import time
from stable_baselines3 import PPO
import sys
class KondoEnv(gym.Env):
"""Custom Environment for Kondo in CoppeliaSim"""
metadata = {"render_modes": ["human"], "render_fps": 30}
def __init__(self, sim, joint_names):
super(KondoEnv, self).__init__()
self.sim = sim
self.joint_names = joint_names
# Define action and observation space
self.action_space = spaces.Box(low=-20, high=20, shape=(len(joint_names),), dtype=np.float32)
self.observation_space = spaces.Box(low=np.array([-100, 0]), high=np.array([100, 2]), dtype=np.float32)
# Initialize objects
self.KondoMarker = sim.getObject('/base_link_respondable')
self.Goal = sim.getObject('/Goal')
self.Worldref = sim.getObject('/GlobalFrame')
def step(self, action):
# Execute one time step within the environment
for idx, joint_name in enumerate(self.joint_names):
self.sim.setJointTargetVelocity(globals()[joint_name[1:].replace('/', '')], action[idx])
# Calculate observation
distance_to_goal = np.linalg.norm(np.array(self.sim.getObjectPosition(self.KondoMarker, self.Worldref)) - np.array(self.sim.getObjectPosition(self.Goal, self.Worldref)))
height = self.sim.getObjectPosition(self.KondoMarker, self.Worldref)[2]
self.observation = np.array([distance_to_goal, height])
# Calculate reward
self.reward = -distance_to_goal - 0.2 * abs(0.6824 - height)
# Set termination conditions
if distance_to_goal < 0.4:
self.terminated = True
self.reward = 100
elif height < 0.5:
self.terminated = True
self.reward = -100
else:
self.terminated = False
self.truncated = False
self.info = {}
return self.observation, self.reward, self.terminated, self.truncated, self.info
def reset(self, seed=None):
# Reset environment
self.terminated = False
# Reset observation space
distance_to_goal = np.linalg.norm(np.array(self.sim.getObjectPosition(self.KondoMarker, self.Worldref)) - np.array(self.sim.getObjectPosition(self.Goal, self.Worldref)))
height = self.sim.getObjectPosition(self.KondoMarker, self.Worldref)[2]
self.observation = np.array([distance_to_goal, height])
# Reset Kondo position and orientation
self.sim.setObjectPosition(self.KondoMarker, self.Worldref, [0, 0, 0.6824])
self.sim.setObjectOrientation(self.KondoMarker, self.Worldref, [0, 0, 0])
# Reset joint angles and velocities
for joint_name in self.joint_names:
self.sim.setObjectInt32Param(globals()[joint_name[1:].replace('/', '')], dynamic_control, position_control_mode)
self.sim.setJointTargetPosition(globals()[joint_name[1:].replace('/', '')], 0)
self.sim.setObjectInt32Param(globals()[joint_name[1:].replace('/', '')], dynamic_control, velocity_control_mode)
return self.observation
def sysCall_init():
# Importing required modules
sim = require('sim')
# Global variables
global dynamic_control
global velocity_control_mode
global position_control_mode
global KondoMarker
global Goal
global Worldref
global joint_names
global init_vel
global loop_count
global env
global model # Add this line to access the global variable
# Creating a list of joint names
joint_names = ['/Waist', '/RightFemurHead1', '/RightFemurHead2', '/RightFemur', '/RightCrus', '/RightAnkle', '/RightFoot', '/LeftFemurHead1', '/LeftFemurHead2', '/LeftFemur', '/LeftCrus', '/LeftAnkle', '/LeftFoot', '/RightShoulder', '/RightArm', '/RightElbow', '/RightForearm', '/LeftShoulder', '/LeftArm', '/LeftElbow', '/LeftForearm', '/Head']
# Setting handles for the Kondo robot and other objects
KondoMarker = sim.getObject('/base_link_respondable')
Goal = sim.getObject('/Goal')
Worldref = sim.getObject('/GlobalFrame')
# Initializing parameters
dynamic_control = sim.jointintparam_dynctrlmode
velocity_control_mode = sim.jointdynctrl_velocity
position_control_mode = sim.jointdynctrl_position
init_vel = 0.2
loop_count = 0
# Create joint handles and set control modes and initial velocities
for joint_name in joint_names:
# Creating a global variable for each joint
globals()[joint_name[1:].replace('/', '')] = sim.getObject(joint_name)
# Setting the control mode and initial velocity for each joint
sim.setObjectInt32Param(globals()[joint_name[1:].replace('/', '')], dynamic_control, velocity_control_mode)
sim.setJointTargetVelocity(globals()[joint_name[1:].replace('/', '')], init_vel)
env = KondoEnv(sim, joint_names)
# Create a SB3 model
model = PPO("MlpPolicy", env, verbose=True)
model.learn(total_timesteps=1000)
# Redirect the training output to CoppeliaSim terminal
sys.stdout = sim.auxiliaryConsoleOpen("Training Output", 200, 4)
def sysCall_actuation():
global loop_count # Add this line to access the global variable
loop_count = loop_count + 1
# Execute one time step within the environment
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
# # Reset the simulation after 1000 steps
# Reset Kondo position and orientation
if terminated or loop_count > 1000:
loop_count = 0
env.reset()
pass
def sysCall_sensing():
# put your sensing code here
pass
def sysCall_cleanup():
# do some clean-up here
pass