
Source Code: Robot Q-Learning Algorithm for Obstacle Avoidance and Goal Seeking

2023-08-15 23:53  Author: 中电新青年-探索号


Goal-seeking map on a grid (red square: robot; black squares: obstacles; yellow area: goal point).
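
For reference, the agent is trained with tabular Q-learning: after each transition (s, a, r, s'), the entry Q(s, a) is nudged toward the bootstrapped target. This is exactly what the learn() method in RL_brain.py below implements:

$$Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right]$$

with learning rate α = 0.01, discount γ = 0.9, and an ε-greedy policy that picks the currently best-valued action with probability ε = 0.9 and a uniformly random action otherwise.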

# Map (environment) file code

maze_env.py

"""
Reinforcement learning maze example.
Red rectangle:          explorer.
Black rectangles:       hells       [reward = -1].
Yellow circle:          paradise    [reward = +1].
All other states:       ground      [reward = 0].

"""

import numpy as np
import time
import sys
if sys.version_info.major == 2:
   import Tkinter as tk
else:
   import tkinter as tk

UNIT = 40   # pixels
MAZE_H = 4  # grid height
MAZE_W = 4  # grid width


class Maze(tk.Tk, object):
   def __init__(self):
       super(Maze, self).__init__()
       self.action_space = ['u', 'd', 'l', 'r']  # only the length is used; step() maps 0=up, 1=down, 2=right, 3=left
       self.n_actions = len(self.action_space)
       self.title('maze')
       self.geometry('{0}x{1}'.format(MAZE_W * UNIT, MAZE_H * UNIT))  # window size: width x height
       self._build_maze()

   def _build_maze(self):
       self.canvas = tk.Canvas(self, bg='white',
                          height=MAZE_H * UNIT,
                          width=MAZE_W * UNIT)

       # create grids
       for c in range(0, MAZE_W * UNIT, UNIT):
           x0, y0, x1, y1 = c, 0, c, MAZE_H * UNIT
           self.canvas.create_line(x0, y0, x1, y1)
       for r in range(0, MAZE_H * UNIT, UNIT):
           x0, y0, x1, y1 = 0, r, MAZE_W * UNIT, r
           self.canvas.create_line(x0, y0, x1, y1)

       # create origin
       origin = np.array([20, 20])

       # hell
       hell1_center = origin + np.array([UNIT * 2, UNIT])
       self.hell1 = self.canvas.create_rectangle(
           hell1_center[0] - 15, hell1_center[1] - 15,
           hell1_center[0] + 15, hell1_center[1] + 15,
           fill='black')
       # hell
       hell2_center = origin + np.array([UNIT, UNIT * 2])
       self.hell2 = self.canvas.create_rectangle(
           hell2_center[0] - 15, hell2_center[1] - 15,
           hell2_center[0] + 15, hell2_center[1] + 15,
           fill='black')

       # create oval
       oval_center = origin + UNIT * 2
       self.oval = self.canvas.create_oval(
           oval_center[0] - 15, oval_center[1] - 15,
           oval_center[0] + 15, oval_center[1] + 15,
           fill='yellow')

       # create red rect
       self.rect = self.canvas.create_rectangle(
           origin[0] - 15, origin[1] - 15,
           origin[0] + 15, origin[1] + 15,
           fill='red')

       # pack all
       self.canvas.pack()

   def reset(self):
       self.update()
       time.sleep(0.5)
       self.canvas.delete(self.rect)
       origin = np.array([20, 20])
       self.rect = self.canvas.create_rectangle(
           origin[0] - 15, origin[1] - 15,
           origin[0] + 15, origin[1] + 15,
           fill='red')
       # return observation
       return self.canvas.coords(self.rect)

   def step(self, action):
       s = self.canvas.coords(self.rect)
       base_action = np.array([0, 0])
       if action == 0:   # up
           if s[1] > UNIT:
               base_action[1] -= UNIT
       elif action == 1:   # down
           if s[1] < (MAZE_H - 1) * UNIT:
               base_action[1] += UNIT
       elif action == 2:   # right
           if s[0] < (MAZE_W - 1) * UNIT:
               base_action[0] += UNIT
       elif action == 3:   # left
           if s[0] > UNIT:
               base_action[0] -= UNIT

       self.canvas.move(self.rect, base_action[0], base_action[1])  # move agent

       s_ = self.canvas.coords(self.rect)  # next state

       # reward function
       if s_ == self.canvas.coords(self.oval):
           reward = 1
           done = True
           s_ = 'terminal'
       elif s_ in [self.canvas.coords(self.hell1), self.canvas.coords(self.hell2)]:
           reward = -1
           done = True
           s_ = 'terminal'
       else:
           reward = 0
           done = False

       return s_, reward, done

   def render(self):
       time.sleep(0.1)
       self.update()


# Module-level demo: drive the red square straight down until each episode ends.
def update():
   for t in range(10):
       s = env.reset()
       while True:
           env.render()
           a = 1  # always choose action 1 (down) in this simple demo
           s, r, done = env.step(a)
           if done:
               break

if __name__ == '__main__':
   env = Maze()
   env.after(100, update)
   env.mainloop()
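
A minimal sketch of driving the environment by hand (it uses only what maze_env.py defines above; the state returned by reset() and step() is the red rectangle's canvas coordinates [x0, y0, x1, y1], which run.py later turns into a string key for the Q-table):

# sketch: manual interaction with the Maze environment (assumes maze_env.py is importable)
from maze_env import Maze

env = Maze()
obs = env.reset()                  # e.g. [5.0, 5.0, 35.0, 35.0] for the top-left cell
obs_, reward, done = env.step(1)   # action 1 = move one cell down
print(obs, '->', obs_, 'reward:', reward, 'done:', done)
env.destroy()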

# Q-learning training algorithm code

RL_brain.py

"""
This part of code is the Q learning brain, which is a brain of the agent.
All decisions are made in here.

"""
import numpy as np
import pandas as pd


class QLearningTable:
   def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
       self.actions = actions  # a list
       self.lr = learning_rate
       self.gamma = reward_decay
       self.epsilon = e_greedy
       self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

   def choose_action(self, observation):
       self.check_state_exist(observation)
       # action selection
       if np.random.uniform() < self.epsilon:
           # choose best action
           state_action = self.q_table.loc[observation, :]
           state_action = state_action.reindex(np.random.permutation(state_action.index))     # some actions have same value
           action = state_action.idxmax()
       else:
           # choose random action
           action = np.random.choice(self.actions)
       return action

   def learn(self, s, a, r, s_):
       self.check_state_exist(s_)
       q_predict = self.q_table.loc[s, a]
       if s_ != 'terminal':
           q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # next state is not terminal
       else:
           q_target = r  # next state is terminal
       self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update

   def check_state_exist(self, state):
       if state not in self.q_table.index:
           # append the new state to the q table as a row of zeros
           # (pandas removed DataFrame.append in 2.0, so build the row and use pd.concat)
           new_row = pd.Series(
               [0.0] * len(self.actions),
               index=self.q_table.columns,
               name=state,
           )
           self.q_table = pd.concat([self.q_table, new_row.to_frame().T])
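
The Q-table grows lazily: a row is only created the first time a state is seen. A minimal sketch of using QLearningTable on its own, outside the maze (the state labels 's0' and 'terminal' here are made up for illustration):

# sketch: exercise the QLearningTable API on a hand-made transition
from RL_brain import QLearningTable

RL = QLearningTable(actions=[0, 1, 2, 3])
a = RL.choose_action('s0')         # epsilon-greedy choice; 's0' is added to the table on first sight
RL.learn('s0', a, 1, 'terminal')   # terminal transition, so the target is just the reward r = 1
print(RL.q_table)                  # Q('s0', a) has moved from 0 toward 1 by lr = 0.01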

# Main run script

run.py

"""
Reinforcement learning maze example.

Red rectangle:          explorer.
Black rectangles:       hells       [reward = -1].
Yellow circle:          paradise    [reward = +1].
All other states:       ground      [reward = 0].

"""

from maze_env import Maze
from RL_brain import QLearningTable


def update():
   for episode in range(100):
       # initial observation
       observation = env.reset()

       while True:
           # fresh env
           env.render()

           # RL choose action based on observation
           action = RL.choose_action(str(observation))

           # RL take action and get next observation and reward
           observation_, reward, done = env.step(action)

           # RL learn from this transition
           RL.learn(str(observation), action, reward, str(observation_))

           # swap observation
           observation = observation_

           # break while loop when end of this episode
           if done:
               break

   # end of game
   print('game over')
   env.destroy()

if __name__ == "__main__":
   env = Maze()
   RL = QLearningTable(actions=list(range(env.n_actions)))

   env.after(100, update)
   env.mainloop()
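
After training, everything the agent has learned lives in RL.q_table. A small hypothetical helper (not part of the original post) that reads a greedy policy out of the trained table, e.g. called just before env.destroy() in update():

# sketch: extract the greedy action for each visited state from a trained QLearningTable
def greedy_policy(RL):
    # row index = stringified observation, column = action index (0=up, 1=down, 2=right, 3=left)
    return {state: int(RL.q_table.loc[state].idxmax()) for state in RL.q_table.index}

# example: print(greedy_policy(RL)) right before env.destroy() in update()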

