强化学习入门——1 基于《深入浅出强化学习——编程实践》

鸳鸯环境类的搭建代码

1.YuanYang_Env.py

import pygame
import random
import numpy as np
from 深入浅出强化学习_编程实践.图片库.Colour_Tipriest import *

class YuanYangEnv:
    """Grid world in which a male bird has to reach a female bird.

    The window is 1200 x 900 pixels and is divided into a 10 x 10 grid, so
    one cell is 120 px wide and 90 px high.  A state is an integer in
    ``0..99`` numbered row by row (``state = 10 * row + column``); a position
    is the ``[x, y]`` pixel coordinate of the cell's top-left corner.

    Two vertical walls of eight obstacle cells each stand between the birds.
    Moving into a wall or off the grid ends the episode with reward -1,
    reaching the female bird ends it with reward +1, every other move yields
    reward 0 and the episode continues.
    """

    # Pixel size of one grid cell.
    CELL_WIDTH = 120
    CELL_HEIGHT = 90
    # Pixel displacement produced by each action.
    _MOVES = {'e': (120, 0), 's': (0, 90), 'w': (-120, 0), 'n': (0, -90)}

    def __init__(self):
        """Create the state/action spaces, the geometry and the obstacles."""
        self.states = list(range(100))
        self.actions = ['e', 's', 'w', 'n']
        self.gamma = 0.8
        self.value = np.zeros((10, 10))
        self.viewer = None
        self.FPSCLOCK = pygame.time.Clock()
        # Window size in pixels.
        self.screen_size = (1200, 900)
        self.bird_position = (0, 0)
        # Two cells overlap when they are closer than one cell on both axes.
        self.limit_distance_x = 120
        self.limit_distance_y = 90
        self.obstacle_size = [120, 90]
        self.obstacle1_x = []
        self.obstacle1_y = []
        self.obstacle2_x = []
        self.obstacle2_y = []
        self.bird_male_init_position = [0, 0]
        self.bird_male_position = [0, 0]
        self.bird_female_init_position = [1080, 0]

        # NOTE(review): the text that followed ``if i <`` in this loop was
        # lost when the listing was extracted.  Only ``obstacle1_x == 360``
        # and the count of eight cells per wall survive; the row layout and
        # the x position of the second wall are a reconstruction (each wall
        # leaves a two-cell gap) and must be confirmed against the project.
        for i in range(8):
            # First wall.
            self.obstacle1_x.append(360)
            self.obstacle1_y.append(90 * i if i <= 3 else 90 * (i + 2))
            # Second wall.
            self.obstacle2_x.append(720)
            self.obstacle2_y.append(90 * i if i <= 4 else 90 * (i + 2))

    def _hits_wall(self, wall_x, wall_y, position):
        """Return True when *position* overlaps any cell of one wall."""
        return any(
            abs(x - position[0]) < self.limit_distance_x
            and abs(y - position[1]) < self.limit_distance_y
            for x, y in zip(wall_x, wall_y)
        )

    def collide(self, state_position):
        """Tell whether a position is illegal.

        Args:
            state_position: ``[x, y]`` pixel position to test.

        Returns:
            1 if the position overlaps an obstacle cell or lies outside the
            grid, otherwise 0.
        """
        x, y = state_position[0], state_position[1]
        # Leaving the grid counts as a collision.
        if x > 1080 or x < 0 or y > 810 or y < 0:
            return 1
        # Every cell of both walls is examined before deciding; the decision
        # must not be taken after looking at the first cell only.
        if self._hits_wall(self.obstacle1_x, self.obstacle1_y, state_position):
            return 1
        if self._hits_wall(self.obstacle2_x, self.obstacle2_y, state_position):
            return 1
        return 0

    def find(self, state_position):
        """Return 1 if the position overlaps the female bird, otherwise 0."""
        goal_x, goal_y = self.bird_female_init_position
        reached = (abs(state_position[0] - goal_x) < self.limit_distance_x
                   and abs(state_position[1] - goal_y) < self.limit_distance_y)
        return 1 if reached else 0

    def state_to_position(self, state):
        """Convert a state index into the ``[x, y]`` pixel position of its cell."""
        row, column = divmod(int(state), 10)
        return [self.CELL_WIDTH * column, self.CELL_HEIGHT * row]

    def position_to_state(self, position):
        """Convert an ``[x, y]`` pixel position into the index of its cell.

        Column and row are floored separately so that a position that is not
        aligned to the grid still maps to the cell that contains it.
        """
        column = int(position[0] // self.CELL_WIDTH)
        row = int(position[1] // self.CELL_HEIGHT)
        return column + 10 * row

    def reset(self):
        """Return a random start state that is neither blocked nor the goal."""
        while True:
            state = random.choice(self.states)
            position = self.state_to_position(state)
            if self.collide(position) == 0 and self.find(position) == 0:
                return state

    def transform(self, state, action):
        """Apply one action and return the outcome of the step.

        Args:
            state: current state index.
            action: one of ``'e'``, ``'s'``, ``'w'``, ``'n'``.

        Returns:
            A tuple ``(next_state, reward, done)``.

        Raises:
            ValueError: if *action* is not one of the four known actions.
        """
        current_position = self.state_to_position(state)
        # A state that is already terminal stays where it is.
        if self.find(current_position) == 1 or self.collide(current_position) == 1:
            return state, 0, True

        if action not in self._MOVES:
            raise ValueError(f"unknown action: {action!r}")
        step_x, step_y = self._MOVES[action]
        next_position = [current_position[0] + step_x,
                         current_position[1] + step_y]

        # Hitting a wall or the border: the bird stays put, episode ends.
        if self.collide(next_position) == 1:
            return self.position_to_state(current_position), -1, True
        # Reaching the female bird: move onto the goal cell, episode ends.
        if self.find(next_position) == 1:
            return self.position_to_state(next_position), 1, True
        # Ordinary move: advance to the new cell and keep going.
        return self.position_to_state(next_position), 0, False

    def gameover(self):
        """Terminate the program once the window's close button is pressed."""
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                raise SystemExit

    def render(self):
        """Draw the grid, obstacles, both birds and the value table."""
        if self.viewer is None:
            # One-time setup: opening the window and loading the images on
            # every frame would be needlessly slow.
            pygame.init()
            self.viewer = pygame.display.set_mode(self.screen_size, 0, 32)
            pygame.display.set_caption("鸳鸯")
            self.bird_male = pygame.image.load("图片库/yuanyang.png")
            self.bird_female = pygame.image.load("图片库/yuanyang.png")
            self.background = pygame.image.load("图片库/背景.jpg")
            self.obstacle = pygame.image.load("图片库/箱子.jpg")
            self.font = pygame.font.SysFont('times', 20)

        # The background goes first so that nothing drawn later is hidden.
        self.viewer.blit(self.background, (0, 0))

        # Grid lines.
        for i in range(11):
            pygame.draw.line(self.viewer, BLACK, (120 * i, 0), (120 * i, 900), 1)
            pygame.draw.line(self.viewer, BLACK, (0, 90 * i), (1200, 90 * i), 1)

        # Goal.
        self.viewer.blit(self.bird_female, self.bird_female_init_position)

        # Obstacles.
        for x, y in zip(self.obstacle1_x, self.obstacle1_y):
            self.viewer.blit(self.obstacle, (x, y))
        for x, y in zip(self.obstacle2_x, self.obstacle2_y):
            self.viewer.blit(self.obstacle, (x, y))

        # Agent.
        self.viewer.blit(self.bird_male, self.bird_male_position)

        # Value table: entry [i, j] is printed in the cell whose pixel
        # origin is (120 * i, 90 * j).
        for i in range(10):
            for j in range(10):
                text = str(round(float(self.value[i, j]), 3))
                surface = self.font.render(text, True, BLACK, WHITE)
                self.viewer.blit(surface, (120 * i + 5, 90 * j + 70))

        pygame.display.update()
        self.gameover()
        self.FPSCLOCK.tick(30)

if __name__ == "__main__":
    # Manual smoke test: draw the environment once and keep the window open
    # until the user closes it.
    yy = YuanYangEnv()
    yy.render()
    while True:
        # Block until an event arrives instead of polling in a tight loop,
        # which would keep one CPU core fully busy.
        if pygame.event.wait().type == pygame.QUIT:
            pygame.quit()
            raise SystemExit

2.Colour_Tipriest.py

# Colors (R, G, B)
# Each constant is a tuple of red, green and blue channel values.
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)
YELLOW = (255, 255, 0)
DarkViolet = (148, 0, 211)
SlateBlue = (106, 90, 205)

Original: https://blog.csdn.net/titan__/article/details/119709931
Author: Tipriest_
Title: 强化学习入门——1 基于《深入浅出强化学习——编程实践》

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/783589/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球