Skip to content

Commit 20f5263

Browse files
committed
Added gym wrapper with basic README
1 parent 6b7a9de commit 20f5263

File tree

6 files changed

+325
-0
lines changed

6 files changed

+325
-0
lines changed

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.ipynb_checkpoints
2+
*.egg-info
3+
*.swp
4+
*.py~
5+
*.pyc
6+
.idea
7+
/logs
8+
*~

README.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# How to add the environment to Gym
2+
3+
To add the environment, do
4+
`cd gym-np-snake`
5+
`pip install -e .`
6+
7+
The gym environment supports only a single-agent setup. However, the Snake Trainer and SnakeEnv in the parent directory are designed to work in a multi-agent cooperative setup.
8+
9+
To use the snake game as a gym environment, use the code sample below:
10+
```
11+
import gym;
12+
import gym_np_snake;
13+
env = gym.make('SnakeNp-v0')
14+
from time import sleep
15+
reward=0;
16+
17+
while reward>-0.5:
18+
ob,reward,dones,info = env.step(0)
19+
env.render(mode='human')
20+
sleep(0.1)
21+
```
22+
## Game Description for gym-environment
23+
The reward is between -1 and 1: -1 for a collision, 1 for eating the food, and 0 otherwise.
24+
The observation is a 10x10x1 image of the encoded display. The mapping is as follows:
25+
26+
| **pixel description** | **pixel value** |
27+
|-----------------------|-----------------|
28+
| empty space | 5 |
29+
| snake head west | 100 |
30+
| snake head north | 120 |
31+
| snake head east | 140 |
32+
| snake head south | 160 |
33+
| snake tail | 200 |
34+
| food | 255 |
35+
36+
Info contains the state information under the `states` key: 4 values per snake (direction/4, normalized x position of the head, normalized y position of the head, distance of head from food divided by length+width).
37+
The interpretation of direction in the game is as follows:
38+
39+
| **direction** | **value** |
40+
|---------------|-----------|
41+
| west | 0 |
42+
| north | 1 |
43+
| east | 2 |
44+
| south | 3 |
45+
46+
47+
48+
The action space is Discrete(3): the snake can keep going straight, turn left, or turn right. The mapping is as follows:
49+
50+
| **action** | **value** |
51+
|---------------|-----------|
52+
| no change | 0 |
53+
| turn left | 1 |
54+
| turn right | 2 |

gym-np-snake/gym_np_snake/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import logging
2+
from gym.envs.registration import register
3+
4+
# Module-level logger for the gym_np_snake package.
logger = logging.getLogger(__name__)

# Make the single-agent snake environment available through gym.make('SnakeNp-v0').
register(
    id='SnakeNp-v0',
    entry_point='gym_np_snake.envs:SnakeNpSingleEnv',
)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from gym_np_snake.envs.snake_np_env import SnakeNpSingleEnv
Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
import numpy as np
2+
import os, subprocess, time, signal
3+
import gym
4+
from gym import error, spaces
5+
from gym import utils
6+
from gym.utils import seeding
7+
from IPython.display import clear_output
8+
9+
class DisplayParam:
    """Glyphs used to draw the board in a text console.

    The ordinal of each cell glyph is what gets stored in the grid array;
    the frame glyphs are only used when printing.
    """

    # Cell contents.
    emptyChar = ' '
    wallChar = '|'
    foodChar = 'x'
    snakeChar = 'o'

    # Heavy box-drawing characters for the frame around the board.
    gridtopleftChar = '\u250f'
    gridtoprightChar = '\u2513'
    gridbottomleftChar = '\u2517'
    gridbottomrightChar = '\u251b'
    gridverticalChar = '\u2503'
    gridhorizontalChar = '\u2501'

    # Code point of the left arrow; the snake direction (0..3) is added to it
    # to pick the arrow drawn for the head.
    arrowoffset = 0x2190
21+
class Grid:
    """Board of character codes, stored as a (length, width) int32 array."""

    def __init__(self, length, width):
        """Create a board of `length` rows and `width` columns, all empty."""
        self.dp = DisplayParam()
        self.length, self.width = length, width
        blank = ord(self.dp.emptyChar)
        self.grid = np.full((length, width), blank, dtype=np.int32)

    def reset(self):
        """Replace the board with a fresh array filled with the empty glyph."""
        blank = ord(self.dp.emptyChar)
        self.grid = np.full((self.length, self.width), blank, dtype=np.int32)

    def itemset(self, pos, val):
        """Store character code `val` at the (row, col) position `pos`."""
        row, col = pos[0], pos[1]
        self.grid[row][col] = val

    def display(self):
        """Print the framed board, then ask IPython to clear it on next output."""
        dp = self.dp
        # Cells are joined with a single space, so a row is 2*width-1 chars wide.
        rule = dp.gridhorizontalChar * (self.width * 2 - 1)
        print(dp.gridtopleftChar + rule + dp.gridtoprightChar)
        for cells in self.grid:
            print(dp.gridverticalChar + ' '.join(map(chr, cells)) + dp.gridverticalChar)
        print(dp.gridbottomleftChar + rule + dp.gridbottomrightChar)
        clear_output(wait=True)
40+
41+
class Snake:
    """A single snake: its cells (tail first, head last) and its heading."""

    def __init__(self, pos, snake_id):
        """Start a one-cell snake at `pos` with a random heading in 0..3."""
        self.length = 1
        self.head = pos
        self.snake_id = snake_id
        self.pos_ary = [pos]
        self.direction = np.random.randint(4)

    def move(self, action, food_pos):
        """Advance one cell according to `action`.

        Returns True when the new head lands on `food_pos` (the snake grows
        by one cell), False otherwise (the oldest cell is dropped).
        """
        step = self.decodeAction(action)
        self.head = tuple(coord + delta for coord, delta in zip(self.head, step))
        ate = self.head == food_pos
        self.pos_ary.append(self.head)
        if ate:
            self.length += 1
        else:
            # No food: the body keeps its size, so the tail cell is released.
            self.pos_ary.pop(0)
        return ate

    def reset(self, pos):
        """Shrink back to a one-cell snake at `pos`; the heading is kept."""
        self.length = 1
        self.head = pos
        self.pos_ary = [pos]

    def decodeAction(self, a):
        """Apply turn `a` to the heading and return the (row, col) step.

        1 rotates the heading one step backwards through W, N, E, S,
        2 rotates it one step forwards, anything else keeps it unchanged.
        """
        if a == 1:
            self.direction = (self.direction + 3) % 4
        elif a == 2:
            self.direction = (self.direction + 1) % 4
        # Steps indexed by heading: west, north, east, south.
        return ((0, -1), (-1, 0), (0, 1), (1, 0))[self.direction]
75+
76+
77+
78+
class SnakeGame:
    """Snake game for one or more snakes on a `length` x `width` grid.

    Positions are (row, col) tuples. All randomness comes from the global
    `np.random` generator.
    """

    def __init__(self, length, width, n_snakes=1):
        """Create the game and place the food and `n_snakes` snakes.

        Raises:
            ValueError: if n_snakes + 1 >= length * width.
        """
        self.dp = DisplayParam()
        self.length, self.width, self.n_snakes = length, width, n_snakes
        # A new game is set up exactly like a reset one.
        self.reset()

    def _random_pos(self):
        """Return a uniformly random (row, col) cell of the grid."""
        return (np.random.randint(self.grid.length),
                np.random.randint(self.grid.width))

    def has_snake(self, pos):
        """Return True if any snake occupies `pos`."""
        return any(pos in snake.pos_ary for snake in self.snakes)

    def sample_empty_pos(self):
        """Return a random cell that holds neither a snake nor the food.

        Uses rejection sampling, so it only terminates while a free cell exists.
        """
        pos = self._random_pos()
        while self.has_snake(pos) or pos == self.food_pos:
            pos = self._random_pos()
        return pos

    def addSnakes(self):
        """Append `n_snakes` one-cell snakes on distinct cells away from the food."""
        taken = []
        for snake_id in range(self.n_snakes):
            pos = self._random_pos()
            while pos in taken or pos == self.food_pos:
                pos = self._random_pos()
            taken.append(pos)
            self.snakes.append(Snake(pos, snake_id))

    def inside_grid(self, pos):
        """Return True if `pos` lies within the grid bounds."""
        row, col = pos[0], pos[1]
        return 0 <= row < self.grid.length and 0 <= col < self.grid.width

    def hasCollided(self):
        """Return True if any snake's head is off the grid or on a snake body.

        A snake's own head is the last entry of its `pos_ary`, so that entry
        is excluded when checking a snake against itself.
        """
        for snake in self.snakes:
            head = snake.head
            if not self.inside_grid(head):
                return True
            for other in self.snakes:
                if other.snake_id == snake.snake_id:
                    body = other.pos_ary[:-1]
                else:
                    body = other.pos_ary
                if head in body:
                    return True
        return False

    def update_food(self):
        """Move the food to a random cell not occupied by any snake."""
        food_pos = self._random_pos()
        while self.has_snake(food_pos):
            food_pos = self._random_pos()
        self.food_pos = food_pos

    def get_observation(self):
        """Return the last rendered board as a uint8 image.

        Character codes are mapped to pixel values: empty 5, food 255, snake
        body 200, and the head arrow for direction i to 100 + 20*i. The board
        is whatever `display()` last drew.
        """
        raw = self.grid.grid
        obs = raw.copy()
        # Masks are taken on the untouched board so a value written by one
        # mapping can never be picked up by a later one.
        obs[raw == ord(self.dp.emptyChar)] = 5
        obs[raw == ord(self.dp.foodChar)] = 255
        obs[raw == ord(self.dp.snakeChar)] = 200
        for i in range(4):
            obs[raw == self.dp.arrowoffset + i] = i * 20 + 100
        return np.array(obs, dtype=np.uint8)

    def display(self, verbose=False):
        """Redraw the food and snakes onto the grid and return the Grid.

        When `verbose` is True the board is also printed.
        """
        self.grid.reset()
        self.grid.itemset(self.food_pos, ord(self.dp.foodChar))
        for snake in self.snakes:
            for pos in snake.pos_ary:
                self.grid.itemset(pos, ord(self.dp.snakeChar))
            # Drawn last so the head arrow overwrites the body glyph.
            self.grid.itemset(snake.head, self.dp.arrowoffset + snake.direction)
        if verbose:
            self.grid.display()
        return self.grid

    def get_state(self):
        """Return an (n_snakes, 4) float64 array of per-snake features.

        Columns: direction / 4, head row / length, head column / width, and
        Manhattan distance from head to food / (length + width).
        """
        # np.float64 instead of the np.float alias, which NumPy 1.24 removed.
        states = np.zeros((self.n_snakes, 4), dtype=np.float64)
        span = self.grid.length + self.grid.width
        food = np.array(self.food_pos)
        for i in range(self.n_snakes):
            snake = self.snakes[i]
            states[i][0] = float(snake.direction) / 4.0
            states[i][1] = float(snake.head[0]) / self.grid.length
            states[i][2] = float(snake.head[1]) / self.grid.width
            states[i][3] = float(np.sum(np.absolute(np.array(snake.head) - food))) / span
        return states

    def reset(self):
        """Start a new game: fresh grid, new food, new snakes.

        Raises:
            ValueError: if n_snakes + 1 >= length * width.
        """
        self.grid = Grid(self.length, self.width)
        if self.n_snakes + 1 >= self.length * self.width:
            raise ValueError('too many snakes')
        self.snakes = []
        self.update_food()
        self.addSnakes()

    def step(self, action_list, verbose=False):
        """Move every snake once and return (observation, rewards, dones, states).

        `action_list[i]` is the action for snake i. A snake gets reward 1 for
        eating the food; if a collision is detected after its move it is
        reset to a free cell, gets reward -1 and is marked done.
        state = 4 x nsnakes info with (direction,xpos,ypos,distance from food)
        """
        rewards = np.zeros(self.n_snakes, dtype=np.float64)
        dones = np.zeros(self.n_snakes, dtype=bool)
        for i in range(self.n_snakes):
            snake = self.snakes[i]
            if snake.move(action_list[i], self.food_pos):
                self.update_food()
                rewards[i] = 1
            if self.hasCollided():
                snake.reset(self.sample_empty_pos())
                rewards[i] = -1
                dones[i] = True

        # The observation is read from the grid, so it must be redrawn first.
        self.display(verbose=verbose)
        observation = np.repeat(self.get_observation(), repeats=self.n_snakes, axis=0)
        states = self.get_state()
        return observation, rewards, dones, states

    def close(self):
        """Nothing to release."""
        pass
205+
206+
207+
class HyperParameters:
    """Board dimensions used by the gym environment."""

    # Number of rows and columns of the (square) board.
    length = width = 10
210+
211+
class SnakeNpSingleEnv(gym.Env):
    """
    Gym wrapper over the numpy Game

    Runs a SnakeGame with a single snake. Observations are uint8 images of
    shape (length, width, 1); actions are Discrete(3).
    """

    def __init__(self):
        self.length = HyperParameters.length
        self.width = HyperParameters.width
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.length, self.width, 1), dtype=np.uint8)
        self.action_space = spaces.Discrete(3)
        self.env = SnakeGame(self.length, self.width, 1)

    def _as_image(self, board):
        """Reshape a (length, width) board into the (length, width, 1) observation."""
        return np.reshape(board, (self.length, self.width, 1))

    def step(self, action):
        """Apply `action` to the snake.

        Returns:
            (observation, reward, episode_over, info) where `info["states"]`
            is the state array returned by the underlying game.
        """
        ob, reward, episode_over, states = self.env.step([action])
        info = {"states": states}
        # The game reports one entry per snake; unwrap the single snake's
        # values into plain Python scalars.
        return self._as_image(ob), float(reward[0]), bool(episode_over[0]), info

    def reset(self):
        """Start a new game and return its initial observation."""
        self.env.reset()
        # The observation is read from the drawn grid, so draw it (silently)
        # before reading.
        self.env.display(verbose=False)
        return self._as_image(self.env.get_observation())

    def render(self, mode='human', close=False, verbose=True):
        """Redraw the board (printing it when `verbose`) and return the Grid."""
        return self.env.display(verbose=verbose)

    def seed(self, seed_val=None):
        """Seed the global NumPy generator the game draws from.

        Returns the list of seeds used, following the Gym convention.
        """
        np.random.seed(seed_val)
        return [seed_val]

gym-np-snake/setup.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from setuptools import setup
2+
3+
# Packaging metadata for the gym snake environment.
setup(
    name='gym_numpy_snake',
    version='0.0.1',
    # Runtime dependencies of the environment module.
    install_requires=[
        'gym>=0.2.3',
        'numpy>=1.14.1',
        'IPython',
    ],
)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy