Updated runner to accept user-specified hyperparameters

crazymuse · crazymuse · commit 74c13bdabcc1 · 2018-03-06T10:47:12.000+05:30
diff --git a/README.md b/README.md
@@ -1,6 +1,8 @@
 # Running snake game without Gym Environment
 To run snake game without gym environment, you can look into folder [multiagent-cooperative env](https://github.com/crazymuse/snakegame-numpy/blob/master/multiagent-cooperative-env/SnakeExample.ipynb). Open jupyter notebook from same and run the sample code.
 
+![](preview.png)
+
 ### Prerequisities
 * Install [OpenAI Gym](https://github.com/openai/gym) and [Baselines](https://github.com/openai/baselines)
 * Install IPython and numpy via pip or conda.
diff --git a/multiagent-cooperative-env/SnakeExample.ipynb b/multiagent-cooperative-env/SnakeExample.ipynb
@@ -1,20 +1,40 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Training the MultiAgent Environment"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'tensorflow'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-1-3cef2640497c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mSnakeTrainer\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mclear_output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtime\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msleep\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/snakegame-numpy/multiagent-cooperative-env/SnakeTrainer.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSnakeEnv\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msnakeEnv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSnakeModel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mbaselines\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommon\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtf_util\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PLAYING  . . . TIME = 29 s\n",
+      "PLAYER :  0\n",
+      "|. 0 . . .|\n",
+      "|. . . x .|\n",
+      "|. . . . .|\n",
+      "|. . . . .|\n",
+      "|. . . . .|\n",
+      "PLAYER :  1\n",
+      "|. . . . .|\n",
+      "|. . . . .|\n",
+      "|. . . x .|\n",
+      "|. . . . 0|\n",
+      "|. . . . .|\n",
+      "PLAYER :  2\n",
+      "|. . x . .|\n",
+      "|. . . . .|\n",
+      "|. . . . .|\n",
+      "|. . . . .|\n",
+      "|. . . . 0|\n"
      ]
     }
    ],
@@ -25,23 +45,56 @@
     "from time import sleep;\n",
     "\n",
     "runner = SnakeTrainer.SnakeRunner()\n",
-    "runner.play()\n",
-    "clear_output()"
+    "runner.train()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# For Training MultiAgent MultiSnake Cooperative Environment\n",
+    "In this case you will be able to observe multiple snakes fighting for the same resource in the same environment. This is an interesting case to try out, because they have to learn the best strategy to maximize the reward. This means that, ideally, after extensive training, the far snake should pave the way for nearby snakes (in order to maximize the average reward)."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PLAYING  . . . TIME = 29 s\n",
+      "PLAYER :  0\n",
+      "|. . . . x|\n",
+      "|. . . . 0|\n",
+      "|. . . . .|\n",
+      "|. . . 2 .|\n",
+      "|. 1 . . .|\n",
+      "PLAYER :  1\n",
+      "|. . . . .|\n",
+      "|. . . . .|\n",
+      "|2 . . . .|\n",
+      "|1 o . . .|\n",
+      "|. 0 . x .|\n",
+      "PLAYER :  2\n",
+      "|. . . . .|\n",
+      "|. 2 . . .|\n",
+      "|. 1 0 . .|\n",
+      "|. . . . .|\n",
+      "|. . x . .|\n"
+     ]
+    }
+   ],
    "source": [
-    "for i in range(50):\n",
-    "    runner.user_step(action=1)\n",
-    "    runner.env.display(verbose=True)\n",
-    "    print (runner.env.get_state())\n",
-    "    sleep(0.5)\n",
-    "    clear_output(wait=True)\n",
-    "    \n"
+    "import SnakeTrainer;\n",
+    "import tensorflow as tf;\n",
+    "from IPython.display import clear_output\n",
+    "from time import sleep;\n",
+    "\n",
+    "runner = SnakeTrainer.SnakeRunner(n_envs=10,n_snakes=3)\n",
+    "runner.train()\n"
    ]
   },
   {
@@ -68,7 +121,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.4"
+   "version": "3.5.4"
   }
  },
  "nbformat": 4,
diff --git a/multiagent-cooperative-env/SnakeTrainer.py b/multiagent-cooperative-env/SnakeTrainer.py
@@ -90,7 +90,24 @@ class SingleSnakeGameHyperParameters:
     
     SAVE_SKIP = 1000 # Save after 1000 iterations
     DEFAULT_SAVE_PATH = "./model/model.ckpt"
+    
+    def update_params(self,n_envs,n_snakes,grid_length,grid_width):  # Apply user overrides (None = keep current value), then rebuild the derived fields
+        if n_envs != None:  # NOTE(review): `is not None` is the idiomatic None test (PEP 8)
+            self.N_ENVS=n_envs
+        if n_snakes != None:
+            self.N_SNAKES=n_snakes
+        if grid_length != None:
+            self.GRID_LENGTH = grid_length
+            self.GRID_WIDTH = grid_length  # a grid_length given on its own also sets the width to the same value
+        if grid_width != None:
+            self.GRID_WIDTH = grid_width  # an explicit grid_width overrides the width set from grid_length above
+        self.N_BATCH = self.N_STEPS*self.N_ENVS*self.N_SNAKES  # recomputed because N_ENVS / N_SNAKES may have changed
+        self.OBS_SHAPE = (None, self.GRID_LENGTH, self.GRID_WIDTH, 1)  # shapes below are rebuilt from the (possibly updated) grid size and N_BATCH
+        self.BATCH_OBS_SHAPE = (self.N_BATCH,self.GRID_LENGTH,self.GRID_WIDTH,1)
+        self.STATE_SHAPE = (None,self.N_STATES)
+        self.BATCH_STATE_SHAPE = (self.N_BATCH,self.N_STATES)
 
+            
 class Model:
     def __init__(self,policy,p,has_state):
         """
@@ -191,8 +208,9 @@ class SnakeRunner(object):
     """
     This class will take the Model and interface it with Snake Env
     """
-    def __init__(self):
+    def __init__(self,n_envs=None,n_snakes=None,grid_length=None,grid_width=None):
         self.p = SingleSnakeGameHyperParameters() # Game Parameters;
+        self.p.update_params(n_envs=n_envs,n_snakes=n_snakes,grid_length=grid_length,grid_width=grid_width)
         p = self.p
         self.env = snakeEnv.MultiAgentSnakeGame(length = p.GRID_LENGTH, 
                                 width =  p.GRID_WIDTH,n_envs=self.p.N_ENVS,n_snakes=self.p.N_SNAKES)
diff --git a/preview.png b/preview.png