|
1 | 1 | {
|
2 | 2 | "cells": [
|
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "# Training the MultiAgent Environment" |
| 8 | + ] |
| 9 | + }, |
3 | 10 | {
|
4 | 11 | "cell_type": "code",
|
5 | 12 | "execution_count": 1,
|
6 | 13 | "metadata": {},
|
7 | 14 | "outputs": [
|
8 | 15 | {
|
9 |
| - "ename": "ModuleNotFoundError", |
10 |
| - "evalue": "No module named 'tensorflow'", |
11 |
| - "output_type": "error", |
12 |
| - "traceback": [ |
13 |
| - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
14 |
| - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", |
15 |
| - "\u001b[0;32m<ipython-input-1-3cef2640497c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mSnakeTrainer\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mclear_output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtime\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msleep\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", |
16 |
| - "\u001b[0;32m~/snakegame-numpy/multiagent-cooperative-env/SnakeTrainer.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSnakeEnv\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msnakeEnv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSnakeModel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mbaselines\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommon\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtf_util\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
17 |
| - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'" |
| 16 | + "name": "stdout", |
| 17 | + "output_type": "stream", |
| 18 | + "text": [ |
| 19 | + "PLAYING . . . TIME = 29 s\n", |
| 20 | + "PLAYER : 0\n", |
| 21 | + "|. 0 . . .|\n", |
| 22 | + "|. . . x .|\n", |
| 23 | + "|. . . . .|\n", |
| 24 | + "|. . . . .|\n", |
| 25 | + "|. . . . .|\n", |
| 26 | + "PLAYER : 1\n", |
| 27 | + "|. . . . .|\n", |
| 28 | + "|. . . . .|\n", |
| 29 | + "|. . . x .|\n", |
| 30 | + "|. . . . 0|\n", |
| 31 | + "|. . . . .|\n", |
| 32 | + "PLAYER : 2\n", |
| 33 | + "|. . x . .|\n", |
| 34 | + "|. . . . .|\n", |
| 35 | + "|. . . . .|\n", |
| 36 | + "|. . . . .|\n", |
| 37 | + "|. . . . 0|\n" |
18 | 38 | ]
|
19 | 39 | }
|
20 | 40 | ],
|
|
25 | 45 | "from time import sleep;\n",
|
26 | 46 | "\n",
|
27 | 47 | "runner = SnakeTrainer.SnakeRunner()\n",
|
28 |
| - "runner.play()\n", |
29 |
| - "clear_output()" |
| 48 | + "runner.train()" |
| 49 | + ] |
| 50 | + }, |
| 51 | + { |
| 52 | + "cell_type": "markdown", |
| 53 | + "metadata": {}, |
| 54 | + "source": [ |
| 55 | + "# Training the MultiAgent MultiSnake Cooperative Environment\n", |
| 56 | + "In this case you will be able to observe multiple snakes fighting for the same resource in the same environment. This is an interesting case to try out, because the snakes have to learn the best strategy to maximize the reward. This means that, ideally, after extensive training, the far snake should pave the way for the nearby snakes (in order to maximize the average reward)." |
30 | 57 | ]
|
31 | 58 | },
|
32 | 59 | {
|
33 | 60 | "cell_type": "code",
|
34 |
| - "execution_count": null, |
| 61 | + "execution_count": 1, |
35 | 62 | "metadata": {},
|
36 |
| - "outputs": [], |
| 63 | + "outputs": [ |
| 64 | + { |
| 65 | + "name": "stdout", |
| 66 | + "output_type": "stream", |
| 67 | + "text": [ |
| 68 | + "PLAYING . . . TIME = 29 s\n", |
| 69 | + "PLAYER : 0\n", |
| 70 | + "|. . . . x|\n", |
| 71 | + "|. . . . 0|\n", |
| 72 | + "|. . . . .|\n", |
| 73 | + "|. . . 2 .|\n", |
| 74 | + "|. 1 . . .|\n", |
| 75 | + "PLAYER : 1\n", |
| 76 | + "|. . . . .|\n", |
| 77 | + "|. . . . .|\n", |
| 78 | + "|2 . . . .|\n", |
| 79 | + "|1 o . . .|\n", |
| 80 | + "|. 0 . x .|\n", |
| 81 | + "PLAYER : 2\n", |
| 82 | + "|. . . . .|\n", |
| 83 | + "|. 2 . . .|\n", |
| 84 | + "|. 1 0 . .|\n", |
| 85 | + "|. . . . .|\n", |
| 86 | + "|. . x . .|\n" |
| 87 | + ] |
| 88 | + } |
| 89 | + ], |
37 | 90 | "source": [
|
38 |
| - "for i in range(50):\n", |
39 |
| - " runner.user_step(action=1)\n", |
40 |
| - " runner.env.display(verbose=True)\n", |
41 |
| - " print (runner.env.get_state())\n", |
42 |
| - " sleep(0.5)\n", |
43 |
| - " clear_output(wait=True)\n", |
44 |
| - " \n" |
| 91 | + "import SnakeTrainer;\n", |
| 92 | + "import tensorflow as tf;\n", |
| 93 | + "from IPython.display import clear_output\n", |
| 94 | + "from time import sleep;\n", |
| 95 | + "\n", |
| 96 | + "runner = SnakeTrainer.SnakeRunner(n_envs=10,n_snakes=3)\n", |
| 97 | + "runner.train()\n" |
45 | 98 | ]
|
46 | 99 | },
|
47 | 100 | {
|
|
68 | 121 | "name": "python",
|
69 | 122 | "nbconvert_exporter": "python",
|
70 | 123 | "pygments_lexer": "ipython3",
|
71 |
| - "version": "3.6.4" |
| 124 | + "version": "3.5.4" |
72 | 125 | }
|
73 | 126 | },
|
74 | 127 | "nbformat": 4,
|
|
0 commit comments