Updated README and weights from a re-run
aweeraman committed Dec 26, 2018
1 parent 669ef77 commit ace7313
Showing 5 changed files with 151 additions and 9 deletions.
128 changes: 128 additions & 0 deletions .gitignore
@@ -0,0 +1,128 @@

# Created by https://www.gitignore.io/api/python
# Edit at https://www.gitignore.io/?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

### Python Patch ###
.venv/

Banana.app
*.log

# End of https://www.gitignore.io/api/python
28 changes: 21 additions & 7 deletions README.md
@@ -36,6 +36,7 @@ To run the pre-trained agent, execute the following:

```
$ python bananas.py --run
(drltest1) sendai:bananas anuradha$ python bananas.py --run
Mono path[0] = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/Resources/Data/Managed'
Mono config path = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/MonoBleedingEdge/etc'
INFO:unityagents:
@@ -64,14 +65,13 @@ States look like: [1. 0. 0. 0. 0.84408134 0.
0. 1. 0. 0. 0.31969345 0.
0. ]
States have length: 37
Score: 15.0
Score: 14.0
```
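
Under the hood, `--run` restores the trained Q-network from `weights.pth`, which `bananas.py` saves via `torch.save(agent.qnetwork_local.state_dict(), 'weights.pth')` (see the diff below). A minimal sketch of that restore step, assuming an `Agent` class along the lines of the Udacity DQN solution referenced in `bananas.py`:

```
import torch
from dqn_agent import Agent  # hypothetical module name, per the Udacity DQN solution

# Rebuild the agent with this environment's dimensions, then load the
# trained weights before running episodes.
agent = Agent(state_size=37, action_size=4, seed=0)
agent.qnetwork_local.load_state_dict(torch.load('weights.pth'))
agent.qnetwork_local.eval()  # inference only; no training updates
```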

To customize hyperparameters and train the agent, execute the following:

```
$ python bananas.py --train
(drl) sendai:bananas anuradha$ python bananas.py --train
Mono path[0] = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/Resources/Data/Managed'
Mono config path = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/MonoBleedingEdge/etc'
INFO:unityagents:
@@ -97,13 +97,27 @@ Episode 200	Average Score: 4.03
Episode 300 Average Score: 7.21
Episode 400 Average Score: 9.00
Episode 500 Average Score: 11.44
Episode 600 Average Score: 13.50
Episode 700 Average Score: 15.07
Episode 800 Average Score: 15.16
Episode 900 Average Score: 15.90
Episode 1000 Average Score: 16.83
Episode 574 Average Score: 13.02
Environment solved in 474 episodes! Average Score: 13.02
```
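
The hyperparameters referred to above are the keyword arguments of `dqn()` in `bananas.py` (see its signature in the diff below). A sketch of a customized training call, assuming `dqn()` can be imported from `bananas.py` without side effects:

```
from bananas import dqn  # assumes bananas.py exposes dqn() at module level

# Sketch: train with a longer episode cap and a slower epsilon decay
# than the defaults shown in the diff below.
scores = dqn(n_episodes=2000, max_t=1000,
             eps_start=1.0, eps_end=0.01, eps_decay=0.990)
```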

# Environment details

The state space has 37 dimensions and consists of:
* the agent's velocity
* ray-based perception of objects in the agent's forward field of view

The agent receives a reward of +1 for collecting a yellow banana and -1 for collecting a blue banana. The goal is therefore to collect as many yellow bananas as possible while avoiding blue ones.

The action space consists of four discrete actions:
* 0 - walk forward
* 1 - walk backward
* 2 - turn left
* 3 - turn right

To solve the environment, the agent must achieve an average score of +13 over 100 consecutive episodes.
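
As a rough illustration of how the pieces above fit together, the loop below steps a random policy through one episode using the `unityagents` API. It is a sketch, not the trained agent in `bananas.py`, and assumes the `Banana.app` build sits in the working directory:

```
import numpy as np
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="Banana.app")
brain_name = env.brain_names[0]

env_info = env.reset(train_mode=False)[brain_name]
state = env_info.vector_observations[0]  # 37-dimensional state
score = 0
while True:
    action = np.random.randint(4)        # 0-3: forward, backward, left, right
    env_info = env.step(action)[brain_name]
    state = env_info.vector_observations[0]
    score += env_info.rewards[0]         # +1 yellow banana, -1 blue banana
    if env_info.local_done[0]:
        break
print('Score: {}'.format(score))
env.close()
```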

# Troubleshooting

If you run into an error such as the following when training the agent:
4 changes: 2 additions & 2 deletions bananas.py
@@ -9,7 +9,7 @@
import matplotlib.pyplot as plt

# https://github.com/udacity/deep-reinforcement-learning/blob/master/dqn/solution/Deep_Q_Network_Solution.ipynb
def dqn(n_episodes=1000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
def dqn(n_episodes=1000, max_t=750, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
"""Deep Q-Learning.
Params
@@ -45,7 +45,7 @@ def dqn(n_episodes=1000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.99
if i_episode % 100 == 0:
print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
torch.save(agent.qnetwork_local.state_dict(), 'weights.pth')
if np.mean(scores_window)>=200.0:
if np.mean(scores_window)>=13.0: # consider done when the target of 13 is reached
print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, np.mean(scores_window)))
torch.save(agent.qnetwork_local.state_dict(), 'weights.pth')
break
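
The solved check above averages a rolling window of the last 100 episode scores, which is why the solve point is reported as `i_episode - 100`. A standalone sketch of that bookkeeping, with `episode_scores` as a hypothetical iterable of per-episode scores:

```
from collections import deque
import numpy as np

scores_window = deque(maxlen=100)  # scores of the last 100 episodes
for i_episode, score in enumerate(episode_scores, start=1):
    scores_window.append(score)
    # wait until the window is full, then test the 100-episode average
    if i_episode >= 100 and np.mean(scores_window) >= 13.0:
        # the qualifying window ends at i_episode, so the environment is
        # reported as solved 100 episodes earlier
        print('Environment solved in {:d} episodes!'.format(i_episode - 100))
        break
```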
Binary file modified graph.png
Binary file modified weights.pth
Binary file not shown.
