diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b3645bc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,128 @@
+
+# Created by https://www.gitignore.io/api/python
+# Edit at https://www.gitignore.io/?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+### Python Patch ###
+.venv/
+
+Bananas.app
+*.log
+
+# End of https://www.gitignore.io/api/python
diff --git a/README.md b/README.md
index 18d96e1..97371a3 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ To run the pre-trained agent, execute the following:
 
 ```
 $ python bananas.py --run
+(drltest1) sendai:bananas anuradha$ python bananas.py --run
 Mono path[0] = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/Resources/Data/Managed'
 Mono config path = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/MonoBleedingEdge/etc'
 INFO:unityagents:
@@ -64,14 +65,13 @@ States look like: [1. 0. 0. 0. 0.84408134 0. 0.
  1. 0. 0. 0.31969345 0. 0. ]
 States have length: 37
-Score: 15.0
+Score: 14.0
 ```
 
 To customize hyperparameters and train the agent, execute the following:
 
 ```
 $ python bananas.py --train
-(drl) sendai:bananas anuradha$ python bananas.py --train
 Mono path[0] = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/Resources/Data/Managed'
 Mono config path = '/Users/anuradha/ninsei/udacity/bananas/Banana.app/Contents/MonoBleedingEdge/etc'
 INFO:unityagents:
@@ -97,13 +97,27 @@ Episode 200 Average Score: 4.03
 Episode 300 Average Score: 7.21
 Episode 400 Average Score: 9.00
 Episode 500 Average Score: 11.44
-Episode 600 Average Score: 13.50
-Episode 700 Average Score: 15.07
-Episode 800 Average Score: 15.16
-Episode 900 Average Score: 15.90
-Episode 1000 Average Score: 16.83
+Episode 574 Average Score: 13.02
+Environment solved in 474 episodes! Average Score: 13.02
 ```
 
+# Environment details
+
+The state space has 37 dimensions and consists of:
+* the agent's velocity
+* objects in the agent's forward field of view
+
+The agent receives a reward of +1 for collecting a yellow banana, and -1 for collecting a blue banana.
+The goal is therefore to maximize the collection of yellow bananas while avoiding blue ones.
+
+The action space for the agent consists of the following four possible actions:
+* 0 - walk forward
+* 1 - walk backward
+* 2 - turn left
+* 3 - turn right
+
+The environment is considered solved when the agent achieves an average score of +13 or more over 100 consecutive episodes.
+
 # Troubleshooting
 
 If you run into an error such as the following when training the agent:
diff --git a/bananas.py b/bananas.py
index b8a63bf..5f684ee 100644
--- a/bananas.py
+++ b/bananas.py
@@ -9,7 +9,7 @@ import matplotlib.pyplot as plt
 
 # https://github.com/udacity/deep-reinforcement-learning/blob/master/dqn/solution/Deep_Q_Network_Solution.ipynb
 
-def dqn(n_episodes=1000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
+def dqn(n_episodes=1000, max_t=750, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
     """Deep Q-Learning.
 
     Params
@@ -45,7 +45,7 @@ def dqn(n_episodes=1000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.99
         if i_episode % 100 == 0:
             print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
             torch.save(agent.qnetwork_local.state_dict(), 'weights.pth')
-        if np.mean(scores_window)>=200.0:
+        if np.mean(scores_window)>=13.0:   # consider done when the target of 13 is reached
             print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, np.mean(scores_window)))
             torch.save(agent.qnetwork_local.state_dict(), 'weights.pth')
             break
diff --git a/graph.png b/graph.png
index df692be..faa83f5 100644
Binary files a/graph.png and b/graph.png differ
diff --git a/weights.pth b/weights.pth
index a6453d5..af70317 100644
Binary files a/weights.pth and b/weights.pth differ
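Note: the environment details added to the README above map onto the `unityagents` API that `bananas.py` already uses. The following is a minimal sketch, assuming the Banana build sits at `Banana.app` next to the script (adjust the path to your local build), of how the 37-dimensional state, the four discrete actions, and the +1/-1 rewards are exposed:

```
from unityagents import UnityEnvironment
import numpy as np

# Load the Banana environment (path is an assumption; point it at your local build).
env = UnityEnvironment(file_name="Banana.app")
brain_name = env.brain_names[0]

# Reset and read the initial 37-dimensional state vector.
env_info = env.reset(train_mode=False)[brain_name]
state = env_info.vector_observations[0]          # length 37

# Take one random action (0-3: forward, backward, turn left, turn right).
action = np.random.randint(4)
env_info = env.step(action)[brain_name]

next_state = env_info.vector_observations[0]     # next 37-dim state
reward = env_info.rewards[0]                     # +1 yellow banana, -1 blue banana, else 0
done = env_info.local_done[0]                    # True when the episode ends

env.close()
```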
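The `bananas.py` change lowers the solved threshold from 200.0 (the value in the reference DQN notebook linked in the code) to the +13 target described in the README, and caps each episode at `max_t=750` steps. A rough sketch of the surrounding loop, with a hypothetical `run_episode` helper standing in for the per-step interaction that `bananas.py` already implements:

```
from collections import deque
import numpy as np

def train(agent, env, n_episodes=1000, max_t=750,
          eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    scores_window = deque(maxlen=100)        # scores of the last 100 episodes
    eps = eps_start
    for i_episode in range(1, n_episodes + 1):
        # run_episode is a hypothetical helper: one epsilon-greedy episode of at most max_t steps.
        score = run_episode(agent, env, eps, max_t)
        scores_window.append(score)
        eps = max(eps_end, eps_decay * eps)  # decay exploration toward eps_end
        if np.mean(scores_window) >= 13.0:   # average of +13 over the last 100 episodes
            print('Environment solved in {:d} episodes!'.format(i_episode - 100))
            break
```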