Skip to content

Commit

Permalink
Add error analysis tutorial (#4174)
Browse files Browse the repository at this point in the history
Description of changes:
- Adds a tutorial that teaches users the basics of error analysis.

This will solve the tutorial side of issue #3958
  • Loading branch information
kodiakhq[bot] authored Aug 2, 2021
2 parents a0a3165 + 4856dba commit 0bf872c
Show file tree
Hide file tree
Showing 10 changed files with 1,334 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/sphinx/introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ The following tutorials are available:

* :file:`lennard_jones`: Modelling of a single-component and a two-component Lennard-Jones liquid.
* :file:`visualization`: Using the online visualizers of |es|.
* :file:`error_analysis`: Statistical analysis of simulation results.
* :file:`charged_system`: Modelling of ion condensation around a charged rod.
* :file:`ferrofluid`: Modelling a colloidal suspension of magnetic particles.
* :file:`lattice_boltzmann`: Simulations including hydrodynamic interactions using the lattice-Boltzmann method.
Expand Down
1 change: 1 addition & 0 deletions doc/tutorials/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ add_custom_target(tutorials_python)

# Here: add new directory
add_subdirectory(lennard_jones)
add_subdirectory(error_analysis)
add_subdirectory(charged_system)
add_subdirectory(lattice_boltzmann)
add_subdirectory(raspberry_electrophoresis)
Expand Down
5 changes: 5 additions & 0 deletions doc/tutorials/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ physical systems.
* **Simulate a simple Lennard-Jones liquid**
Modelling of a single-component and a two-component Lennard-Jones liquid.
[Guide](lennard_jones/lennard_jones.ipynb)
* **Error_analysis**
Statistical analysis of simulation results
Guide
[Part 1](error_analysis/error_analysis_part1.ipynb) |
[Part 2](error_analysis/error_analysis_part2.ipynb)
* **Visualization**
Using the online visualizers of ESPResSo.
[Guide](visualization/visualization.ipynb)
Expand Down
7 changes: 7 additions & 0 deletions doc/tutorials/error_analysis/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
configure_tutorial_target(TARGET tutorial_err DEPENDS
error_analysis_part1.ipynb error_analysis_part2.ipynb)

nb_export(TARGET tutorial_err SUFFIX "1" FILE "error_analysis_part1.ipynb"
HTML_RUN)
nb_export(TARGET tutorial_err SUFFIX "2" FILE "error_analysis_part2.ipynb"
HTML_RUN)
18 changes: 18 additions & 0 deletions doc/tutorials/error_analysis/NotesForTutor.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Part 1: Introduction and Binning Analysis

## Learning goals

* Give a brief overview of common measures of dispersion (standard deviation,
confidence interval, standard error of the mean)
* Teach binning analysis and apply it on well-behaved data and on data where
it doesn't converge (synthetic data is generated using the AR1 process)

# Part 2: Autocorrelation Analysis

## Learning goals

* Teach autocorrelation analysis
* Integrate the ACF to determine the standard error of the mean
* Extract the autocorrelation time and use that information to decrease the
observable sampling frequency (and thus reduce the amount of data and
improve performance) and increase the simulation time for better statistics
520 changes: 520 additions & 0 deletions doc/tutorials/error_analysis/error_analysis_part1.ipynb

Large diffs are not rendered by default.

581 changes: 581 additions & 0 deletions doc/tutorials/error_analysis/error_analysis_part2.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions testsuite/scripts/tutorials/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ add_custom_target(
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/doc/tutorials
${TUTORIALS_DIR} DEPENDS tutorials_python)

tutorial_test(FILE test_error_analysis_part1.py)
tutorial_test(FILE test_error_analysis_part2.py)
tutorial_test(FILE test_lennard_jones.py)
tutorial_test(FILE test_charged_system.py)
tutorial_test(FILE test_lattice_boltzmann_part2.py)
Expand Down
109 changes: 109 additions & 0 deletions testsuite/scripts/tutorials/test_error_analysis_part1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright (C) 2021 The ESPResSo project
#
# This file is part of ESPResSo.
#
# ESPResSo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest as ut
import importlib_wrapper
import numpy as np
import scipy.signal

tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
filepath="@TUTORIALS_DIR@/error_analysis/error_analysis_part1.py")


@skipIfMissingFeatures
class Tutorial(ut.TestCase):

def ar_1_process(self, n, c, phi, eps):
y0 = np.random.normal(loc=c / (1 - phi),
scale=np.sqrt(eps**2 / (1 - phi**2)))
y = c + np.random.normal(loc=0.0, scale=eps, size=n - 1)
y = np.insert(y, 0, y0)
# get an AR(1) process from an ARMA(p,q) process with p=1 and q=0
y = scipy.signal.lfilter([1.], [1., -phi], y)
return y

def test_ar1_implementation(self):
with self.assertRaises(ValueError):
tutorial.ar_1_process(10, 1.0, 1.1, 3.0)
with self.assertRaises(ValueError):
tutorial.ar_1_process(10, 1.0, -1.1, 3.0)

for seed in range(5):
for eps in [0.5, 1., 2.]:
for phi in [0.1, 0.8, 0.999, -0.3]:
c = eps / 2.
np.random.seed(seed)
seq = tutorial.ar_1_process(10, c, phi, eps)
np.random.seed(seed)
ref = self.ar_1_process(10, c, phi, eps)
np.testing.assert_allclose(seq, ref, atol=1e-12, rtol=0)

def test(self):
self.assertLess(abs(tutorial.PHI_1), 1.0)
self.assertLess(abs(tutorial.PHI_2), 1.0)

# Test manual binning analysis
ref_bin_avgs = np.mean(
tutorial.time_series_1[:tutorial.N_BINS * tutorial.BIN_SIZE].reshape((tutorial.N_BINS, -1)), axis=1)
np.testing.assert_allclose(
tutorial.bin_avgs,
ref_bin_avgs,
atol=1e-12,
rtol=0)
self.assertAlmostEqual(
tutorial.avg,
np.mean(ref_bin_avgs),
delta=1e-10)
self.assertAlmostEqual(
tutorial.sem,
np.std(ref_bin_avgs, ddof=1.5) / np.sqrt(tutorial.N_BINS),
delta=1e-10)

# Test binning analysis function
for bin_size in [2, 10, 76, 100]:
data = np.random.random(500)
n_bins = 500 // bin_size
sem = tutorial.do_binning_analysis(data, bin_size)
ref_bin_avgs = np.mean(
data[:n_bins * bin_size].reshape((n_bins, -1)), axis=1)
ref_sem = np.std(ref_bin_avgs, ddof=1.5) / np.sqrt(n_bins)
self.assertAlmostEqual(sem, ref_sem, delta=1e-10)

# The analytic expressions for the AR(1) process are taken from
# https://en.wikipedia.org/wiki/Autoregressive_model#Example:_An_AR(1)_process
# (accessed June 2021)
SIGMA_1 = np.sqrt(tutorial.EPS_1 ** 2 / (1 - tutorial.PHI_1 ** 2))
TAU_EXP_1 = -1 / np.log(tutorial.PHI_1)
# The autocorrelation is exponential, thus tau_exp = tau_int, and
# therefore
SEM_1 = np.sqrt(2 * SIGMA_1 ** 2 * TAU_EXP_1 / tutorial.N_SAMPLES)

self.assertAlmostEqual(
tutorial.fit_params[2],
SEM_1,
delta=0.1 * SEM_1)
self.assertAlmostEqual(tutorial.AN_SEM_1, SEM_1, delta=1e-10 * SEM_1)

SIGMA_2 = np.sqrt(tutorial.EPS_2 ** 2 / (1 - tutorial.PHI_2 ** 2))
TAU_EXP_2 = -1 / np.log(tutorial.PHI_2)
SEM_2 = np.sqrt(2 * SIGMA_2 ** 2 * TAU_EXP_2 / tutorial.N_SAMPLES)

self.assertAlmostEqual(tutorial.AN_SEM_2, SEM_2, delta=1e-10 * SEM_2)


if __name__ == "__main__":
ut.main()
90 changes: 90 additions & 0 deletions testsuite/scripts/tutorials/test_error_analysis_part2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright (C) 2021 The ESPResSo project
#
# This file is part of ESPResSo.
#
# ESPResSo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest as ut
import importlib_wrapper
import numpy as np
import scipy.signal

tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
filepath="@TUTORIALS_DIR@/error_analysis/error_analysis_part2.py")


@skipIfMissingFeatures
class Tutorial(ut.TestCase):

def ar_1_process(self, n, c, phi, eps):
y0 = np.random.normal(loc=c / (1 - phi),
scale=np.sqrt(eps**2 / (1 - phi**2)))
y = c + np.random.normal(loc=0.0, scale=eps, size=n - 1)
y = np.insert(y, 0, y0)
# get an AR(1) process from an ARMA(p,q) process with p=1 and q=0
y = scipy.signal.lfilter([1.], [1., -phi], y)
return y

def test_ar1_implementation(self):
with self.assertRaises(ValueError):
tutorial.ar_1_process(10, 1.0, 1.1, 3.0)
with self.assertRaises(ValueError):
tutorial.ar_1_process(10, 1.0, -1.1, 3.0)

for seed in range(5):
for eps in [0.5, 1., 2.]:
for phi in [0.1, 0.8, 0.999, -0.3]:
c = eps / 2.
np.random.seed(seed)
seq = tutorial.ar_1_process(10, c, phi, eps)
np.random.seed(seed)
ref = self.ar_1_process(10, c, phi, eps)
np.testing.assert_allclose(seq, ref, atol=1e-12, rtol=0)

def test(self):
self.assertLess(abs(tutorial.PHI_1), 1.0)
self.assertLess(abs(tutorial.PHI_2), 1.0)

# The analytic expressions for the AR(1) process are taken from
# https://en.wikipedia.org/wiki/Autoregressive_model#Example:_An_AR(1)_process
# (accessed June 2021)
SIGMA_1 = np.sqrt(tutorial.EPS_1 ** 2 / (1 - tutorial.PHI_1 ** 2))
TAU_EXP_1 = -1 / np.log(tutorial.PHI_1)
ref_acf_1 = SIGMA_1**2 * \
np.exp(-np.arange(1, tutorial.N_MAX, dtype=float) / TAU_EXP_1)
np.testing.assert_allclose(tutorial.an_acf_1, ref_acf_1)
# The autocorrelation is exponential, thus tau_exp = tau_int, and
# therefore
N_EFF_1 = tutorial.N_SAMPLES / (2 * TAU_EXP_1)
SEM_1 = np.sqrt(SIGMA_1 ** 2 / N_EFF_1)

self.assertAlmostEqual(tutorial.sem, SEM_1, delta=0.1 * SEM_1)
self.assertAlmostEqual(tutorial.N_eff, N_EFF_1, delta=0.1 * N_EFF_1)
# for some reason, the integrated autocorrelation time is always higher
# than the exponential one, in the tutorial
self.assertAlmostEqual(
tutorial.tau_int,
TAU_EXP_1,
delta=0.1 * TAU_EXP_1)

SIGMA_2 = np.sqrt(tutorial.EPS_2 ** 2 / (1 - tutorial.PHI_2 ** 2))
TAU_EXP_2 = -1 / np.log(tutorial.PHI_2)
SEM_2 = np.sqrt(2 * SIGMA_2 ** 2 * TAU_EXP_2 / tutorial.N_SAMPLES)
# the point of the following value in the tutorial is that it is very
# inaccurate, thus the high tolerance
self.assertAlmostEqual(tutorial.sem_2, SEM_2, delta=0.2 * SEM_2)


if __name__ == "__main__":
ut.main()

0 comments on commit 0bf872c

Please sign in to comment.