docs: Add tutorial on error estimation

Teach error analysis of observables using correlated simulation data. Co-authored-by: Jonas Landsgesell <[email protected]> Co-authored-by: Jean-Noël Grad <[email protected]>
espressomd · Aug 2, 2021 · 4856dba · 4856dba
1 parent a0a3165
commit 4856dba
Show file tree

Hide file tree

Showing 10 changed files with 1,334 additions and 0 deletions.
diff --git a/doc/sphinx/introduction.rst b/doc/sphinx/introduction.rst
@@ -307,6 +307,7 @@ The following tutorials are available:
 
 * :file:`lennard_jones`: Modelling of a single-component and a two-component Lennard-Jones liquid.
 * :file:`visualization`: Using the online visualizers of |es|.
+* :file:`error_analysis`: Statistical analysis of simulation results.
 * :file:`charged_system`: Modelling of ion condensation around a charged rod.
 * :file:`ferrofluid`: Modelling a colloidal suspension of magnetic particles.
 * :file:`lattice_boltzmann`: Simulations including hydrodynamic interactions using the lattice-Boltzmann method.

diff --git a/doc/tutorials/CMakeLists.txt b/doc/tutorials/CMakeLists.txt
@@ -100,6 +100,7 @@ add_custom_target(tutorials_python)
 
 # Here: add new directory
 add_subdirectory(lennard_jones)
+add_subdirectory(error_analysis)
 add_subdirectory(charged_system)
 add_subdirectory(lattice_boltzmann)
 add_subdirectory(raspberry_electrophoresis)

diff --git a/doc/tutorials/Readme.md b/doc/tutorials/Readme.md
@@ -12,6 +12,11 @@ physical systems.
 * **Simulate a simple Lennard-Jones liquid**  
   Modelling of a single-component and a two-component Lennard-Jones liquid.  
   [Guide](lennard_jones/lennard_jones.ipynb)
+* **Error analysis**  
+  Statistical analysis of simulation results.  
+  Guide
+  [Part 1](error_analysis/error_analysis_part1.ipynb) |
+  [Part 2](error_analysis/error_analysis_part2.ipynb)
 * **Visualization**  
   Using the online visualizers of ESPResSo.  
   [Guide](visualization/visualization.ipynb)

diff --git a/doc/tutorials/error_analysis/CMakeLists.txt b/doc/tutorials/error_analysis/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Tutorial target for the two error analysis notebooks; both notebooks are
+# passed in the DEPENDS list of the shared target tutorial_err.
+configure_tutorial_target(TARGET tutorial_err DEPENDS
+                          error_analysis_part1.ipynb error_analysis_part2.ipynb)
+
+# One nb_export call per notebook, distinguished by SUFFIX "1" / "2".
+# NOTE(review): the meaning of HTML_RUN is defined by the project's nb_export
+# helper, which is not visible here -- confirm there.
+nb_export(TARGET tutorial_err SUFFIX "1" FILE "error_analysis_part1.ipynb"
+          HTML_RUN)
+nb_export(TARGET tutorial_err SUFFIX "2" FILE "error_analysis_part2.ipynb"
+          HTML_RUN)
diff --git a/doc/tutorials/error_analysis/NotesForTutor.md b/doc/tutorials/error_analysis/NotesForTutor.md
@@ -0,0 +1,18 @@
+# Part 1: Introduction and Binning Analysis
+
+## Learning goals
+
+* Give a brief overview of common measures of dispersion (standard deviation,
+  confidence interval, standard error of the mean)
+* Teach binning analysis and apply it to well-behaved data and to data where
+  it doesn't converge (synthetic data is generated using an AR(1) process)
+
+# Part 2: Autocorrelation Analysis
+
+## Learning goals
+
+* Teach autocorrelation analysis
+* Integrate the ACF to determine the standard error of the mean
+* Extract the autocorrelation time and use that information to decrease the
+  observable sampling frequency (and thus reduce the amount of data and
+  improve performance) and increase the simulation time for better statistics
diff --git a/doc/tutorials/error_analysis/error_analysis_part1.ipynb b/doc/tutorials/error_analysis/error_analysis_part1.ipynb
diff --git a/doc/tutorials/error_analysis/error_analysis_part2.ipynb b/doc/tutorials/error_analysis/error_analysis_part2.ipynb
diff --git a/testsuite/scripts/tutorials/CMakeLists.txt b/testsuite/scripts/tutorials/CMakeLists.txt
@@ -25,6 +25,8 @@ add_custom_target(
   COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/doc/tutorials
           ${TUTORIALS_DIR} DEPENDS tutorials_python)
 
+tutorial_test(FILE test_error_analysis_part1.py)
+tutorial_test(FILE test_error_analysis_part2.py)
 tutorial_test(FILE test_lennard_jones.py)
 tutorial_test(FILE test_charged_system.py)
 tutorial_test(FILE test_lattice_boltzmann_part2.py)

diff --git a/testsuite/scripts/tutorials/test_error_analysis_part1.py b/testsuite/scripts/tutorials/test_error_analysis_part1.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2021 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import unittest as ut
+import importlib_wrapper
+import numpy as np
+import scipy.signal
+
+tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
+    filepath="@TUTORIALS_DIR@/error_analysis/error_analysis_part1.py")
+
+
+# Test case for the objects exposed by the imported ``tutorial`` module
+# (error_analysis_part1.py): the AR(1) generator, the manual binning
+# analysis, the binning analysis function and the analytic standard errors.
+@skipIfMissingFeatures
+class Tutorial(ut.TestCase):
+
+    # Reference AR(1) generator used to cross-check tutorial.ar_1_process.
+    # Returns an array of n samples. It performs no validation of phi.
+    def ar_1_process(self, n, c, phi, eps):
+        # first sample: normal with mean c / (1 - phi) and standard deviation
+        # sqrt(eps^2 / (1 - phi^2)), the same expression used for SIGMA_1 in
+        # test() below
+        y0 = np.random.normal(loc=c / (1 - phi),
+                              scale=np.sqrt(eps**2 / (1 - phi**2)))
+        # remaining n - 1 inputs: constant c plus Gaussian noise of width eps
+        y = c + np.random.normal(loc=0.0, scale=eps, size=n - 1)
+        y = np.insert(y, 0, y0)
+        # get an AR(1) process from an ARMA(p,q) process with p=1 and q=0
+        # (the filter computes out[t] = y[t] + phi * out[t-1])
+        y = scipy.signal.lfilter([1.], [1., -phi], y)
+        return y
+
+    # Checks that tutorial.ar_1_process rejects |phi| > 1 and otherwise
+    # reproduces the reference generator above.
+    def test_ar1_implementation(self):
+        # phi = 1.1 and phi = -1.1 must both raise ValueError
+        with self.assertRaises(ValueError):
+            tutorial.ar_1_process(10, 1.0, 1.1, 3.0)
+        with self.assertRaises(ValueError):
+            tutorial.ar_1_process(10, 1.0, -1.1, 3.0)
+
+        for seed in range(5):
+            for eps in [0.5, 1., 2.]:
+                for phi in [0.1, 0.8, 0.999, -0.3]:
+                    c = eps / 2.
+                    # reseed before each call so that both implementations
+                    # start from the same random number generator state
+                    np.random.seed(seed)
+                    seq = tutorial.ar_1_process(10, c, phi, eps)
+                    np.random.seed(seed)
+                    ref = self.ar_1_process(10, c, phi, eps)
+                    # purely absolute tolerance (rtol=0)
+                    np.testing.assert_allclose(seq, ref, atol=1e-12, rtol=0)
+
+    # Checks the tutorial's binning results and analytic standard errors
+    # against values recomputed here.
+    def test(self):
+        self.assertLess(abs(tutorial.PHI_1), 1.0)
+        self.assertLess(abs(tutorial.PHI_2), 1.0)
+
+        # Test manual binning analysis: the first N_BINS * BIN_SIZE samples
+        # are reshaped into N_BINS rows and each row is averaged
+        ref_bin_avgs = np.mean(
+            tutorial.time_series_1[:tutorial.N_BINS * tutorial.BIN_SIZE].reshape((tutorial.N_BINS, -1)), axis=1)
+        np.testing.assert_allclose(
+            tutorial.bin_avgs,
+            ref_bin_avgs,
+            atol=1e-12,
+            rtol=0)
+        self.assertAlmostEqual(
+            tutorial.avg,
+            np.mean(ref_bin_avgs),
+            delta=1e-10)
+        # NOTE(review): the non-integer ddof=1.5 presumably mirrors the
+        # estimator used in the tutorial notebook -- confirm there
+        self.assertAlmostEqual(
+            tutorial.sem,
+            np.std(ref_bin_avgs, ddof=1.5) / np.sqrt(tutorial.N_BINS),
+            delta=1e-10)
+
+        # Test binning analysis function on 500 random samples; bin size 76
+        # does not divide 500, so the reference drops the trailing samples
+        # (only n_bins * bin_size = 456 samples are used in that case)
+        for bin_size in [2, 10, 76, 100]:
+            data = np.random.random(500)
+            n_bins = 500 // bin_size
+            sem = tutorial.do_binning_analysis(data, bin_size)
+            ref_bin_avgs = np.mean(
+                data[:n_bins * bin_size].reshape((n_bins, -1)), axis=1)
+            ref_sem = np.std(ref_bin_avgs, ddof=1.5) / np.sqrt(n_bins)
+            self.assertAlmostEqual(sem, ref_sem, delta=1e-10)
+
+        # The analytic expressions for the AR(1) process are taken from
+        # https://en.wikipedia.org/wiki/Autoregressive_model#Example:_An_AR(1)_process
+        # (accessed June 2021)
+        SIGMA_1 = np.sqrt(tutorial.EPS_1 ** 2 / (1 - tutorial.PHI_1 ** 2))
+        TAU_EXP_1 = -1 / np.log(tutorial.PHI_1)
+        # The autocorrelation is exponential, thus tau_exp = tau_int, and
+        # therefore
+        SEM_1 = np.sqrt(2 * SIGMA_1 ** 2 * TAU_EXP_1 / tutorial.N_SAMPLES)
+
+        # NOTE(review): fit_params[2] is presumably the SEM obtained from the
+        # tutorial's fit -- confirm against the notebook. It only has to agree
+        # with the analytic value to within 10%.
+        self.assertAlmostEqual(
+            tutorial.fit_params[2],
+            SEM_1,
+            delta=0.1 * SEM_1)
+        # the tutorial's analytic value must match to a relative 1e-10
+        self.assertAlmostEqual(tutorial.AN_SEM_1, SEM_1, delta=1e-10 * SEM_1)
+
+        # same analytic expressions for the second parameter set
+        SIGMA_2 = np.sqrt(tutorial.EPS_2 ** 2 / (1 - tutorial.PHI_2 ** 2))
+        TAU_EXP_2 = -1 / np.log(tutorial.PHI_2)
+        SEM_2 = np.sqrt(2 * SIGMA_2 ** 2 * TAU_EXP_2 / tutorial.N_SAMPLES)
+
+        self.assertAlmostEqual(tutorial.AN_SEM_2, SEM_2, delta=1e-10 * SEM_2)
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/scripts/tutorials/test_error_analysis_part2.py b/testsuite/scripts/tutorials/test_error_analysis_part2.py
@@ -0,0 +1,90 @@
+# Copyright (C) 2021 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import unittest as ut
+import importlib_wrapper
+import numpy as np
+import scipy.signal
+
+tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
+    filepath="@TUTORIALS_DIR@/error_analysis/error_analysis_part2.py")
+
+
+# Test case for the objects exposed by the imported ``tutorial`` module
+# (error_analysis_part2.py): the AR(1) generator, the analytic
+# autocorrelation function and the derived error estimates.
+@skipIfMissingFeatures
+class Tutorial(ut.TestCase):
+
+    # Reference AR(1) generator used to cross-check tutorial.ar_1_process.
+    # Returns an array of n samples. It performs no validation of phi.
+    def ar_1_process(self, n, c, phi, eps):
+        # first sample: normal with mean c / (1 - phi) and standard deviation
+        # sqrt(eps^2 / (1 - phi^2)), the same expression used for SIGMA_1 in
+        # test() below
+        y0 = np.random.normal(loc=c / (1 - phi),
+                              scale=np.sqrt(eps**2 / (1 - phi**2)))
+        # remaining n - 1 inputs: constant c plus Gaussian noise of width eps
+        y = c + np.random.normal(loc=0.0, scale=eps, size=n - 1)
+        y = np.insert(y, 0, y0)
+        # get an AR(1) process from an ARMA(p,q) process with p=1 and q=0
+        # (the filter computes out[t] = y[t] + phi * out[t-1])
+        y = scipy.signal.lfilter([1.], [1., -phi], y)
+        return y
+
+    # Checks that tutorial.ar_1_process rejects |phi| > 1 and otherwise
+    # reproduces the reference generator above.
+    def test_ar1_implementation(self):
+        # phi = 1.1 and phi = -1.1 must both raise ValueError
+        with self.assertRaises(ValueError):
+            tutorial.ar_1_process(10, 1.0, 1.1, 3.0)
+        with self.assertRaises(ValueError):
+            tutorial.ar_1_process(10, 1.0, -1.1, 3.0)
+
+        for seed in range(5):
+            for eps in [0.5, 1., 2.]:
+                for phi in [0.1, 0.8, 0.999, -0.3]:
+                    c = eps / 2.
+                    # reseed before each call so that both implementations
+                    # start from the same random number generator state
+                    np.random.seed(seed)
+                    seq = tutorial.ar_1_process(10, c, phi, eps)
+                    np.random.seed(seed)
+                    ref = self.ar_1_process(10, c, phi, eps)
+                    # purely absolute tolerance (rtol=0)
+                    np.testing.assert_allclose(seq, ref, atol=1e-12, rtol=0)
+
+    # Checks the tutorial's analytic ACF and its estimates of the standard
+    # error, effective sample size and autocorrelation time.
+    def test(self):
+        self.assertLess(abs(tutorial.PHI_1), 1.0)
+        self.assertLess(abs(tutorial.PHI_2), 1.0)
+
+        # The analytic expressions for the AR(1) process are taken from
+        # https://en.wikipedia.org/wiki/Autoregressive_model#Example:_An_AR(1)_process
+        # (accessed June 2021)
+        SIGMA_1 = np.sqrt(tutorial.EPS_1 ** 2 / (1 - tutorial.PHI_1 ** 2))
+        TAU_EXP_1 = -1 / np.log(tutorial.PHI_1)
+        # reference ACF: exponential decay evaluated at lags 1 .. N_MAX - 1
+        ref_acf_1 = SIGMA_1**2 * \
+            np.exp(-np.arange(1, tutorial.N_MAX, dtype=float) / TAU_EXP_1)
+        np.testing.assert_allclose(tutorial.an_acf_1, ref_acf_1)
+        # The autocorrelation is exponential, thus tau_exp = tau_int, and
+        # therefore
+        N_EFF_1 = tutorial.N_SAMPLES / (2 * TAU_EXP_1)
+        SEM_1 = np.sqrt(SIGMA_1 ** 2 / N_EFF_1)
+
+        # the tutorial's estimates only have to agree with the analytic
+        # values to within 10%
+        self.assertAlmostEqual(tutorial.sem, SEM_1, delta=0.1 * SEM_1)
+        self.assertAlmostEqual(tutorial.N_eff, N_EFF_1, delta=0.1 * N_EFF_1)
+        # in the tutorial, the integrated autocorrelation time is always
+        # found to be higher than the exponential one (reason unknown)
+        self.assertAlmostEqual(
+            tutorial.tau_int,
+            TAU_EXP_1,
+            delta=0.1 * TAU_EXP_1)
+
+        # same analytic expressions for the second parameter set
+        SIGMA_2 = np.sqrt(tutorial.EPS_2 ** 2 / (1 - tutorial.PHI_2 ** 2))
+        TAU_EXP_2 = -1 / np.log(tutorial.PHI_2)
+        SEM_2 = np.sqrt(2 * SIGMA_2 ** 2 * TAU_EXP_2 / tutorial.N_SAMPLES)
+        # the point of the following value in the tutorial is that it is very
+        # inaccurate, thus the high tolerance
+        self.assertAlmostEqual(tutorial.sem_2, SEM_2, delta=0.2 * SEM_2)
+
+
+if __name__ == "__main__":
+    ut.main()