Merge pull request #251 from lephare-photoz/198-fast-last-vector-dime…

…nsion-being-used-by-different-threads-might-cause-false-cache-sharing 198 fast last vector dimension being used by different threads might cause false cache sharing
lephare-photoz · Dec 10, 2024 · 1164c3c · 1164c3c
2 parents f5ecbfa + 6e96730
commit 1164c3c
Show file tree

Hide file tree

Showing 9 changed files with 112 additions and 73 deletions.
diff --git a/docs/notebooks/Testing_fit_of_one_object_of_the_catalogue.ipynb b/docs/notebooks/Testing_fit_of_one_object_of_the_catalogue.ipynb
@@ -303,7 +303,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "src.rm_discrepant(photz.fullLib, photz.flux, valid, funz0, bp, 500.0, True)"
+    "src.rm_discrepant(photz.fullLib, photz.flux, valid, funz0, bp, 500.0)"
    ]
   },
   {
@@ -755,7 +755,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.12.4"
   }
  },
  "nbformat": 4,

diff --git a/docs/test_suite/test_suite.sh b/docs/test_suite/test_suite.sh
diff --git a/src/lib/SED.h b/src/lib/SED.h
@@ -17,6 +17,7 @@
 #include "onesource.h"
 #include "opa.h"
 
+//! types of object that LePHARE can treat distinctively
 enum object_type { GAL, QSO, STAR };
 
 /*
@@ -35,9 +36,16 @@ class SED {
   string name;
   bool has_emlines;  ///< True if the emission lines have been computed, false
                      ///< if not
-  int nummod;
+
+  //! object type of the SED
   object_type nlib;
-  int index, index_z0, posz = -1;
+
+  int nummod,    ///< index in the initial list of rest frame SEDs
+      index,     ///< index in the full list of SED, redshifted, and modified by
+                 ///< extinction, etc...
+      index_z0;  ///< index in the full list of SEDs corresponding to the z=0
+                 ///< version of the current SED.
+
   double red, chi2 = HIGH_CHI2, dm, lnir, luv, lopt, inter;
   double mass, age, sfr, ssfr,
       ltir;  // need to put it out of GalSED since used in the PDF without
@@ -100,6 +108,9 @@ class SED {
     }
   }
 
+  //! Return the object type
+  object_type get_object_type() const { return nlib; }
+
   //! Return true if SED is of object_type GAL, false if not
   bool is_gal() { return nlib == GAL ? true : false; }
 

diff --git a/src/lib/_bindings.cc b/src/lib/_bindings.cc
@@ -322,6 +322,8 @@ PYBIND11_MODULE(_lephare, mod) {
            static_cast<void (onesource::*)(
                const string &, const vector<double>, const vector<double>,
                const long, const double, const string)>(&onesource::readsource))
+      .def("set_verbosity", &onesource::set_verbosity)
+      .def("get_verbosity", &onesource::get_verbosity)
       .def("fltUsed", &onesource::fltUsed)
       .def("convertFlux", &onesource::convertFlux)
       .def("convertMag", &onesource::convertMag)

diff --git a/src/lib/onesource.cpp b/src/lib/onesource.cpp
@@ -132,7 +132,8 @@ void onesource::fltUsed(const long gbcont, const long contforb,
     // Count the bands used as upper-limits
     if (busul[k] == 1) nbul++;
   }
-  if (nf == 0) cout << "WARNING: No scaling --> No z " << spec << endl;
+  if (nf == 0 && verbose)
+    cout << "WARNING: No scaling --> No z " << spec << endl;
 }
 
 /*
@@ -373,8 +374,9 @@ void onesource::fit(vector<SED *> &fulllib, const vector<vector<double>> &flux,
 #ifdef _OPENMP
   number_threads = omp_get_max_threads();
 #endif
-  vector<vector<double>> locChi2(3, vector<double>(number_threads, HIGH_CHI2));
-  vector<vector<int>> locInd(3, vector<int>(number_threads, -1));
+  vector<vector<double>> chi2_vals(number_threads,
+                                   vector<double>(3, HIGH_CHI2));
+  vector<vector<int>> chi2_idx(number_threads, vector<int>(3, -1));
 
   // Compute some quantities linked to ab and sab to save computational time in
   // the fit. invsab = busnorma / sab busnorma here ensures that all the
@@ -399,94 +401,92 @@ void onesource::fit(vector<SED *> &fulllib, const vector<vector<double>> &flux,
   // parrallellize over each SED
   vector<double> chi2loc(fulllib.size(), HIGH_CHI2),
       dmloc(fulllib.size(), -999.);
-  size_t il;
+
 #ifdef _OPENMP
   // double start = omp_get_wtime();
 #pragma omp parallel shared(fulllib)                                    \
     firstprivate(s2n, invsab, invsabSq, abinvsabSq, imagm, nbul, busul, \
-                     priorLib, number_threads, thread_id) private(il)
+                     priorLib, number_threads, thread_id)
   {
     // Catch the name of the local thread in the parrallelisation
     thread_id = omp_get_thread_num();
 #endif
 
     // Compute normalisation and chi2
 #pragma omp for schedule(static, 10000)
-    for (size_t i = 0; i < va.size(); i++) {
+    for (size_t v = 0; v < va.size(); v++) {
       // index to be considered because of ZFIX=YES
-      il = va[i];
+      size_t i = va[v];
       // Initialize
-      chi2loc[il] = HIGH_CHI2;
-      dmloc[il] = -999.;
+      chi2loc[i] = HIGH_CHI2;
+      dmloc[i] = -999.;
 
       // Measurement of scaling factor dm only with (fobs>flim), dchi2/ddm = 0
       double avmago = 0., avmagt = 0.;
       for (size_t k = 0; k < imagm; k++) {
-        double fluxin = flux[il][k];
+        double fluxin = flux[i][k];
         avmago += fluxin * abinvsabSq[k];
         avmagt += fluxin * fluxin * invsabSq[k];
       }
       // Normalisation
-      if (avmagt > 0) dmloc[il] = avmago / avmagt;
+      if (avmagt > 0) dmloc[i] = avmago / avmagt;
 
       // Measurement of chi^2
-      chi2loc[il] = 0;
+      chi2loc[i] = 0;
       for (size_t k = 0; k < imagm; k++) {
-        double inter = s2n[k] - dmloc[il] * flux[il][k] * invsab[k];
-        chi2loc[il] += inter * inter;
+        double inter = s2n[k] - dmloc[i] * flux[i][k] * invsab[k];
+        chi2loc[i] += inter * inter;
       }
 
       // Upper-limits. Check first if some bands have upper-limits, before
       // applying the condition
       if (nbul > 0) {
         for (size_t k = 0; k < imagm; k++) {
-          if ((dmloc[il] * busul[k] * flux[il][k]) > ab[k] && busnorma[k] == 1)
-            chi2loc[il] = HIGH_CHI2;
+          if ((dmloc[i] * busul[k] * flux[i][k]) > ab[k] && busnorma[k] == 1)
+            chi2loc[i] = HIGH_CHI2;
         }
       }
 
       // Model rejection based on prior
       // Abs mag rejection
       double reds;
       int libtype;
-      reds = (fulllib[il])->red;
-      libtype = (fulllib[il])->nlib;
+      SED *sed = fulllib[i];
+      reds = sed->red;
+      libtype = sed->nlib;
       // Abs Magnitude from model @ z=0 for rejection for galaxies and AGN
       if ((mabsGALprior && libtype == 0) || (mabsAGNprior && libtype == 1)) {
         // predicted magnitudes within the babs filter renormalized by the
         // scaling
         double abs_mag;
-        abs_mag = (fulllib[il])->mag0 - 2.5 * log10(dmloc[il]);
+        abs_mag = sed->mag0 - 2.5 * log10(dmloc[i]);
         // specific case when the redshift is 0
         if (reds < 1.e-10) abs_mag = abs_mag - funz0;
         // Galaxy rejection
         if ((abs_mag <= priorLib[0] || abs_mag >= priorLib[1]) && libtype == 0)
-          chi2loc[il] = HIGH_CHI2;
+          chi2loc[i] = HIGH_CHI2;
         // AGN rejection
         if ((abs_mag <= priorLib[2] || abs_mag >= priorLib[3]) && libtype == 1)
-          chi2loc[il] = HIGH_CHI2;
+          chi2loc[i] = HIGH_CHI2;
       }
       // Prior N(z)
       if (bp[0] >= 0 && libtype == 0) {
-        double pweight =
-            nzprior((fulllib[il])->luv, (fulllib[il])->lnir, reds, bp);
-        chi2loc[il] = chi2loc[il] - 2. * log(pweight);
+        double pweight = nzprior(sed->luv, sed->lnir, reds, bp);
+        chi2loc[i] = chi2loc[i] - 2. * log(pweight);
       }
 
-      // keep the minimum chi2
-      if (chi2loc[il] < HIGH_CHI2) {
-        int nlibloc = (fulllib[il])->nlib;
-        // If local minimum inside the thread, store the new minimum for the
-        // thread done per type s (0 gal, 1 AGN, 2 stars)
-        if (locChi2[nlibloc][thread_id] > chi2loc[il]) {
-          locChi2[nlibloc][thread_id] = chi2loc[il];
-          locInd[nlibloc][thread_id] = (fulllib[il])->index;
+      // keep track of the minimum chi2 for each object type, over the threads
+      if (chi2loc[i] < HIGH_CHI2) {
+        object_type type = sed->get_object_type();
+        if (chi2_vals[thread_id][type] > chi2loc[i]) {
+          chi2_vals[thread_id][type] = chi2loc[i];
+          chi2_idx[thread_id][type] = sed->index;
         }
       }
 
       // Write the chi2
-      fulllib[il]->chi2 = chi2loc[il];
-      fulllib[il]->dm = dmloc[il];
+      sed->chi2 = chi2loc[i];
+      sed->dm = dmloc[i];
     }
 
 #ifdef _OPENMP
@@ -501,9 +501,9 @@ void onesource::fit(vector<SED *> &fulllib, const vector<vector<double>> &flux,
   for (int k = 0; k < 3; k++) {
     for (int j = 0; j < number_threads; j++) {
       // Minimum over the full redshift range for the galaxies
-      if (chimin[k] > locChi2[k][j]) {
-        chimin[k] = locChi2[k][j];
-        indmin[k] = locInd[k][j];
+      if (chimin[k] > chi2_vals[j][k]) {
+        chimin[k] = chi2_vals[j][k];
+        indmin[k] = chi2_idx[j][k];
       }
     }
   }
@@ -647,8 +647,7 @@ double onesource::nzprior(const double luv, const double lnir,
 void onesource::rm_discrepant(vector<SED *> &fulllib,
                               const vector<vector<double>> &flux,
                               const vector<size_t> &va, const double funz0,
-                              const array<int, 2> bp, double thresholdChi2,
-                              bool verbose) {
+                              const array<int, 2> bp, double thresholdChi2) {
   size_t imagm = busnorma.size();
   double newmin, improvedChi2;
   // Start with the best chi2 among the libraries
@@ -750,8 +749,9 @@ void onesource::fitIR(vector<SED *> &fulllibIR,
         // Check that the normalisation should be computed (if the scaling
         // should be free)
         if (nbusIR < 1) {
-          cout << "WARNING: No scaling in IR " << spec << " " << nbusIR << " "
-               << endl;
+          if (verbose)
+            cout << "WARNING: No scaling in IR " << spec << " " << nbusIR << " "
+                 << endl;
         } else {
           dmloc = avmago / avmagt;
         }
@@ -832,10 +832,10 @@ void onesource::generatePDF(vector<SED *> &fulllib, const vector<size_t> &va,
   // Do a local minimisation per thread (store chi2 and index) dim 1: type, dim
   // 2: thread, 3: index of the redshift grid
   vector<vector<vector<double>>> locChi2(
-      3,
-      vector<vector<double>>(dimzg, vector<double>(number_threads, HIGH_CHI2)));
+      number_threads,
+      vector<vector<double>>(3, vector<double>(dimzg, HIGH_CHI2)));
   vector<vector<vector<int>>> locInd(
-      3, vector<vector<int>>(dimzg, vector<int>(number_threads, -1)));
+      number_threads, vector<vector<int>>(3, vector<int>(dimzg, -1)));
 
   // to create the marginalized PDF
   int pos = 0;
@@ -914,7 +914,7 @@ void onesource::generatePDF(vector<SED *> &fulllib, const vector<size_t> &va,
       SED *sed = fulllib[il];
       // Check that the model has a defined probability
       if (sed->chi2 < HIGH_CHI2) {
-        object_type nlibloc = sed->nlib;
+        object_type nlibloc = sed->get_object_type();
 
         // Marginalization for the galaxies
         if (nlibloc == object_type::GAL) {
@@ -1004,39 +1004,35 @@ void onesource::generatePDF(vector<SED *> &fulllib, const vector<size_t> &va,
         if (chi2loc < HIGH_CHI2) {
           // 11: BAYZG
           int poszloc = pdfbayzg.index(sed->red);
-          object_type nlibloc = sed->nlib;
+          object_type nlibloc = sed->get_object_type();
           int indloc = sed->index;
           // If local minimum inside the thread, store the new minimum for the
           // thread
-          if (locChi2[nlibloc][poszloc][thread_id] > chi2loc) {
-            locChi2[nlibloc][poszloc][thread_id] = chi2loc;
-            locInd[nlibloc][poszloc][thread_id] = indloc;
+          if (locChi2[thread_id][nlibloc][poszloc] > chi2loc) {
+            locChi2[thread_id][nlibloc][poszloc] = chi2loc;
+            locInd[thread_id][nlibloc][poszloc] = indloc;
           }
         }
       }
 
       // Find the minimum for each redshift step by collapsing the threads
 #pragma omp for
       for (int i = 0; i < dimzg; i++) {
-        auto &loc0chi2 = locChi2[0][i];
-        auto &loc1chi2 = locChi2[1][i];
-        auto &loc0ind = locInd[0][i];
-        auto &loc1ind = locInd[1][i];
         for (int j = 0; j < number_threads; j++) {
 // 0:["MASS"] / 1:["SFR"] / 2:["SSFR"] / 3:["LDUST"] / 4:["LIR"] /
 // 5:["AGE"] / 6:["COL1"] / 7:["COL2"] / 8:["MREF"]/ 9:["MIN_ZG"] /
 // 10:["MIN_ZQ"] / 11:["BAY_ZG"] / 11:["BAY_ZQ"] look for the new
 // minimum among the threads / Galaxies
 #pragma omp critical
           {
-            if (pdfminzg.chi2[i] > loc0chi2[j]) {
-              pdfminzg.chi2[i] = loc0chi2[j];
-              pdfminzg.ind[i] = loc0ind[j];
+            if (pdfminzg.chi2[i] > locChi2[j][0][i]) {
+              pdfminzg.chi2[i] = locChi2[j][0][i];
+              pdfminzg.ind[i] = locInd[j][0][i];
             }
             // look for the new minimum among the threads / AGN
-            if (pdfminzq.chi2[i] > loc1chi2[j]) {
-              pdfminzq.chi2[i] = loc1chi2[j];
-              pdfminzq.ind[i] = loc1ind[j];
+            if (pdfminzq.chi2[i] > locChi2[j][1][i]) {
+              pdfminzq.chi2[i] = locChi2[j][1][i];
+              pdfminzq.ind[i] = locInd[j][1][i];
             }
           }
         }
@@ -2288,7 +2284,7 @@ void onesource::writeFullChi(vector<SED *> &fulllib) {
     // Normalisation
     sca = fulllib[k]->dm;
     // Write
-    stochi << fulllib[k]->nlib << " ";
+    stochi << fulllib[k]->get_object_type() << " ";
     stochi << fulllib[k]->red << " ";
     stochi << fulllib[k]->nummod << " ";
     stochi << fulllib[k]->age << " ";

diff --git a/src/lib/onesource.h b/src/lib/onesource.h
@@ -39,6 +39,8 @@ represents an object from a catalogue, and manages its fitting to an SED.
 */
 class onesource {
  private:
+  bool verbose;
+
  public:
   //{"MASS_BEST","SFR_BEST","SSFR_BEST","LDUST_BEST","LUM_TIR_BEST","AGE_BEST","EBV_BEST","EXTLAW_BEST","LUM_NUV_BEST","LUM_R_BEST","LUM_K_BEST",}
   unordered_map<string, double> results = {
@@ -160,6 +162,17 @@ class onesource {
     busnorma.clear();
   }
 
+  //! Set verbosity
+  /*!
+    @param v bool specifying whether output should be verbose or not.
+   */
+  inline void set_verbosity(const bool v) { verbose = v; }
+  //! Get verbosity
+  /*!
+    @return bool specifying whether output is verbose or not.
+   */
+  inline bool get_verbosity() const { return verbose; }
+
   // Prototype
   void readsource(const string &identifier, const vector<double> vals,
                   const vector<double> err_vals, const long context,
@@ -184,8 +197,7 @@ class onesource {
                  const array<int, 2> bp);
   void rm_discrepant(vector<SED *> &fulllib, const vector<vector<double>> &flux,
                      const vector<size_t> &valid, const double funz0,
-                     const array<int, 2> bp, double thresholdChi2,
-                     bool verbose);
+                     const array<int, 2> bp, double thresholdChi2);
   void write_out(vector<SED *> &fulllib, vector<SED *> &fulllibIR,
                  ofstream &stout, vector<string> outkeywords);
   void write_pdz_header(vector<string> pdztype,

diff --git a/src/lib/photoz_lib.cpp b/src/lib/photoz_lib.cpp
@@ -959,6 +959,7 @@ vector<onesource *> PhotoZ::read_autoadapt_sources() {
     if (check_first_char(line)) {
       // Construct one objet
       onesource *oneObj = yield(nobj, line);
+      oneObj->set_verbosity(verbose);
 
       // Keep only sources with a spectroscopic redshift
       if (oneObj->zs > adzmin && oneObj->zs < adzmax) {
@@ -1413,6 +1414,7 @@ vector<onesource *> PhotoZ::read_photoz_sources() {
 
       // Generate one objet
       onesource *oneObj = yield(nobj, line);
+      oneObj->set_verbosity(verbose);
 
       // Use zspec from external file
       // open the external file with zspec
@@ -1601,8 +1603,7 @@ void PhotoZ::run_photoz(vector<onesource *> sources, const vector<double> &a0,
     oneObj->fit(fullLib, flux, valid, funz0, bp);
     // Try to remove some bands to improve the chi2, only as long as the chi2 is
     // above a threshold
-    oneObj->rm_discrepant(fullLib, flux, valid, funz0, bp, thresholdChi2,
-                          verbose);
+    oneObj->rm_discrepant(fullLib, flux, valid, funz0, bp, thresholdChi2);
     // Generate the marginalized PDF (z+physical parameters) from the chi2
     // stored in each SED
     oneObj->generatePDF(fullLib, valid, fltColRF, fltREF, zfix);