Skip to content

Commit

Permalink
Merge pull request #251 from lephare-photoz/198-fast-last-vector-dime…
Browse files Browse the repository at this point in the history
…nsion-being-used-by-different-threads-might-cause-false-cache-sharing

198 fast last vector dimension being used by different threads might cause false cache sharing
  • Loading branch information
johannct authored Dec 10, 2024
2 parents f5ecbfa + 6e96730 commit 1164c3c
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@
"metadata": {},
"outputs": [],
"source": [
"src.rm_discrepant(photz.fullLib, photz.flux, valid, funz0, bp, 500.0, True)"
"src.rm_discrepant(photz.fullLib, photz.flux, valid, funz0, bp, 500.0)"
]
},
{
Expand Down Expand Up @@ -755,7 +755,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,
Expand Down
Empty file modified docs/test_suite/test_suite.sh
100644 → 100755
Empty file.
15 changes: 13 additions & 2 deletions src/lib/SED.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "onesource.h"
#include "opa.h"

//! types of object that LePHARE can treat distinctively
enum object_type { GAL, QSO, STAR };

/*
Expand All @@ -35,9 +36,16 @@ class SED {
string name;
bool has_emlines; ///< True if the emission lines have been computed, false
///< if not
int nummod;

//! object type of the SED
object_type nlib;
int index, index_z0, posz = -1;

int nummod, ///< index in the initial list of rest frame SEDs
index, ///< index in the full list of SED, redshifted, and modified by
///< extinction, etc...
index_z0; ///< index in the full list of SEDs corresponding to the z=0
///< version of the current SED.

double red, chi2 = HIGH_CHI2, dm, lnir, luv, lopt, inter;
double mass, age, sfr, ssfr,
ltir; // need to put it out of GalSED since used in the PDF without
Expand Down Expand Up @@ -100,6 +108,9 @@ class SED {
}
}

//! Return the object type
object_type get_object_type() const { return nlib; }

//! Return true if SED is of object_type GAL, false if not
bool is_gal() { return nlib == GAL ? true : false; }

Expand Down
2 changes: 2 additions & 0 deletions src/lib/_bindings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,8 @@ PYBIND11_MODULE(_lephare, mod) {
static_cast<void (onesource::*)(
const string &, const vector<double>, const vector<double>,
const long, const double, const string)>(&onesource::readsource))
.def("set_verbosity", &onesource::set_verbosity)
.def("get_verbosity", &onesource::get_verbosity)
.def("fltUsed", &onesource::fltUsed)
.def("convertFlux", &onesource::convertFlux)
.def("convertMag", &onesource::convertMag)
Expand Down
116 changes: 56 additions & 60 deletions src/lib/onesource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ void onesource::fltUsed(const long gbcont, const long contforb,
// Count the bands used as upper-limits
if (busul[k] == 1) nbul++;
}
if (nf == 0) cout << "WARNING: No scaling --> No z " << spec << endl;
if (nf == 0 && verbose)
cout << "WARNING: No scaling --> No z " << spec << endl;
}

/*
Expand Down Expand Up @@ -373,8 +374,9 @@ void onesource::fit(vector<SED *> &fulllib, const vector<vector<double>> &flux,
#ifdef _OPENMP
number_threads = omp_get_max_threads();
#endif
vector<vector<double>> locChi2(3, vector<double>(number_threads, HIGH_CHI2));
vector<vector<int>> locInd(3, vector<int>(number_threads, -1));
vector<vector<double>> chi2_vals(number_threads,
vector<double>(3, HIGH_CHI2));
vector<vector<int>> chi2_idx(number_threads, vector<int>(3, -1));

// Compute some quantities linked to ab and sab to save computational time in
// the fit. invsab = busnorma / sab busnorma here ensures that all the
Expand All @@ -399,94 +401,92 @@ void onesource::fit(vector<SED *> &fulllib, const vector<vector<double>> &flux,
// parrallellize over each SED
vector<double> chi2loc(fulllib.size(), HIGH_CHI2),
dmloc(fulllib.size(), -999.);
size_t il;

#ifdef _OPENMP
// double start = omp_get_wtime();
#pragma omp parallel shared(fulllib) \
firstprivate(s2n, invsab, invsabSq, abinvsabSq, imagm, nbul, busul, \
priorLib, number_threads, thread_id) private(il)
priorLib, number_threads, thread_id)
{
// Catch the name of the local thread in the parrallelisation
thread_id = omp_get_thread_num();
#endif

// Compute normalisation and chi2
#pragma omp for schedule(static, 10000)
for (size_t i = 0; i < va.size(); i++) {
for (size_t v = 0; v < va.size(); v++) {
// index to be considered because of ZFIX=YES
il = va[i];
size_t i = va[v];
// Initialize
chi2loc[il] = HIGH_CHI2;
dmloc[il] = -999.;
chi2loc[i] = HIGH_CHI2;
dmloc[i] = -999.;

// Measurement of scaling factor dm only with (fobs>flim), dchi2/ddm = 0
double avmago = 0., avmagt = 0.;
for (size_t k = 0; k < imagm; k++) {
double fluxin = flux[il][k];
double fluxin = flux[i][k];
avmago += fluxin * abinvsabSq[k];
avmagt += fluxin * fluxin * invsabSq[k];
}
// Normalisation
if (avmagt > 0) dmloc[il] = avmago / avmagt;
if (avmagt > 0) dmloc[i] = avmago / avmagt;

// Measurement of chi^2
chi2loc[il] = 0;
chi2loc[i] = 0;
for (size_t k = 0; k < imagm; k++) {
double inter = s2n[k] - dmloc[il] * flux[il][k] * invsab[k];
chi2loc[il] += inter * inter;
double inter = s2n[k] - dmloc[i] * flux[i][k] * invsab[k];
chi2loc[i] += inter * inter;
}

// Upper-limits. Check first if some bands have upper-limits, before
// applying the condition
if (nbul > 0) {
for (size_t k = 0; k < imagm; k++) {
if ((dmloc[il] * busul[k] * flux[il][k]) > ab[k] && busnorma[k] == 1)
chi2loc[il] = HIGH_CHI2;
if ((dmloc[i] * busul[k] * flux[i][k]) > ab[k] && busnorma[k] == 1)
chi2loc[i] = HIGH_CHI2;
}
}

// Model rejection based on prior
// Abs mag rejection
double reds;
int libtype;
reds = (fulllib[il])->red;
libtype = (fulllib[il])->nlib;
SED *sed = fulllib[i];
reds = sed->red;
libtype = sed->nlib;
// Abs Magnitude from model @ z=0 for rejection for galaxies and AGN
if ((mabsGALprior && libtype == 0) || (mabsAGNprior && libtype == 1)) {
// predicted magnitudes within the babs filter renormalized by the
// scaling
double abs_mag;
abs_mag = (fulllib[il])->mag0 - 2.5 * log10(dmloc[il]);
abs_mag = sed->mag0 - 2.5 * log10(dmloc[i]);
// specific case when the redshift is 0
if (reds < 1.e-10) abs_mag = abs_mag - funz0;
// Galaxy rejection
if ((abs_mag <= priorLib[0] || abs_mag >= priorLib[1]) && libtype == 0)
chi2loc[il] = HIGH_CHI2;
chi2loc[i] = HIGH_CHI2;
// AGN rejection
if ((abs_mag <= priorLib[2] || abs_mag >= priorLib[3]) && libtype == 1)
chi2loc[il] = HIGH_CHI2;
chi2loc[i] = HIGH_CHI2;
}
// Prior N(z)
if (bp[0] >= 0 && libtype == 0) {
double pweight =
nzprior((fulllib[il])->luv, (fulllib[il])->lnir, reds, bp);
chi2loc[il] = chi2loc[il] - 2. * log(pweight);
double pweight = nzprior(sed->luv, sed->lnir, reds, bp);
chi2loc[i] = chi2loc[i] - 2. * log(pweight);
}

// keep the minimum chi2
if (chi2loc[il] < HIGH_CHI2) {
int nlibloc = (fulllib[il])->nlib;
// If local minimum inside the thread, store the new minimum for the
// thread done per type s (0 gal, 1 AGN, 2 stars)
if (locChi2[nlibloc][thread_id] > chi2loc[il]) {
locChi2[nlibloc][thread_id] = chi2loc[il];
locInd[nlibloc][thread_id] = (fulllib[il])->index;
// keep track of the minimum chi2 for each object type, over the threads
if (chi2loc[i] < HIGH_CHI2) {
object_type type = sed->get_object_type();
if (chi2_vals[thread_id][type] > chi2loc[i]) {
chi2_vals[thread_id][type] = chi2loc[i];
chi2_idx[thread_id][type] = sed->index;
}
}

// Write the chi2
fulllib[il]->chi2 = chi2loc[il];
fulllib[il]->dm = dmloc[il];
sed->chi2 = chi2loc[i];
sed->dm = dmloc[i];
}

#ifdef _OPENMP
Expand All @@ -501,9 +501,9 @@ void onesource::fit(vector<SED *> &fulllib, const vector<vector<double>> &flux,
for (int k = 0; k < 3; k++) {
for (int j = 0; j < number_threads; j++) {
// Minimum over the full redshift range for the galaxies
if (chimin[k] > locChi2[k][j]) {
chimin[k] = locChi2[k][j];
indmin[k] = locInd[k][j];
if (chimin[k] > chi2_vals[j][k]) {
chimin[k] = chi2_vals[j][k];
indmin[k] = chi2_idx[j][k];
}
}
}
Expand Down Expand Up @@ -647,8 +647,7 @@ double onesource::nzprior(const double luv, const double lnir,
void onesource::rm_discrepant(vector<SED *> &fulllib,
const vector<vector<double>> &flux,
const vector<size_t> &va, const double funz0,
const array<int, 2> bp, double thresholdChi2,
bool verbose) {
const array<int, 2> bp, double thresholdChi2) {
size_t imagm = busnorma.size();
double newmin, improvedChi2;
// Start with the best chi2 among the libraries
Expand Down Expand Up @@ -750,8 +749,9 @@ void onesource::fitIR(vector<SED *> &fulllibIR,
// Check that the normalisation should be computed (if the scaling
// should be free)
if (nbusIR < 1) {
cout << "WARNING: No scaling in IR " << spec << " " << nbusIR << " "
<< endl;
if (verbose)
cout << "WARNING: No scaling in IR " << spec << " " << nbusIR << " "
<< endl;
} else {
dmloc = avmago / avmagt;
}
Expand Down Expand Up @@ -832,10 +832,10 @@ void onesource::generatePDF(vector<SED *> &fulllib, const vector<size_t> &va,
// Do a local minimisation per thread (store chi2 and index) dim 1: type, dim
// 2: thread, 3: index of the redshift grid
vector<vector<vector<double>>> locChi2(
3,
vector<vector<double>>(dimzg, vector<double>(number_threads, HIGH_CHI2)));
number_threads,
vector<vector<double>>(3, vector<double>(dimzg, HIGH_CHI2)));
vector<vector<vector<int>>> locInd(
3, vector<vector<int>>(dimzg, vector<int>(number_threads, -1)));
number_threads, vector<vector<int>>(3, vector<int>(dimzg, -1)));

// to create the marginalized PDF
int pos = 0;
Expand Down Expand Up @@ -914,7 +914,7 @@ void onesource::generatePDF(vector<SED *> &fulllib, const vector<size_t> &va,
SED *sed = fulllib[il];
// Check that the model has a defined probability
if (sed->chi2 < HIGH_CHI2) {
object_type nlibloc = sed->nlib;
object_type nlibloc = sed->get_object_type();

// Marginalization for the galaxies
if (nlibloc == object_type::GAL) {
Expand Down Expand Up @@ -1004,39 +1004,35 @@ void onesource::generatePDF(vector<SED *> &fulllib, const vector<size_t> &va,
if (chi2loc < HIGH_CHI2) {
// 11: BAYZG
int poszloc = pdfbayzg.index(sed->red);
object_type nlibloc = sed->nlib;
object_type nlibloc = sed->get_object_type();
int indloc = sed->index;
// If local minimum inside the thread, store the new minimum for the
// thread
if (locChi2[nlibloc][poszloc][thread_id] > chi2loc) {
locChi2[nlibloc][poszloc][thread_id] = chi2loc;
locInd[nlibloc][poszloc][thread_id] = indloc;
if (locChi2[thread_id][nlibloc][poszloc] > chi2loc) {
locChi2[thread_id][nlibloc][poszloc] = chi2loc;
locInd[thread_id][nlibloc][poszloc] = indloc;
}
}
}

// Find the minimum for each redshift step by collapsing the threads
#pragma omp for
for (int i = 0; i < dimzg; i++) {
auto &loc0chi2 = locChi2[0][i];
auto &loc1chi2 = locChi2[1][i];
auto &loc0ind = locInd[0][i];
auto &loc1ind = locInd[1][i];
for (int j = 0; j < number_threads; j++) {
// 0:["MASS"] / 1:["SFR"] / 2:["SSFR"] / 3:["LDUST"] / 4:["LIR"] /
// 5:["AGE"] / 6:["COL1"] / 7:["COL2"] / 8:["MREF"]/ 9:["MIN_ZG"] /
// 10:["MIN_ZQ"] / 11:["BAY_ZG"] / 11:["BAY_ZQ"] look for the new
// minimum among the threads / Galaxies
#pragma omp critical
{
if (pdfminzg.chi2[i] > loc0chi2[j]) {
pdfminzg.chi2[i] = loc0chi2[j];
pdfminzg.ind[i] = loc0ind[j];
if (pdfminzg.chi2[i] > locChi2[j][0][i]) {
pdfminzg.chi2[i] = locChi2[j][0][i];
pdfminzg.ind[i] = locInd[j][0][i];
}
// look for the new minimum among the threads / AGN
if (pdfminzq.chi2[i] > loc1chi2[j]) {
pdfminzq.chi2[i] = loc1chi2[j];
pdfminzq.ind[i] = loc1ind[j];
if (pdfminzq.chi2[i] > locChi2[j][1][i]) {
pdfminzq.chi2[i] = locChi2[j][1][i];
pdfminzq.ind[i] = locInd[j][1][i];
}
}
}
Expand Down Expand Up @@ -2288,7 +2284,7 @@ void onesource::writeFullChi(vector<SED *> &fulllib) {
// Normalisation
sca = fulllib[k]->dm;
// Write
stochi << fulllib[k]->nlib << " ";
stochi << fulllib[k]->get_object_type() << " ";
stochi << fulllib[k]->red << " ";
stochi << fulllib[k]->nummod << " ";
stochi << fulllib[k]->age << " ";
Expand Down
16 changes: 14 additions & 2 deletions src/lib/onesource.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ represents an object from a catalogue, and manages its fitting to an SED.
*/
class onesource {
private:
bool verbose;

public:
//{"MASS_BEST","SFR_BEST","SSFR_BEST","LDUST_BEST","LUM_TIR_BEST","AGE_BEST","EBV_BEST","EXTLAW_BEST","LUM_NUV_BEST","LUM_R_BEST","LUM_K_BEST",}
unordered_map<string, double> results = {
Expand Down Expand Up @@ -160,6 +162,17 @@ class onesource {
busnorma.clear();
}

//! Set verbosity
/*!
@param v bool specifying whether output should be verbose or not.
*/
inline void set_verbosity(const bool v) { verbose = v; }
//! Get verbosity
/*!
@return bool specifying whether output is verbose or not.
*/
inline bool get_verbosity() const { return verbose; }

// Prototype
void readsource(const string &identifier, const vector<double> vals,
const vector<double> err_vals, const long context,
Expand All @@ -184,8 +197,7 @@ class onesource {
const array<int, 2> bp);
void rm_discrepant(vector<SED *> &fulllib, const vector<vector<double>> &flux,
const vector<size_t> &valid, const double funz0,
const array<int, 2> bp, double thresholdChi2,
bool verbose);
const array<int, 2> bp, double thresholdChi2);
void write_out(vector<SED *> &fulllib, vector<SED *> &fulllibIR,
ofstream &stout, vector<string> outkeywords);
void write_pdz_header(vector<string> pdztype,
Expand Down
5 changes: 3 additions & 2 deletions src/lib/photoz_lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,7 @@ vector<onesource *> PhotoZ::read_autoadapt_sources() {
if (check_first_char(line)) {
// Construct one objet
onesource *oneObj = yield(nobj, line);
oneObj->set_verbosity(verbose);

// Keep only sources with a spectroscopic redshift
if (oneObj->zs > adzmin && oneObj->zs < adzmax) {
Expand Down Expand Up @@ -1413,6 +1414,7 @@ vector<onesource *> PhotoZ::read_photoz_sources() {

// Generate one objet
onesource *oneObj = yield(nobj, line);
oneObj->set_verbosity(verbose);

// Use zspec from external file
// open the external file with zspec
Expand Down Expand Up @@ -1601,8 +1603,7 @@ void PhotoZ::run_photoz(vector<onesource *> sources, const vector<double> &a0,
oneObj->fit(fullLib, flux, valid, funz0, bp);
// Try to remove some bands to improve the chi2, only as long as the chi2 is
// above a threshold
oneObj->rm_discrepant(fullLib, flux, valid, funz0, bp, thresholdChi2,
verbose);
oneObj->rm_discrepant(fullLib, flux, valid, funz0, bp, thresholdChi2);
// Generate the marginalized PDF (z+physical parameters) from the chi2
// stored in each SED
oneObj->generatePDF(fullLib, valid, fltColRF, fltREF, zfix);
Expand Down
Loading

0 comments on commit 1164c3c

Please sign in to comment.