Skip to content

Commit

Permalink
[feat] support --srcid to simulate all, one or separate sources, #163
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Jan 1, 2024
1 parent f5b4aaa commit 9dc832d
Show file tree
Hide file tree
Showing 18 changed files with 198 additions and 93 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ Monte Carlo eXtreme (MCX) - CUDA Edition
- Author: Qianqian Fang (q.fang at neu.edu)
- License: GNU General Public License version 3 (GPLv3)
- Version: 2.2.pre (v2024.1, Interstellar Ion)
- Website: <http://mcx.space>
- Website: <https://mcx.space>
- Download: <https://mcx.space/wiki/?Get>

![Mex and Binaries](https://github.com/fangq/mcx/actions/workflows/build_all.yml/badge.svg)\
![Linux Python Module](https://github.com/fangq/mcx/actions/workflows/build_linux_manywheel.yml/badge.svg)\
Expand Down
4 changes: 2 additions & 2 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
*Author: Qianqian Fang <q.fang at neu.edu>
*License: GNU General Public License version 3 (GPLv3)
*Version: 2.2.pre (v2024.1, Interstellar Ion)
*Website: http://mcx.space

*Website: https://mcx.space
*Download: https://mcx.space/wiki/?Get
---------------------------------------------------------------------

Table of Content:
Expand Down
1 change: 1 addition & 0 deletions example/multisrc/run_eachsrc.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
..\..\bin\mcx.exe -f multisrc.json -D P -s 'eachsrc' -j '{"Optode":{"Source":{"ID":-1}}}' %*
3 changes: 3 additions & 0 deletions example/multisrc/run_eachsrc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

#!/bin/sh
../../bin/mcx -f multisrc.json -D P -s 'eachsrc' --srcid -1 $@
1 change: 1 addition & 0 deletions example/multisrc/run_src9.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
..\..\bin\mcx.exe -f multisrc.json -D P -s 'source9' --srcid 9 %*
3 changes: 3 additions & 0 deletions example/multisrc/run_src9.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

#!/bin/sh
../../bin/mcx -f multisrc.json -D P -s 'source9' --srcid 9 $@
18 changes: 18 additions & 0 deletions mcxlab/examples/demo_multisrc.m
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,21 @@
flux=mcxlab(cfg);

mcxplotvol(log10(flux.data));

%% setting cfg.srcid to a positive number 1,2,3,.. specifies which src to simulate
cfg.srcid=8;

flux=mcxlab(cfg);

mcxplotvol(log10(flux.data));

%% setting cfg.srcid to -1, solution of each src is stored separately

cfg.srcid=-1;

flux=mcxlab(cfg);

% use up-down button to shift between different sources, there are 9 of
% them

mcxplotvol(log10(squeeze(flux.data)));
3 changes: 3 additions & 0 deletions mcxlab/mcxlab.m
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,9 @@
% simultaneously simulated; only works for 'pattern'
% source, see cfg.srctype='pattern' for details
% Example <demo_photon_sharing.m>
% cfg.srcid: when multiple sources are defined, if srcid is -1, each source is separately
% simulated; if set to 0, all source solution are summed; if set to a positive
% number starting from 1, only the specified source is simulated; default to 0
% cfg.omega: source modulation frequency (rad/s) for RF replay, 2*pi*f
% cfg.srciquv: 1x4 vector [I,Q,U,V], Stokes vector of the incident light
% I: total light intensity (I >= 0)
Expand Down
2 changes: 1 addition & 1 deletion pmcx/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

- Copyright: (C) Matin Raayai Ardakani (2022-2023) <raayaiardakani.m at northeastern.edu>, Qianqian Fang (2019-2023) <q.fang at neu.edu>, Fan-Yu Yen (2023) <yen.f at northeastern.edu>
- License: GNU Public License V3 or later
- Version: 0.2.9
- Version: 0.2.10
- URL: https://pypi.org/project/pmcx/
- Github: https://github.com/fangq/mcx

Expand Down
2 changes: 1 addition & 1 deletion pmcx/pmcx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
# from .files import loadmc2, loadmch, load, save
from .bench import bench

__version__ = "0.2.9"
__version__ = "0.2.10"

__all__ = (
"gpuinfo",
Expand Down
2 changes: 1 addition & 1 deletion pmcx/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def build_extension(self, ext):
setup(
name="pmcx",
packages=["pmcx"],
version="0.2.9",
version="0.2.10",
requires=["numpy"],
license="GPLv3+",
author="Matin Raayai Ardakani, Qianqian Fang, Fan-Yu Yen",
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ NVCCOMP=$(OMP)
CUDA_STATIC=--cudart static -Xcompiler "-static-libgcc -static-libstdc++"

CFLAGS+=-std=c99
CPPFLAGS+=-g -Wall -pedantic #-DNO_LZMA # -DUSE_OS_TIMER
CPPFLAGS+=-g -Wall #-pedantic #-DNO_LZMA # -DUSE_OS_TIMER

OBJSUFFIX=.o
EXESUFFIX=
Expand Down
72 changes: 50 additions & 22 deletions src/mcx_core.cu
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,10 @@ __device__ inline void savedetphoton(float n_det[], uint* detectedphoton, float*

if (SAVE_DETID(gcfg->savedetflag)) {
n_det[baseaddr++] = detid;

if (gcfg->extrasrclen && gcfg->srcid <= 0) {
n_det[baseaddr++] = (((int)ppath[gcfg->w0offset - 1]) << 16);
}
}

for (i = 0; i < gcfg->partialdata; i++) {
Expand All @@ -392,7 +396,7 @@ __device__ inline void savedetphoton(float n_det[], uint* detectedphoton, float*
}

if (SAVE_W0(gcfg->savedetflag)) {
n_det[baseaddr++] = ppath[gcfg->w0offset - 1];
n_det[baseaddr++] = ppath[gcfg->w0offset - 2];
}

if (SAVE_IQUV(gcfg->savedetflag)) {
Expand All @@ -415,7 +419,7 @@ __device__ inline void savedetphoton(float n_det[], uint* detectedphoton, float*
* @param[in] gdebugdata: pointer to the global-memory buffer to store the trajectory info
*/

__device__ inline void savedebugdata(MCXpos* p, uint id, float* gdebugdata) {
__device__ inline void savedebugdata(MCXpos* p, uint id, float* gdebugdata, int srcid) {
uint pos = atomicAdd(gjumpdebug, 1);

if (pos < gcfg->maxjumpdebug) {
Expand All @@ -425,7 +429,7 @@ __device__ inline void savedebugdata(MCXpos* p, uint id, float* gdebugdata) {
gdebugdata[pos++] = p->y;
gdebugdata[pos++] = p->z;
gdebugdata[pos++] = p->w;
gdebugdata[pos++] = 0;
gdebugdata[pos++] = srcid;
}
}

Expand Down Expand Up @@ -1039,12 +1043,17 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*
*w0 = 1.f; //< reuse to count for launchattempt
int canfocus = 1; //< non-zero: focusable, zero: not focusable
MCXSrc* launchsrc = &(gcfg->src);
ppath[gcfg->w0offset - 1] = 0.f;

if (gcfg->extrasrclen) {
int srcid = rand_uniform01(t) * (gcfg->extrasrclen + 1);
if (gcfg->extrasrclen && gcfg->srcid != 1) {
if (gcfg->srcid > 1) {
launchsrc = (MCXSrc*)(gproperty + gcfg->maxmedia + 1 + gcfg->detnum + ((gcfg->srcid - 2) * 4));
} else { // gcfg->srcid = 0 or -1: simulate all sources; = 0 merge all solutions; = -1 separately store each source
ppath[gcfg->w0offset - 1] = (int)(rand_uniform01(t) * JUST_BELOW_ONE * (gcfg->extrasrclen + 1)) + 1; // borrow initial weight section of photon-sharing for storing launch src id

if (srcid) {
launchsrc = (MCXSrc*)(gproperty + gcfg->maxmedia + 1 + gcfg->detnum + ((srcid - 1) << 2));
if ((int)ppath[gcfg->w0offset - 1] > 1) {
launchsrc = (MCXSrc*)(gproperty + gcfg->maxmedia + 1 + gcfg->detnum + ((int)(ppath[gcfg->w0offset - 1] - 2) * 4));
}
}
}

Expand All @@ -1065,13 +1074,17 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*
ppath[gcfg->partialdata] += p->w; //< sum all the remaining energy

if (gcfg->debuglevel & (MCX_DEBUG_MOVE | MCX_DEBUG_MOVE_ONLY)) {
savedebugdata(p, ((uint)f->ndone) + threadid * gcfg->threadphoton + umin(threadid, gcfg->oddphotons), gdebugdata);
savedebugdata(p, ((uint)f->ndone) + threadid * gcfg->threadphoton + umin(threadid, gcfg->oddphotons), gdebugdata, (int)ppath[gcfg->w0offset - 1]);
}

if (*mediaid == 0 && *idx1d != OUTSIDE_VOLUME_MIN && *idx1d != OUTSIDE_VOLUME_MAX && gcfg->issaveref) {
if (gcfg->issaveref == 1) {
int tshift = MIN(gcfg->maxgate - 1, (int)(floorf((f->t - gcfg->twin0) * gcfg->Rtstep)));

if (gcfg->extrasrclen && gcfg->srcid < 0) {
tshift += ((int)ppath[gcfg->w0offset - 1] - 1) * gcfg->maxgate;
}

if (gcfg->srctype != MCX_SRC_PATTERN && gcfg->srctype != MCX_SRC_PATTERN3D) {
#ifdef USE_ATOMIC
#ifdef USE_DOUBLE
Expand Down Expand Up @@ -1206,12 +1219,12 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*
if (gcfg->srctype == MCX_SRC_PATTERN) { // need to prevent rx/ry=1 here
if (gcfg->srcnum <= 1) {
p->w = launchsrc->pos.w * srcpattern[(int)(ry * JUST_BELOW_ONE * launchsrc->param2.w) * (int)(launchsrc->param1.w) + (int)(rx * JUST_BELOW_ONE * launchsrc->param1.w)];
ppath[3] = p->w;
ppath[4] = p->w;
} else {
*((uint*)(ppath + 2)) = ((int)(ry * JUST_BELOW_ONE * launchsrc->param2.w) * (int)(launchsrc->param1.w) + (int)(rx * JUST_BELOW_ONE * launchsrc->param1.w));

for (int i = 0; i < gcfg->srcnum; i++) {
ppath[i + 3] = srcpattern[(*((uint*)(ppath + 2))) * gcfg->srcnum + i];
ppath[i + 4] = srcpattern[(*((uint*)(ppath + 2))) * gcfg->srcnum + i];
}

p->w = 1.f;
Expand All @@ -1220,13 +1233,13 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*
if (gcfg->srcnum <= 1) {
p->w = launchsrc->pos.w * srcpattern[(int)(rz * JUST_BELOW_ONE * launchsrc->param1.z) * (int)(launchsrc->param1.y) * (int)(launchsrc->param1.x) +
(int)(ry * JUST_BELOW_ONE * launchsrc->param1.y) * (int)(launchsrc->param1.x) + (int)(rx * JUST_BELOW_ONE * launchsrc->param1.x)];
ppath[3] = p->w;
ppath[4] = p->w;
} else {
*((uint*)(ppath + 2)) = ((int)(rz * JUST_BELOW_ONE * launchsrc->param1.z) * (int)(launchsrc->param1.y) * (int)(launchsrc->param1.x) +
(int)(ry * JUST_BELOW_ONE * launchsrc->param1.y) * (int)(launchsrc->param1.x) + (int)(rx * JUST_BELOW_ONE * launchsrc->param1.x));

for (int i = 0; i < gcfg->srcnum; i++) {
ppath[i + 3] = srcpattern[(*((uint*)(ppath + 2))) * gcfg->srcnum + i];
ppath[i + 4] = srcpattern[(*((uint*)(ppath + 2))) * gcfg->srcnum + i];
}

p->w = 1.f;
Expand Down Expand Up @@ -1558,7 +1571,7 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*
updateproperty<islabel, issvmc>(prop, *mediaid, t, *idx1d, media, (float3*)p, nuvox, flipdir);

if (gcfg->debuglevel & (MCX_DEBUG_MOVE | MCX_DEBUG_MOVE_ONLY)) {
savedebugdata(p, (uint)f->ndone + threadid * gcfg->threadphoton + umin(threadid, gcfg->oddphotons), gdebugdata);
savedebugdata(p, (uint)f->ndone + threadid * gcfg->threadphoton + umin(threadid, gcfg->oddphotons), gdebugdata, (int)ppath[3]);
}

/**
Expand All @@ -1574,7 +1587,7 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*

if (gcfg->outputtype == otRF) { // if run RF replay
f->pathlen = photontof[(threadid * gcfg->threadphoton + min(threadid, gcfg->oddphotons - 1) + (int)f->ndone)];
sincosf(gcfg->omega * f->pathlen, ppath + 4 + gcfg->srcnum, ppath + 3 + gcfg->srcnum);
sincosf(gcfg->omega * f->pathlen, ppath + 5 + gcfg->srcnum, ppath + 4 + gcfg->srcnum);
}

f->pathlen = 0.f;
Expand Down Expand Up @@ -1896,6 +1909,11 @@ __global__ void mcx_main_loop(uint media[], OutputType field[], float genergy[],
tmp0 = (gcfg->outputtype == otDCS) ? (1.f - ctheta) : 1.f;
tshift = (int)(floorf((photontof[tshift] - gcfg->twin0) * gcfg->Rtstep)) +
( (gcfg->replaydet == -1) ? ((photondetid[tshift] - 1) * gcfg->maxgate) : 0);

if (gcfg->extrasrclen && gcfg->srcid < 0) {
tshift += ((int)ppath[gcfg->w0offset - 1] - 1) * gcfg->maxgate;
}

#ifdef USE_ATOMIC

if (!gcfg->isatomic) {
Expand Down Expand Up @@ -1924,7 +1942,7 @@ __global__ void mcx_main_loop(uint media[], OutputType field[], float genergy[],
}

if (gcfg->debuglevel & (MCX_DEBUG_MOVE | MCX_DEBUG_MOVE_ONLY)) {
savedebugdata(&p, (uint)f.ndone + idx * gcfg->threadphoton + umin(idx, gcfg->oddphotons), gdebugdata);
savedebugdata(&p, (uint)f.ndone + idx * gcfg->threadphoton + umin(idx, gcfg->oddphotons), gdebugdata, (int)ppath[gcfg->w0offset - 1]);
}
}

Expand Down Expand Up @@ -2074,6 +2092,10 @@ __global__ void mcx_main_loop(uint media[], OutputType field[], float genergy[],
weight = w0 * f.pathlen;
}

if (gcfg->extrasrclen && gcfg->srcid < 0) {
tshift += ((int)ppath[gcfg->w0offset - 1] - 1) * gcfg->maxgate;
}

GPUDEBUG(("deposit to [%d] %e, w=%f\n", idx1dold, weight, p.w));

if (fabsf(weight) > 0.f || gcfg->outputtype == otRF) {
Expand Down Expand Up @@ -2607,7 +2629,7 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) {
uint sharedbuf = 0;

/** \c dimxyz - output volume variable \c field voxel count, Nx*Ny*Nz*Ns where Ns=cfg.srcnum is the pattern number for photon sharing */
int dimxyz = cfg->dim.x * cfg->dim.y * cfg->dim.z * ((cfg->srctype == MCX_SRC_PATTERN || cfg->srctype == MCX_SRC_PATTERN3D) ? cfg->srcnum : 1);
int dimxyz = cfg->dim.x * cfg->dim.y * cfg->dim.z * ((cfg->srctype == MCX_SRC_PATTERN || cfg->srctype == MCX_SRC_PATTERN3D) ? cfg->srcnum : (cfg->srcid == -1) ? (cfg->extrasrclen + 1) : 1);

/** \c media - input volume representing the simulation domain, format specified in cfg.mediaformat, read-only */
uint* media = (uint*)(cfg->vol);
Expand Down Expand Up @@ -2661,15 +2683,15 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) {
* |-----------------------------------------------> hostdetreclen <--------------------------------------|
* |------------------------> partialdata <-------------------|
*host detected photon buffer: detid (1), partial_scat (#media), partial_path (#media), momontum (#media), p_exit (3), v_exit(3), w0 (1)
* |---------------------------------------------> w0offset <-------------------------------------||<----- w0 (#srcnum) ----->||<- RF replay (2)->|
*gpu detected photon buffer: partial_scat (#media), partial_path (#media), momontum (#media), E_escape (1), E_launch (1), w0 (1), w0_photonsharing (#srcnum) cos(w*T),sin(w*T)
* |---------------------------------------------> w0offset <-----------------------------------------------||<----- w0 (#srcnum) ----->||<- RF replay (2)->|
*gpu detected photon buffer: partial_scat (#media), partial_path (#media), momontum (#media), E_escape (1), E_launch (1), w0 (1), srcid(1), w0_photonsharing (#srcnum) cos(w*T),sin(w*T)
*/

//< \c partialdata: per-photon buffer length for media-specific data, copy from GPU to host
unsigned int partialdata = (cfg->medianum - 1) * (SAVE_NSCAT(cfg->savedetflag) + SAVE_PPATH(cfg->savedetflag) + SAVE_MOM(cfg->savedetflag));

//< \c w0offset - offset in the per-photon buffer to the start of the photon sharing related data
unsigned int w0offset = partialdata + 3;
unsigned int w0offset = partialdata + 4; //< the extra 4 numbers are total-escaped-energy, total-launched-energy, initial-weight, source_id

//< \c hostdetreclen - host-side det photon data buffer per-photon length
unsigned int hostdetreclen = partialdata + SAVE_DETID(cfg->savedetflag) + 3 * (SAVE_PEXIT(cfg->savedetflag) + SAVE_VEXIT(cfg->savedetflag)) + SAVE_W0(cfg->savedetflag) + 4 * SAVE_IQUV(cfg->savedetflag);
Expand All @@ -2680,8 +2702,8 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) {
/** \c param - constants to be used in the GPU, copied to GPU as \c gcfg, stored in the constant memory */
MCXParam param = {cfg->steps, minstep, 0, 0, cfg->tend, R_C0* cfg->unitinmm,
(uint)cfg->issave2pt, (uint)cfg->isreflect, (uint)cfg->isrefint, (uint)cfg->issavedet, 1.f / cfg->tstep,
p0, c0, cfg->srcparam1, cfg->srcparam2, cfg->extrasrclen, s0, maxidx, uint4(0, 0, 0, 0), cp0, cp1, uint2(0, 0), cfg->minenergy,
cfg->sradius* cfg->sradius, minstep* R_C0* cfg->unitinmm, cfg->srctype,
p0, c0, cfg->srcparam1, cfg->srcparam2, cfg->extrasrclen, cfg->srcid, s0, maxidx, uint4(0, 0, 0, 0),
cp0, cp1, uint2(0, 0), cfg->minenergy, cfg->sradius* cfg->sradius, minstep* R_C0* cfg->unitinmm, cfg->srctype,
cfg->voidtime, cfg->maxdetphoton,
cfg->medianum - 1, cfg->detnum, cfg->polmedianum, cfg->maxgate, ABS(cfg->sradius + 2.f) < EPS /*isatomic*/,
(uint)cfg->maxvoidstep, cfg->issaveseed > 0, (uint)cfg->issaveref, cfg->isspecular > 0,
Expand Down Expand Up @@ -2782,6 +2804,8 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) {

param.maxgate = gpu[gpuid].maxgate;

printf("allocate %d floats [%d %d %d]\n", dimxyz * gpu[gpuid].maxgate * 2, dimxyz, gpu[gpuid].maxgate, cfg->detnum);

/** If cfg.respin is positive, the output data have to be accummulated, so we use a double-buffer to retrieve and then accummulate */
if (ABS(cfg->respin) > 1) {
if (cfg->seed == SEED_FROM_FILE && cfg->replaydet == -1) {
Expand Down Expand Up @@ -3051,9 +3075,9 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) {
/** Inside the GPU kernel, volume is always assumbed to be col-major (like those generated by MATLAB or FORTRAN) */
cachebox.x = (cp1.x - cp0.x + 1);
cachebox.y = (cp1.y - cp0.y + 1) * (cp1.x - cp0.x + 1);

dimlen.x = cfg->dim.x;
dimlen.y = cfg->dim.y * cfg->dim.x;

dimlen.z = cfg->dim.x * cfg->dim.y * cfg->dim.z;
dimlen.w = fieldlen;

Expand Down Expand Up @@ -3700,6 +3724,10 @@ is more than what your have specified (%d), please use the -H option to specify
}
}

if (cfg->extrasrclen && cfg->srcid < 0) { // when multiple sources are simulated, the total photons are evenly divided
scale[0] *= (cfg->extrasrclen + 1);
}

cfg->normalizer = scale[0];
cfg->his.normalizer = scale[0];

Expand Down
1 change: 1 addition & 0 deletions src/mcx_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ typedef struct __align__(16) KernelParams {
float Rtstep; /**< reciprocal of the step size */
MCXSrc src; /**< additional source data, including pos, dir, param1, param2 */
unsigned int extrasrclen; /**< number of additional sources */
int srcid; /**< 0: simulate all sources combined, -1: simulate all sources separately; >0: only simulate a single source */
float4 s0; /**< initial stokes parameters, for polarized photon simulation */
float3 maxidx; /**< maximum index in x/y/z directions for out-of-bound tests */
uint4 dimlen; /**< maximum index used to convert x/y/z to 1D array index */
Expand Down
Loading

0 comments on commit 9dc832d

Please sign in to comment.