Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring #665

Merged
merged 2 commits into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 8 additions & 23 deletions include/ReadExperiment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

// Boost includes
#include <boost/filesystem.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/range/irange.hpp>

// Cereal includes
Expand All @@ -48,10 +49,12 @@ class ReadExperiment {
public:
ReadExperiment(std::vector<ReadLibrary>& readLibraries,
// const boost::filesystem::path& transcriptFile,
const boost::filesystem::path& indexDirectory,
SalmonIndex* salmonIndex,
// const boost::filesystem::path& indexDirectory,
SalmonOpts& sopt)
: readLibraries_(readLibraries),
// transcriptFile_(transcriptFile),
salmonIndex_(salmonIndex),
transcripts_(std::vector<Transcript>()), totalAssignedFragments_(0),
fragStartDists_(5), posBiasFW_(5), posBiasRC_(5), posBiasExpectFW_(5),
posBiasExpectRC_(5), /*seqBiasModel_(1.0),*/ eqBuilder_(sopt.jointLog, sopt.maxHashResizeThreads),
Expand Down Expand Up @@ -115,24 +118,6 @@ class ReadExperiment {
}
*/

// ==== Figure out the index type
boost::filesystem::path versionPath = indexDirectory / "versionInfo.json";
SalmonIndexVersionInfo versionInfo;
versionInfo.load(versionPath);
if (versionInfo.indexVersion() == 0) {
fmt::MemoryWriter infostr;
infostr << "Error: The index version file " << versionPath.string()
<< " doesn't seem to exist. Please try re-building the salmon "
"index.";
throw std::invalid_argument(infostr.str());
}
// Check index version compatibility here
auto indexType = versionInfo.indexType();
// ==== Figure out the index type

salmonIndex_.reset(new SalmonIndex(sopt.jointLog, indexType));
salmonIndex_->load(indexDirectory);

// Now we'll have either an FMD-based index or a QUASI index
// dispatch on the correct type.
fmt::MemoryWriter infostr;
Expand All @@ -159,7 +144,7 @@ class ReadExperiment {
// Create the cluster forest for this set of transcripts
clusters_.reset(new ClusterForest(transcripts_.size(), transcripts_));
}

EQBuilderT& equivalenceClassBuilder() { return eqBuilder_; }

std::string getIndexSeqHash256() const { return salmonIndex_->seqHash256(); }
Expand Down Expand Up @@ -262,7 +247,7 @@ class ReadExperiment {
}
}

SalmonIndex* getIndex() { return salmonIndex_.get(); }
SalmonIndex* getIndex() { return salmonIndex_; }

template <typename PuffIndexT>
void loadTranscriptsFromPuff(PuffIndexT* idx_, const SalmonOpts& sopt) {
Expand Down Expand Up @@ -416,7 +401,7 @@ class ReadExperiment {
std::atomic<bool> burnedIn{
totalAssignedFragments_ + numAssignedFragments_ >= sopt.numBurninFrags};
for (auto& rl : readLibraries_) {
processReadLibrary(rl, salmonIndex_.get(), transcripts_, clusterForest(),
processReadLibrary(rl, salmonIndex_, transcripts_, clusterForest(),
*(fragLengthDist_.get()), numAssignedFragments_,
numThreads, burnedIn);
}
Expand Down Expand Up @@ -806,7 +791,7 @@ class ReadExperiment {
/**
* The index we've built on the set of transcripts.
*/
std::unique_ptr<SalmonIndex> salmonIndex_{nullptr};
SalmonIndex* salmonIndex_{nullptr};
/**
* The cluster forest maintains the dynamic relationship
* defined by transcripts and reads --- if two transcripts
Expand Down
5 changes: 5 additions & 0 deletions include/SalmonIndex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,9 @@ class SalmonIndex {
std::string decoyNameHash256_;
};

// Convenience function to load an index
std::unique_ptr<SalmonIndex>
checkLoadIndex(const boost::filesystem::path& indexDirectory,
std::shared_ptr<spdlog::logger>& logger);

#endif //__SALMON_INDEX_HPP
52 changes: 26 additions & 26 deletions src/Alevin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<HEADER
**/

#include <memory>
#include <random>
#include <algorithm>
#include <atomic>
Expand Down Expand Up @@ -63,11 +64,11 @@

// salmon includes
#include "FastxParser.hpp"
#include "ProgramOptionsGenerator.hpp"
#include "SalmonConfig.hpp"
#include "SalmonDefaults.hpp"
#include "SalmonOpts.hpp"
#include "SalmonUtils.hpp"
#include "ProgramOptionsGenerator.hpp"

using paired_parser_qual = fastx_parser::FastxParser<fastx_parser::ReadQualPair>;
using single_parser = fastx_parser::FastxParser<fastx_parser::ReadSeq>;
Expand All @@ -78,20 +79,18 @@ namespace apt = alevin::protocols;
namespace aut = alevin::utils;

template <typename ProtocolT>
int alevin_sc_align(AlevinOpts<ProtocolT>& aopt,
SalmonOpts& sopt,
boost::program_options::parsed_options& orderedOptions);
int alevin_sc_align(AlevinOpts<ProtocolT>& aopt, SalmonOpts& sopt,
boost::program_options::parsed_options& orderedOptions,
std::unique_ptr<SalmonIndex>& salmonIndex);

template <typename ProtocolT>
int alevinQuant(AlevinOpts<ProtocolT>& aopt,
SalmonOpts& sopt,
SoftMapT& barcodeMap,
TrueBcsT& trueBarcodes,
int alevinQuant(AlevinOpts<ProtocolT>& aopt, SalmonOpts& sopt,
SoftMapT& barcodeMap, TrueBcsT& trueBarcodes,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap,
boost::program_options::parsed_options& orderedOptions,
CFreqMapT& freqCounter,
size_t numLowConfidentBarcode);
CFreqMapT& freqCounter, size_t numLowConfidentBarcode,
std::unique_ptr<SalmonIndex>& salmonIndex);

//colors for progress monitoring
const char RESET_COLOR[] = "\x1b[0m";
Expand Down Expand Up @@ -835,7 +834,8 @@ void initiatePipeline(AlevinOpts<ProtocolT>& aopt,
boost::program_options::variables_map& vm,
std::string commentString, bool noTgMap,
std::vector<std::string> barcodeFiles,
std::vector<std::string> readFiles){
std::vector<std::string> readFiles,
std::unique_ptr<SalmonIndex>& salmonIndex){
bool isOptionsOk = aut::processAlevinOpts(aopt, sopt, noTgMap, vm);
if (!isOptionsOk){
aopt.jointLog->flush();
Expand Down Expand Up @@ -894,14 +894,14 @@ void initiatePipeline(AlevinOpts<ProtocolT>& aopt,
// write out the cmd_info.json to make sure we have that
boost::filesystem::path outputDirectory = vm["output"].as<std::string>();
bool isWriteOk = aut::writeCmdInfo(outputDirectory / "cmd_info.json", orderedOptions);

if(!isWriteOk){
fmt::print(stderr, "Writing cmd_info.json in output directory failed.\nExiting now.");
exit(1);
}

// do the actual mapping
auto rc = alevin_sc_align(aopt, sopt, orderedOptions);

auto rc = alevin_sc_align(aopt, sopt, orderedOptions, salmonIndex);
if (rc == 0) {
aopt.jointLog->info("sc-align successful.");
} else {
Expand Down Expand Up @@ -949,7 +949,7 @@ void initiatePipeline(AlevinOpts<ProtocolT>& aopt,
aopt.jointLog->info("Done with Barcode Processing; Moving to Quantify\n");
alevinQuant(aopt, sopt, barcodeSoftMap, trueBarcodes,
txpToGeneMap, geneIdxMap, orderedOptions,
freqCounter, numLowConfidentBarcode);
freqCounter, numLowConfidentBarcode, salmonIndex);
}
else{
boost::filesystem::path cmdInfoPath = vm["output"].as<std::string>();
Expand All @@ -962,7 +962,7 @@ void initiatePipeline(AlevinOpts<ProtocolT>& aopt,
}
}

int salmonBarcoding(int argc, const char* argv[]) {
int salmonBarcoding(int argc, const char* argv[], std::unique_ptr<SalmonIndex>& salmonIndex) {
namespace bfs = boost::filesystem;
namespace po = boost::program_options;

Expand Down Expand Up @@ -1077,7 +1077,7 @@ salmon-based processing of single-cell RNA-seq data.
//aopt.jointLog->warn("Using DropSeq Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}
else if(indrop){
std::cout<<"Indrop get neighbors removed, please use other protocols";
Expand All @@ -1089,7 +1089,7 @@ salmon-based processing of single-cell RNA-seq data.
//aopt.jointLog->warn("Using InDrop Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}
else{
fmt::print(stderr, "ERROR: indrop needs w1 flag too.\n Exiting Now");
Expand All @@ -1105,7 +1105,7 @@ salmon-based processing of single-cell RNA-seq data.
//aopt.jointLog->warn("Using InDrop Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}
else{
fmt::print(stderr, "ERROR: citeseq needs featureStart and featureLength flag too.\n Exiting Now");
Expand All @@ -1117,54 +1117,54 @@ salmon-based processing of single-cell RNA-seq data.
//aopt.jointLog->warn("Using 10x v3 Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}
else if(chrom){
AlevinOpts<apt::Chromium> aopt;
//aopt.jointLog->warn("Using 10x v2 Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}
else if(gemcode){
AlevinOpts<apt::Gemcode> aopt;
//aopt.jointLog->warn("Using 10x v1 Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
unmateFiles, readFiles);
unmateFiles, readFiles, salmonIndex);
}
else if(celseq){
AlevinOpts<apt::CELSeq> aopt;
//aopt.jointLog->warn("Using CEL-Seq Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}
else if(celseq2){
AlevinOpts<apt::CELSeq2> aopt;
//aopt.jointLog->warn("Using CEL-Seq2 Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}
else if(quartzseq2){
AlevinOpts<apt::QuartzSeq2> aopt;
//aopt.jointLog->warn("Using Quartz-Seq2 Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
} else if (custom_old) {
AlevinOpts<apt::Custom> aopt;
//aopt.jointLog->warn("Using Custom Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
} else if (custom_new) {
AlevinOpts<apt::CustomGeometry> aopt;
//aopt.jointLog->warn("Using Custom Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles);
barcodeFiles, readFiles, salmonIndex);
}

} catch (po::error& e) {
Expand Down
24 changes: 23 additions & 1 deletion src/BuildSalmonIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
// http://stackoverflow.com/questions/108318/whats-the-simplest-way-to-test-whether-a-number-is-a-power-of-2-in-c
bool isPowerOfTwo(uint32_t n) { return (n > 0 and (n & (n - 1)) == 0); }

int salmonIndex(int argc, const char* argv[]) {
int salmonIndex(int argc, const char* argv[], std::unique_ptr<SalmonIndex>& /* salmonIndex */) {

using std::string;
namespace bfs = boost::filesystem;
Expand Down Expand Up @@ -256,3 +256,25 @@ Creates a salmon index.
}
return ret;
}

std::unique_ptr<SalmonIndex> checkLoadIndex(const boost::filesystem::path& indexDirectory, std::shared_ptr<spdlog::logger>& logger) {
// ==== Figure out the index type
boost::filesystem::path versionPath =
indexDirectory / "versionInfo.json";
SalmonIndexVersionInfo versionInfo;
versionInfo.load(versionPath);
if (versionInfo.indexVersion() == 0) {
fmt::MemoryWriter infostr;
infostr
<< "Error: The index version file " << versionPath.string()
<< " doesn't seem to exist. Please try re-building the salmon "
"index.";
throw std::invalid_argument(infostr.str());
}
// Check index version compatibility here
auto indexType = versionInfo.indexType();
// ==== Figure out the index type
std::unique_ptr<SalmonIndex> res(new SalmonIndex(logger, indexType));
res->load(indexDirectory);
return res;
}
Loading