Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Feb 16, 2022
2 parents 447ae4e + f166c1e commit 9b30db4
Show file tree
Hide file tree
Showing 36 changed files with 6,267 additions and 566 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -428,9 +428,9 @@ endif()
##
# Set the latest version and look for what we need
##
set(Boost_ADDITIONAL_VERSIONS "1.59.0" "1.60.0" "1.61.0" "1.62.0" "1.63.0" "1.64.0" "1.65.0" "1.66.0" "1.67.0" "1.68.0" "1.69.0" "1.70.0" "1.71.0")
set(Boost_ADDITIONAL_VERSIONS "1.59.0" "1.60.0" "1.61.0" "1.62.0" "1.63.0" "1.64.0" "1.65.0" "1.66.0" "1.67.0" "1.68.0" "1.69.0" "1.70.0" "1.71.0" "1.72.0" "1.73.0" "1.74.0" "1.75.0" "1.76.0" "1.77.0" "1.78.0")
if (NOT BOOST_RECONFIGURE)
find_package(Boost 1.59.0 COMPONENTS iostreams filesystem system timer chrono program_options)
find_package(Boost 1.59.0 COMPONENTS iostreams system filesystem timer chrono program_options)
message("BOOST_INCLUDEDIR = ${BOOST_INCLUDEDIR}")
message("BOOST_LIBRARYDIR = ${BOOST_LIBRARYDIR}")
message("Boost_FOUND = ${Boost_FOUND}")
Expand Down Expand Up @@ -459,7 +459,7 @@ if(BOOST_RECONFIGURE)
set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
set(Boost_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
set(Boost_LIBRARY_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib)
find_package(Boost 1.59.0 COMPONENTS iostreams filesystem system timer chrono program_options locale REQUIRED)
find_package(Boost 1.59.0 COMPONENTS iostreams system filesystem timer chrono program_options locale REQUIRED)
set(FETCH_BOOST FALSE)
endif()

Expand Down
2 changes: 1 addition & 1 deletion current_version.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
VERSION_MAJOR 1
VERSION_MINOR 6
VERSION_MINOR 7
VERSION_PATCH 0
8 changes: 2 additions & 6 deletions doc/source/alevin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,8 @@ Single-cell protocol specific notes
In cases where a single-cell protocol supports variable-length cell barcodes, alevin adds nucleotide padding to make the lengths uniform.
Furthermore, the padding scheme ensures that there are no collisions added in the process. The padding scheme is as follows:

1. sci-RNA-seq3: The barcode is composed of 9-10 bp hairpin adaptor and 10 bp reverse transcription index making it 19-20 bp long. If
the bacode is 20 bp long, alevin adds `A` and it adds `AC` if it is 19 bp long. Thus, the length of barcode in the output is 21 bp.
2. inDropV2: 8-11 bp barcode1 along with 8 bp barcode2 makes up the barcode. For barcode lengths of 16, 17, 18, and 19 bp, alevin adds
`AAAC`, `AAG`, `AT`, and `A` respectively. Thus, the length of barcode in the output is 20 bp. Furthermore, the position of barcode1 is
dependent on finding exact match of sequence `w1`. If exact match is not found, a search for `w1` is performed allowing a maximum hamming
distance 2 b/w `w1` and read2 substring of w1 length within the required bounds; the first match is returned.
1. sci-RNA-seq3: The barcode is composed of a 9-10 bp hairpin adaptor and a 10 bp reverse transcription index, making it 19-20 bp long. If the barcode is 20 bp long, alevin adds *A*, and it adds *AC* if it is 19 bp long. Thus, the length of the barcode in the output is 21 bp.
2. inDropV2: An 8-11 bp barcode1 along with an 8 bp barcode2 makes up the barcode. For barcode lengths of 16, 17, 18, and 19 bp, alevin adds *AAAC*, *AAG*, *AT*, and *A* respectively. Thus, the length of the barcode in the output is 20 bp. Furthermore, the position of barcode1 depends on finding an exact match of the sequence ``w1``. If an exact match is not found, a search for ``w1`` is performed allowing a maximum Hamming distance of 2 between ``w1`` and each read2 substring of the same length as ``w1`` within the required bounds; the first match is returned.

Output
------
Expand Down
4 changes: 2 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@
# built documents.
#
# The short X.Y version.
version = '1.6'
version = '1.7'
# The full version, including alpha/beta/rc tags.
release = '1.6.0'
release = '1.7.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ MAINTAINER [email protected]

ENV PACKAGES git gcc make g++ libboost-all-dev liblzma-dev libbz2-dev \
ca-certificates zlib1g-dev libcurl4-openssl-dev curl unzip autoconf apt-transport-https ca-certificates gnupg software-properties-common wget
ENV SALMON_VERSION 1.6.0
ENV SALMON_VERSION 1.7.0

# salmon binary will be installed in /home/salmon/bin/salmon

Expand Down
2 changes: 1 addition & 1 deletion docker/build_test.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#! /bin/bash
SALMON_VERSION=1.6.0
SALMON_VERSION=1.7.0
docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest .
31 changes: 13 additions & 18 deletions include/FastxParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
#include <thread>
#include <vector>

extern "C" {
#include "kseq.h"
}
#include "kseq++.hpp"

#include "concurrentqueue.h"

Expand Down Expand Up @@ -59,27 +57,24 @@ typename _Unique_if<T>::_Known_bound make_unique(Args&&...) = delete;
#endif //__FASTX_PARSER_PRECXX14_MAKE_UNIQUE__

namespace fastx_parser {
struct ReadSeq {
std::string seq;
std::string name;
~ReadSeq() {}
};

struct ReadQual {
std::string seq;
std::string name;
std::string qual;
~ReadQual() {}
};
using ReadSeq = klibpp::KSeq;
using ReadQual = klibpp::KSeq;

// The ReadPair and ReadQualPair are obviously
// redundant. But, having them as separate types
// here would allow us to say something at compile
// time about if we expect to be able to look
// at qualities etc. Think more about if we
// really want to keep both of these.
struct ReadPair {
ReadSeq first;
ReadSeq second;
klibpp::KSeq first;
klibpp::KSeq second;
};

struct ReadQualPair {
ReadQual first;
ReadQual second;
klibpp::KSeq first;
klibpp::KSeq second;
};

template <typename T> class ReadChunk {
Expand Down
13 changes: 11 additions & 2 deletions include/FastxParserThreadUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@
#include <pthread.h>
#include <random>
#include <thread>

#if defined(__SSE2__)
#include "simde/x86/sse2.h"
#if defined(HAVE_SIMDE)
#include "simde/x86/sse2.h"
#else
#include <emmintrin.h>
#endif
#endif

// Most of this code is taken directly from
Expand All @@ -23,7 +28,11 @@ static const size_t MAX_BACKOFF_ITERS = 1024;

ALWAYS_INLINE static void cpuRelax() {
#if defined(__SSE2__) // AMD and Intel
simde_mm_pause();
#if defined(HAVE_SIMDE)
simde_mm_pause();
#else
_mm_pause();
#endif
#elif defined(__i386__) || defined(__x86_64__)
asm volatile("pause");
#elif defined(__aarch64__)
Expand Down
4 changes: 2 additions & 2 deletions include/SalmonConfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@

namespace salmon {
constexpr char majorVersion[] = "1";
constexpr char minorVersion[] = "6";
constexpr char minorVersion[] = "7";
constexpr char patchVersion[] = "0";
constexpr char version[] = "1.6.0";
constexpr char version[] = "1.7.0";
constexpr uint32_t indexVersion = 5;
constexpr char requiredQuasiIndexVersion[] = "p7";
} // namespace salmon
Expand Down
3 changes: 3 additions & 0 deletions include/SalmonDefaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ namespace defaults {
constexpr const uint32_t maxSMEMOccs{200};
constexpr const bool initUniform{false};
constexpr const uint32_t maxReadOccs{200};
constexpr const uint32_t maxRecoverReadOccs{2500};
constexpr const uint32_t maxOccsPerHit{1000};
constexpr const bool noLengthCorrection{false};
constexpr const bool noEffectiveLengthCorrection{false};
Expand Down Expand Up @@ -139,6 +140,8 @@ namespace defaults {
constexpr const bool isCITESeq{false};
constexpr const bool isCELSeq{false};
constexpr const bool isCELSeq2{false};
constexpr const bool isSplitSeqV1{false};
constexpr const bool isSplitSeqV2{false};
constexpr const bool isQuartzSeq2{false};
constexpr const bool isSciSeq3{false};
constexpr const bool noQuant{false};
Expand Down
4 changes: 2 additions & 2 deletions include/SalmonMappingUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
#include "pufferfish/ksw2pp/KSW2Aligner.hpp"
#include "pufferfish/metro/metrohash64.h"
#include "pufferfish/SelectiveAlignmentUtils.hpp"
#include "pufferfish/chobo/small_vector.hpp"
#include "pufferfish/itlib/small_vector.hpp"
#include "parallel_hashmap/phmap.h"

namespace salmon {
Expand Down Expand Up @@ -122,7 +122,7 @@ namespace salmon {
int32_t secondBestScore;
int32_t bestDecoyScore;
double decoyThresh;
chobo::small_vector<std::pair<int32_t, int32_t>> best_decoy_hits;
itlib::small_vector<std::pair<int32_t, int32_t>> best_decoy_hits;
bool collect_decoy_info_;
std::vector<int32_t> scores_;
phmap::flat_hash_map<uint32_t, std::pair<int32_t, int32_t>> bestScorePerTranscript_;
Expand Down
4 changes: 4 additions & 0 deletions include/SalmonOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ struct SalmonOpts {

uint32_t maxReadOccs; // Discard reads mapping to more than this many places.

uint32_t maxRecoverReadOccs; // If a read had hits but maps to > maxReadOccs loci, then
// try to recover mappings up to this many loci (currently only
// affects behavior in alevin --sketch mode).

uint32_t maxExpectedReadLen; // Maximum expected length of an observed read.

// hidden / for extreme control
Expand Down
8 changes: 8 additions & 0 deletions include/SalmonUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,14 @@ inline void incLoop(std::atomic<double>& val, double inc) {

std::string getCurrentTimeAsString();

// Encodes the heuristic for guessing how threads should
// be allocated based on the available reads.
//
// nfiles         : input only.
// worker_threads : input/output; may be rewritten by the heuristic.
// parse_threads  : input/output; may be rewritten by the heuristic.
//
// Returns true if the input was modified and false otherwise.
// NOTE(review): nfiles is presumably the number of input read files --
// confirm against the definition.
bool configure_parsing(size_t nfiles, // input param
size_t& worker_threads, // input/output param
uint32_t& parse_threads // input/output param
);

bool validateOptionsAlignment_(SalmonOpts& sopt);
bool validateOptionsMapping_(SalmonOpts& sopt);

Expand Down
16 changes: 13 additions & 3 deletions include/SingleCellProtocols.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "AlevinOpts.hpp"
#include "AlevinTypes.hpp"
#include "pufferfish/chobo/static_vector.hpp"
#include "pufferfish/itlib/static_vector.hpp"

namespace alevin{
namespace protocols {
Expand All @@ -15,8 +15,8 @@ namespace alevin{
struct TagGeometry {
// uint32_t read_num{0};
// tuples are read_num, start_pos, length
chobo::static_vector<std::pair<uint32_t, size_t>, num_tag_pieces> substr_locs1{};
chobo::static_vector<std::pair<uint32_t, size_t>, num_tag_pieces> substr_locs2{};
itlib::static_vector<std::pair<uint32_t, size_t>, num_tag_pieces> substr_locs1{};
itlib::static_vector<std::pair<uint32_t, size_t>, num_tag_pieces> substr_locs2{};
// the total length of the tag on read 1
size_t length1{0};
// the total length of the tag on read 2
Expand Down Expand Up @@ -177,6 +177,16 @@ namespace alevin{
CELSeq2(): Rule(6, 6, BarcodeEnd::FIVE, 4096){}
};

// Protocol rule for SplitSeqV2.
// NOTE(review): the meaning of the Rule constructor arguments is not visible
// here; 24 equals 3 * bcLen, so it is presumably the combined barcode
// length -- confirm against Rule's definition.
struct SplitSeqV2 : Rule {
  // Per-instance constants. Judging by the names, bcLen is the length of
  // each barcode piece and bc{1,2,3}Pos are the piece start offsets --
  // TODO confirm against the code that reads them.
  const std::size_t bcLen = 8;
  const std::size_t bc1Pos = 10;
  const std::size_t bc2Pos = 48;
  const std::size_t bc3Pos = 78;

  // 4294967295 == 2^32 - 1.
  SplitSeqV2() : Rule(24, 10, BarcodeEnd::FIVE, 4294967295) {}
};

// Protocol rule for SplitSeqV1.
// NOTE(review): the meaning of the Rule constructor arguments is not visible
// here; 24 equals 3 * bcLen, so it is presumably the combined barcode
// length -- confirm against Rule's definition.
struct SplitSeqV1 : Rule {
  // Per-instance constants. Judging by the names, bcLen is the length of
  // each barcode piece and bc{1,2,3}Pos are the piece start offsets --
  // TODO confirm against the code that reads them.
  const std::size_t bcLen = 8;
  const std::size_t bc1Pos = 10;
  const std::size_t bc2Pos = 48;
  const std::size_t bc3Pos = 86;

  // 4294967295 == 2^32 - 1.
  SplitSeqV1() : Rule(24, 10, BarcodeEnd::FIVE, 4294967295) {}
};

//dummy class
struct Custom : Rule{
Custom() : Rule(0,0,BarcodeEnd::FIVE,0){}
Expand Down
Loading

0 comments on commit 9b30db4

Please sign in to comment.