From e2b92cba8c95413b5531e29cc6793baeb7b6ef2b Mon Sep 17 00:00:00 2001 From: Gaurav Date: Tue, 23 Nov 2021 11:18:27 -0500 Subject: [PATCH 01/13] Fix formatting issues --- doc/source/alevin.rst | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/doc/source/alevin.rst b/doc/source/alevin.rst index 92449a215..4e119380f 100644 --- a/doc/source/alevin.rst +++ b/doc/source/alevin.rst @@ -187,12 +187,8 @@ Single-cell protocol specific notes In cases where single-cell protocol supports variable length cellbarcodes, alevin adds nucleotide padding to make the lengths uniform. Furthermore, the padding scheme ensures that there are no collisions added in the process. The padding scheme is as follows: -1. sci-RNA-seq3: The barcode is composed of 9-10 bp hairpin adaptor and 10 bp reverse transcription index making it 19-20 bp long. If -the bacode is 20 bp long, alevin adds `A` and it adds `AC` if it is 19 bp long. Thus, the length of barcode in the output is 21 bp. -2. inDropV2: 8-11 bp barcode1 along with 8 bp barcode2 makes up the barcode. For barcode lengths of 16, 17, 18, and 19 bp, alevin adds -`AAAC`, `AAG`, `AT`, and `A` respectively. Thus, the length of barcode in the output is 20 bp. Furthermore, the position of barcode1 is -dependent on finding exact match of sequence `w1`. If exact match is not found, a search for `w1` is performed allowing a maximum hamming - distance 2 b/w `w1` and read2 substring of w1 length within the required bounds; the first match is returned. +1. sci-RNA-seq3: The barcode is composed of a 9-10 bp hairpin adaptor and a 10 bp reverse transcription index, making it 19-20 bp long. If the barcode is 20 bp long, alevin adds *A*, and it adds *AC* if it is 19 bp long. Thus, the length of the barcode in the output is 21 bp. +2. inDropV2: An 8-11 bp barcode1 along with an 8 bp barcode2 makes up the barcode. For barcode lengths of 16, 17, 18, and 19 bp, alevin adds *AAAC*, *AAG*, *AT*, and *A*, respectively. Thus, the length of the barcode in the output is 20 bp. Furthermore, the position of barcode1 depends on finding an exact match of the sequence ``w1``. If an exact match is not found, a search for ``w1`` is performed allowing a maximum Hamming distance of 2 between ``w1`` and a read2 substring of ``w1``'s length within the required bounds; the first match is returned. 
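The padding rules above are straightforward to express in code. The following is a small, illustrative C++ sketch of the two schemes; the function names and the exception-based handling of unexpected lengths are hypothetical, not part of alevin's actual API:

  #include <stdexcept>
  #include <string>

  // Pad a 19-20 bp sci-RNA-seq3 barcode to a uniform 21 bp.
  std::string padSciSeq3(const std::string& bc) {
    switch (bc.size()) {
      case 20: return bc + "A";   // 20 bp -> 21 bp
      case 19: return bc + "AC";  // 19 bp -> 21 bp
      default: throw std::invalid_argument("unexpected sci-RNA-seq3 barcode length");
    }
  }

  // Pad a 16-19 bp inDropV2 barcode (barcode1 + barcode2) to a uniform 20 bp.
  std::string padInDropV2(const std::string& bc) {
    switch (bc.size()) {
      case 19: return bc + "A";
      case 18: return bc + "AT";
      case 17: return bc + "AAG";
      case 16: return bc + "AAAC";  // 16 bp -> 20 bp
      default: throw std::invalid_argument("unexpected inDropV2 barcode length");
    }
  }

To see why this introduces no collisions: if a barcode of one length happened to be a prefix of a longer barcode extended with `A`s, the two padded strings would still differ in their final position (for inDropV2 the four input lengths yield padded strings ending in C, G, T, and A, respectively), so barcodes of different original lengths can never pad to the same string.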
Output ------ From 87e7e28be600d7885dfb89e9c08c354fd0e2576e Mon Sep 17 00:00:00 2001 From: Gaurav Date: Tue, 5 Oct 2021 16:07:20 -0400 Subject: [PATCH 02/13] add split-seqV2 protocol --- include/SalmonDefaults.hpp | 1 + include/SingleCellProtocols.hpp | 4 ++++ src/Alevin.cpp | 8 ++++++++ src/AlevinHash.cpp | 4 ++++ src/AlevinUtils.cpp | 36 +++++++++++++++++++++++++++++---- src/CollapsedCellOptimizer.cpp | 10 +++++++++ src/GZipWriter.cpp | 7 +++++++ src/ProgramOptionsGenerator.cpp | 3 +++ src/SalmonAlevin.cpp | 13 ++++++++++++ src/WhiteList.cpp | 4 ++++ 10 files changed, 86 insertions(+), 4 deletions(-) diff --git a/include/SalmonDefaults.hpp b/include/SalmonDefaults.hpp index e5d7f1d26..c2da9ab0e 100644 --- a/include/SalmonDefaults.hpp +++ b/include/SalmonDefaults.hpp @@ -139,6 +139,7 @@ namespace defaults { constexpr const bool isCITESeq{false}; constexpr const bool isCELSeq{false}; constexpr const bool isCELSeq2{false}; + constexpr const bool isSplitSeqV2{false}; constexpr const bool isQuartzSeq2{false}; constexpr const bool isSciSeq3{false}; constexpr const bool noQuant{false}; diff --git a/include/SingleCellProtocols.hpp b/include/SingleCellProtocols.hpp index 63880a15f..6fde8b705 100644 --- a/include/SingleCellProtocols.hpp +++ b/include/SingleCellProtocols.hpp @@ -177,6 +177,10 @@ namespace alevin{ CELSeq2(): Rule(6, 6, BarcodeEnd::FIVE, 4096){} }; + struct SplitSeqV2 : Rule{ + SplitSeqV2(): Rule(24, 10, BarcodeEnd::FIVE, 4294967295){} + }; + //dummy class struct Custom : Rule{ Custom() : Rule(0,0,BarcodeEnd::FIVE,0){} diff --git a/src/Alevin.cpp b/src/Alevin.cpp index 94f9e4d5d..591c92727 100644 --- a/src/Alevin.cpp +++ b/src/Alevin.cpp @@ -1029,6 +1029,7 @@ salmon-based processing of single-cell RNA-seq data. bool gemcode = vm["gemcode"].as<bool>(); bool celseq = vm["celseq"].as<bool>(); bool celseq2 = vm["celseq2"].as<bool>(); + bool splitseqV2 = vm["splitseqV2"].as<bool>(); bool quartzseq2 = vm["quartzseq2"].as<bool>(); bool sciseq3 = vm["sciseq3"].as<bool>(); bool custom_old = vm.count("barcodeLength") and @@ -1147,6 +1148,13 @@ salmon-based processing of single-cell RNA-seq data. 
vm, commentString, noTgMap, barcodeFiles, readFiles, salmonIndex); } + else if(splitseqV2){ + AlevinOpts<apt::SplitSeqV2> aopt; + //aopt.jointLog->warn("Using Split-SeqV2 Setting for Alevin"); + initiatePipeline(aopt, sopt, orderedOptions, + vm, commentString, noTgMap, + barcodeFiles, readFiles, salmonIndex); + } else if(quartzseq2){ AlevinOpts<apt::QuartzSeq2> aopt; //aopt.jointLog->warn("Using Quartz-Seq2 Setting for Alevin"); diff --git a/src/AlevinHash.cpp b/src/AlevinHash.cpp index 039b81901..f0f2867ff 100644 --- a/src/AlevinHash.cpp +++ b/src/AlevinHash.cpp @@ -306,6 +306,10 @@ int salmonHashQuantify(AlevinOpts& aopt, bfs::path& outputDirectory, CFreqMapT& freqCounter); template +int salmonHashQuantify(AlevinOpts<apt::SplitSeqV2>& aopt, + bfs::path& outputDirectory, + CFreqMapT& freqCounter); +template int salmonHashQuantify(AlevinOpts& aopt, bfs::path& outputDirectory, CFreqMapT& freqCounter); diff --git a/src/AlevinUtils.cpp b/src/AlevinUtils.cpp index d4165ec44..442c5045e 100644 --- a/src/AlevinUtils.cpp +++ b/src/AlevinUtils.cpp @@ -92,6 +92,13 @@ namespace alevin { (void)seq; return &seq2; } + std::string* getReadSequence(apt::SplitSeqV2& protocol, + std::string& seq, + std::string& seq2, + std::string& subseq){ + (void)seq2; // fastq2 contains barcode and umi + return &seq; + } template <> std::string* getReadSequence(apt::QuartzSeq2& protocol, std::string& seq, std::string& seq2, @@ -180,6 +187,15 @@ namespace alevin { (umi.assign(read, pt.barcodeLength, pt.umiLength), true) : false; } template <> + bool extractUMI(std::string& read, + std::string& read2, + apt::SplitSeqV2& pt, + std::string& umi){ + (void)read; + return (read2.length() >= pt.barcodeLength + pt.umiLength) ? + (umi.assign(read2, 0, pt.umiLength), true) : false; + } + template <> bool extractUMI(std::string& read, std::string& read2, apt::Gemcode& pt, @@ -273,8 +289,8 @@ namespace alevin { template <> bool extractBarcode(std::string& read, std::string& read2, - apt::CITESeq& pt, - std::string& bc){ + apt::CITESeq& pt, + std::string& bc){ (void)read2; return (read.length() >= pt.barcodeLength) ? (bc.assign(read, 0, pt.barcodeLength), true) : false; } template <> bool extractBarcode(std::string& read, std::string& read2, - apt::ChromiumV3& pt, - std::string& bc){ + apt::ChromiumV3& pt, + std::string& bc){ (void)read2; return (read.length() >= pt.barcodeLength) ? (bc.assign(read,0, pt.barcodeLength), true) : false; @@ -326,6 +342,14 @@ namespace alevin { } else { return false; } + template <> + bool extractBarcode(std::string& read, + std::string& read2, + apt::SplitSeqV2& pt, + std::string& bc){ + (void)read2; + return (read.length() >= pt.barcodeLength) ? 
+ (bc.assign(read, 0, pt.barcodeLength), true) : false; } template <> bool extractBarcode(std::string& read, @@ -1387,6 +1411,10 @@ namespace alevin { SalmonOpts& sopt, bool noTgMap, boost::program_options::variables_map& vm); template + bool processAlevinOpts(AlevinOpts<apt::SplitSeqV2>& aopt, + SalmonOpts& sopt, bool noTgMap, + boost::program_options::variables_map& vm); + template bool processAlevinOpts(AlevinOpts& aopt, SalmonOpts& sopt, bool noTgMap, boost::program_options::variables_map& vm); diff --git a/src/CollapsedCellOptimizer.cpp b/src/CollapsedCellOptimizer.cpp index b1f3c7775..42b154bcc 100644 --- a/src/CollapsedCellOptimizer.cpp +++ b/src/CollapsedCellOptimizer.cpp @@ -1495,6 +1495,16 @@ bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap, CFreqMapT& freqCounter, size_t numLowConfidentBarcode); template +bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap, + spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap, + spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap, + AlevinOpts<apt::SplitSeqV2>& aopt, + GZipWriter& gzw, + std::vector<std::string>& trueBarcodes, + std::vector<uint32_t>& umiCount, + CFreqMapT& freqCounter, + size_t numLowConfidentBarcode); +template bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap, spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap, spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap, diff --git a/src/GZipWriter.cpp b/src/GZipWriter.cpp index 1fb2fed9b..18b4d6e1a 100644 --- a/src/GZipWriter.cpp +++ b/src/GZipWriter.cpp @@ -1889,6 +1889,10 @@ bool GZipWriter::writeEquivCounts( const AlevinOpts& aopts, SCExpT& readExp); template +bool GZipWriter::writeEquivCounts( + const AlevinOpts<apt::SplitSeqV2>& aopts, + SCExpT& readExp); +template bool GZipWriter::writeEquivCounts( const AlevinOpts& aopts, SCExpT& readExp); @@ -1926,6 +1930,9 @@ template bool GZipWriter::writeMetaAlevin(const AlevinOpts& opts, boost::filesystem::path aux_dir); template bool +GZipWriter::writeMetaAlevin(const AlevinOpts<apt::SplitSeqV2>& opts, + boost::filesystem::path aux_dir); +template bool GZipWriter::writeMetaAlevin(const AlevinOpts& opts, boost::filesystem::path aux_dir); template bool diff --git a/src/ProgramOptionsGenerator.cpp b/src/ProgramOptionsGenerator.cpp index 2a7d2766f..e20e90022 100644 --- a/src/ProgramOptionsGenerator.cpp +++ b/src/ProgramOptionsGenerator.cpp @@ -410,6 +410,9 @@ namespace salmon { ( "celseq2", po::bool_switch()->default_value(alevin::defaults::isCELSeq2), "Use CEL-Seq2 Single Cell protocol for the library.") + ( + "splitseqV2", po::bool_switch()->default_value(alevin::defaults::isSplitSeqV2), + "Use Split-SeqV2 Single Cell protocol for the library.") ( "quartzseq2", po::bool_switch()->default_value(alevin::defaults::isQuartzSeq2), "Use Quartz-Seq2 v3.2 Single Cell protocol for the library assumes 15 length barcode and 8 length UMI.") diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp index 52c27fffb..8b4fc5d97 100644 --- a/src/SalmonAlevin.cpp +++ b/src/SalmonAlevin.cpp @@ -3138,6 +3138,19 @@ alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, CFreqMapT& freqCounter, size_t numLowConfidentBarcode, std::unique_ptr<SalmonIndex>& salmonIndex); +template int +alevin_sc_align(AlevinOpts<apt::SplitSeqV2>& aopt, SalmonOpts& sopt, + boost::program_options::parsed_options& orderedOptions, + std::unique_ptr<SalmonIndex>& salmonIndex); +template int +alevinQuant(AlevinOpts<apt::SplitSeqV2>& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap, + spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr<SalmonIndex>& salmonIndex); + template int alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, 
boost::program_options::parsed_options& orderedOptions, diff --git a/src/WhiteList.cpp b/src/WhiteList.cpp index de6636262..ccbeb5bcb 100644 --- a/src/WhiteList.cpp +++ b/src/WhiteList.cpp @@ -284,6 +284,10 @@ namespace alevin { std::vector<std::string>& trueBarcodes, bool useRibo, bool useMito, size_t numLowConfidentBarcode); + template bool performWhitelisting(AlevinOpts<apt::SplitSeqV2>& aopt, + std::vector<std::string>& trueBarcodes, + bool useRibo, bool useMito, + size_t numLowConfidentBarcode); template bool performWhitelisting(AlevinOpts& aopt, std::vector<std::string>& trueBarcodes, bool useRibo, bool useMito, From 0e8e208741e86a0fbbc3c1ab5fa9b0933ded0385 Mon Sep 17 00:00:00 2001 From: Gaurav Date: Fri, 5 Nov 2021 11:17:44 -0400 Subject: [PATCH 03/13] protocol modifications that missed commit --- include/SingleCellProtocols.hpp | 1 + src/Alevin.cpp | 1 + src/AlevinUtils.cpp | 9 ++++++--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/SingleCellProtocols.hpp b/include/SingleCellProtocols.hpp index 6fde8b705..eb44f13bd 100644 --- a/include/SingleCellProtocols.hpp +++ b/include/SingleCellProtocols.hpp @@ -179,6 +179,7 @@ namespace alevin{ struct SplitSeqV2 : Rule{ SplitSeqV2(): Rule(24, 10, BarcodeEnd::FIVE, 4294967295){} + std::size_t const bcLen = 8, bc1Pos = 78, bc2Pos = 48, bc3Pos = 10; }; //dummy class diff --git a/src/Alevin.cpp b/src/Alevin.cpp index 591c92727..bc5bfc8c7 100644 --- a/src/Alevin.cpp +++ b/src/Alevin.cpp @@ -1048,6 +1048,7 @@ salmon-based processing of single-cell RNA-seq data. if (gemcode) validate_num_protocols += 1; if (celseq) validate_num_protocols += 1; if (celseq2) validate_num_protocols += 1; + if (splitseqV2) validate_num_protocols += 1; if (quartzseq2) validate_num_protocols += 1; if (sciseq3) validate_num_protocols += 1; if (custom) validate_num_protocols += 1; diff --git a/src/AlevinUtils.cpp b/src/AlevinUtils.cpp index 442c5045e..74e8de079 100644 --- a/src/AlevinUtils.cpp +++ b/src/AlevinUtils.cpp @@ -92,6 +92,7 @@ namespace alevin { (void)seq; return &seq2; } + template <> std::string* getReadSequence(apt::SplitSeqV2& protocol, std::string& seq, std::string& seq2, @@ -347,9 +348,11 @@ namespace alevin { std::string& read2, apt::SplitSeqV2& pt, std::string& bc){ - (void)read2; - return (read.length() >= pt.barcodeLength) ? - (bc.assign(read, 0, pt.barcodeLength), true) : false; + (void)read; + + return (read2.length() >= pt.barcodeLength) ? 
+ (bc.assign(read2.substr(pt.bc1Pos, pt.bcLen) + read2.substr(pt.bc2Pos, pt.bcLen) + + read2.substr(pt.bc3Pos, pt.bcLen), 0, pt.barcodeLength), true) : false; } template <> bool extractBarcode(std::string& read, From 56094b7e24a6dd6190758ba89ca9937892e2c2c7 Mon Sep 17 00:00:00 2001 From: Gaurav Date: Fri, 3 Dec 2021 10:18:00 -0500 Subject: [PATCH 04/13] add split-seqV1 single-cell protocol --- include/SalmonDefaults.hpp | 1 + include/SingleCellProtocols.hpp | 5 +++++ src/Alevin.cpp | 9 +++++++++ src/AlevinHash.cpp | 4 ++++ src/AlevinUtils.cpp | 35 ++++++++++++++++++++++++++++++++- src/CollapsedCellOptimizer.cpp | 10 ++++++++++ src/GZipWriter.cpp | 7 +++++++ src/ProgramOptionsGenerator.cpp | 3 +++ src/SalmonAlevin.cpp | 13 ++++++++++++ src/WhiteList.cpp | 4 ++++ 10 files changed, 90 insertions(+), 1 deletion(-) diff --git a/include/SalmonDefaults.hpp b/include/SalmonDefaults.hpp index c2da9ab0e..6016d8bd4 100644 --- a/include/SalmonDefaults.hpp +++ b/include/SalmonDefaults.hpp @@ -139,6 +139,7 @@ namespace defaults { constexpr const bool isCITESeq{false}; constexpr const bool isCELSeq{false}; constexpr const bool isCELSeq2{false}; + constexpr const bool isSplitSeqV1{false}; constexpr const bool isSplitSeqV2{false}; constexpr const bool isQuartzSeq2{false}; constexpr const bool isSciSeq3{false}; diff --git a/include/SingleCellProtocols.hpp b/include/SingleCellProtocols.hpp index eb44f13bd..d62aaa0f0 100644 --- a/include/SingleCellProtocols.hpp +++ b/include/SingleCellProtocols.hpp @@ -182,6 +182,11 @@ namespace alevin{ std::size_t const bcLen = 8, bc1Pos = 78, bc2Pos = 48, bc3Pos = 10; }; + struct SplitSeqV1 : Rule{ + SplitSeqV1(): Rule(24, 10, BarcodeEnd::FIVE, 4294967295){} + std::size_t const bcLen = 8, bc1Pos = 86, bc2Pos = 48, bc3Pos = 10; + }; + //dummy class struct Custom : Rule{ Custom() : Rule(0,0,BarcodeEnd::FIVE,0){} }; diff --git a/src/Alevin.cpp b/src/Alevin.cpp index bc5bfc8c7..8246b7263 100644 --- a/src/Alevin.cpp +++ b/src/Alevin.cpp @@ -1029,6 +1029,7 @@ salmon-based processing of single-cell RNA-seq data. bool gemcode = vm["gemcode"].as<bool>(); bool celseq = vm["celseq"].as<bool>(); bool celseq2 = vm["celseq2"].as<bool>(); + bool splitseqV1 = vm["splitseqV1"].as<bool>(); bool splitseqV2 = vm["splitseqV2"].as<bool>(); bool quartzseq2 = vm["quartzseq2"].as<bool>(); bool sciseq3 = vm["sciseq3"].as<bool>(); @@ -1048,6 +1049,7 @@ salmon-based processing of single-cell RNA-seq data. if (gemcode) validate_num_protocols += 1; if (celseq) validate_num_protocols += 1; if (celseq2) validate_num_protocols += 1; + if (splitseqV1) validate_num_protocols += 1; if (splitseqV2) validate_num_protocols += 1; if (quartzseq2) validate_num_protocols += 1; if (sciseq3) validate_num_protocols += 1; @@ -1149,6 +1151,13 @@ salmon-based processing of single-cell RNA-seq data. 
vm, commentString, noTgMap, barcodeFiles, readFiles, salmonIndex); } + else if(splitseqV1){ + AlevinOpts<apt::SplitSeqV1> aopt; + //aopt.jointLog->warn("Using Split-SeqV1 Setting for Alevin"); + initiatePipeline(aopt, sopt, orderedOptions, + vm, commentString, noTgMap, + barcodeFiles, readFiles, salmonIndex); + } else if(splitseqV2){ AlevinOpts<apt::SplitSeqV2> aopt; //aopt.jointLog->warn("Using Split-SeqV2 Setting for Alevin"); diff --git a/src/AlevinHash.cpp b/src/AlevinHash.cpp index f0f2867ff..8bddb0636 100644 --- a/src/AlevinHash.cpp +++ b/src/AlevinHash.cpp @@ -306,6 +306,10 @@ int salmonHashQuantify(AlevinOpts& aopt, bfs::path& outputDirectory, CFreqMapT& freqCounter); template +int salmonHashQuantify(AlevinOpts<apt::SplitSeqV1>& aopt, + bfs::path& outputDirectory, + CFreqMapT& freqCounter); +template int salmonHashQuantify(AlevinOpts& aopt, bfs::path& outputDirectory, CFreqMapT& freqCounter); diff --git a/src/AlevinUtils.cpp b/src/AlevinUtils.cpp index 74e8de079..6f2e72344 100644 --- a/src/AlevinUtils.cpp +++ b/src/AlevinUtils.cpp @@ -93,6 +93,14 @@ namespace alevin { return &seq2; } template <> + std::string* getReadSequence(apt::SplitSeqV1& protocol, + std::string& seq, + std::string& seq2, + std::string& subseq){ + (void)seq2; // fastq2 contains barcode and umi + return &seq; + } + template <> std::string* getReadSequence(apt::SplitSeqV2& protocol, std::string& seq, std::string& seq2, @@ -188,6 +196,15 @@ namespace alevin { (umi.assign(read, pt.barcodeLength, pt.umiLength), true) : false; } template <> + bool extractUMI(std::string& read, + std::string& read2, + apt::SplitSeqV1& pt, + std::string& umi){ + (void)read; + return (read2.length() >= pt.barcodeLength + pt.umiLength) ? + (umi.assign(read2, 0, pt.umiLength), true) : false; + } + template <> bool extractUMI(std::string& read, std::string& read2, apt::SplitSeqV2& pt, @@ -343,6 +360,18 @@ namespace alevin { } else { return false; } + } + template <> + bool extractBarcode(std::string& read, + std::string& read2, + apt::SplitSeqV1& pt, + std::string& bc){ + (void)read; + + return (read2.length() >= pt.bc1Pos + pt.bcLen) ? + (bc.assign(read2.substr(pt.bc1Pos, pt.bcLen) + read2.substr(pt.bc2Pos, pt.bcLen) + + read2.substr(pt.bc3Pos, pt.bcLen), 0, pt.barcodeLength), true) : false; + } template <> bool extractBarcode(std::string& read, std::string& read2, @@ -350,7 +379,7 @@ ... std::string& bc){ (void)read; - return (read2.length() >= pt.barcodeLength) ? + return (read2.length() >= pt.bc1Pos + pt.bcLen) ? 
(bc.assign(read2.substr(pt.bc1Pos, pt.bcLen) + read2.substr(pt.bc2Pos, pt.bcLen) + read2.substr(pt.bc3Pos, pt.bcLen), 0, pt.barcodeLength), true) : false; } @@ -1414,6 +1443,10 @@ namespace alevin { SalmonOpts& sopt, bool noTgMap, boost::program_options::variables_map& vm); template + bool processAlevinOpts(AlevinOpts<apt::SplitSeqV1>& aopt, + SalmonOpts& sopt, bool noTgMap, + boost::program_options::variables_map& vm); + template bool processAlevinOpts(AlevinOpts& aopt, SalmonOpts& sopt, bool noTgMap, boost::program_options::variables_map& vm); diff --git a/src/CollapsedCellOptimizer.cpp b/src/CollapsedCellOptimizer.cpp index 42b154bcc..5c868864e 100644 --- a/src/CollapsedCellOptimizer.cpp +++ b/src/CollapsedCellOptimizer.cpp @@ -1495,6 +1495,16 @@ bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap, CFreqMapT& freqCounter, size_t numLowConfidentBarcode); template +bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap, + spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap, + spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap, + AlevinOpts<apt::SplitSeqV1>& aopt, + GZipWriter& gzw, + std::vector<std::string>& trueBarcodes, + std::vector<uint32_t>& umiCount, + CFreqMapT& freqCounter, + size_t numLowConfidentBarcode); +template bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap, spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap, spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap, diff --git a/src/GZipWriter.cpp b/src/GZipWriter.cpp index 18b4d6e1a..0fe3e4ff2 100644 --- a/src/GZipWriter.cpp +++ b/src/GZipWriter.cpp @@ -1889,6 +1889,10 @@ bool GZipWriter::writeEquivCounts( const AlevinOpts& aopts, SCExpT& readExp); template +bool GZipWriter::writeEquivCounts( + const AlevinOpts<apt::SplitSeqV1>& aopts, + SCExpT& readExp); +template bool GZipWriter::writeEquivCounts( const AlevinOpts& aopts, SCExpT& readExp); @@ -1930,6 +1934,9 @@ template bool GZipWriter::writeMetaAlevin(const AlevinOpts& opts, boost::filesystem::path aux_dir); template bool +GZipWriter::writeMetaAlevin(const AlevinOpts<apt::SplitSeqV1>& opts, + boost::filesystem::path aux_dir); +template bool GZipWriter::writeMetaAlevin(const AlevinOpts& opts, boost::filesystem::path aux_dir); template bool diff --git a/src/ProgramOptionsGenerator.cpp b/src/ProgramOptionsGenerator.cpp index e20e90022..58b699e31 100644 --- a/src/ProgramOptionsGenerator.cpp +++ b/src/ProgramOptionsGenerator.cpp @@ -410,6 +410,9 @@ namespace salmon { ( "celseq2", po::bool_switch()->default_value(alevin::defaults::isCELSeq2), "Use CEL-Seq2 Single Cell protocol for the library.") + ( + "splitseqV1", po::bool_switch()->default_value(alevin::defaults::isSplitSeqV1), + "Use Split-SeqV1 Single Cell protocol for the library.") ( "splitseqV2", po::bool_switch()->default_value(alevin::defaults::isSplitSeqV2), "Use Split-SeqV2 Single Cell protocol for the library.") diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp index 8b4fc5d97..6009353d1 100644 --- a/src/SalmonAlevin.cpp +++ b/src/SalmonAlevin.cpp @@ -3138,6 +3138,19 @@ alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, CFreqMapT& freqCounter, size_t numLowConfidentBarcode, std::unique_ptr<SalmonIndex>& salmonIndex); +template int +alevin_sc_align(AlevinOpts<apt::SplitSeqV1>& aopt, SalmonOpts& sopt, + boost::program_options::parsed_options& orderedOptions, + std::unique_ptr<SalmonIndex>& salmonIndex); +template int +alevinQuant(AlevinOpts<apt::SplitSeqV1>& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap, + spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr<SalmonIndex>& salmonIndex); + template int alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, 
boost::program_options::parsed_options& orderedOptions, diff --git a/src/WhiteList.cpp b/src/WhiteList.cpp index ccbeb5bcb..39ce75fcf 100644 --- a/src/WhiteList.cpp +++ b/src/WhiteList.cpp @@ -284,6 +284,10 @@ namespace alevin { std::vector<std::string>& trueBarcodes, bool useRibo, bool useMito, size_t numLowConfidentBarcode); + template bool performWhitelisting(AlevinOpts<apt::SplitSeqV1>& aopt, + std::vector<std::string>& trueBarcodes, + bool useRibo, bool useMito, + size_t numLowConfidentBarcode); template bool performWhitelisting(AlevinOpts& aopt, std::vector<std::string>& trueBarcodes, bool useRibo, bool useMito, From efc2ce971a245b01422e93b661b89f5891090c82 Mon Sep 17 00:00:00 2001 From: Gaurav Date: Tue, 7 Dec 2021 15:24:56 -0500 Subject: [PATCH 05/13] swap bc1 and bc3 --- include/SingleCellProtocols.hpp | 4 ++-- src/AlevinUtils.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/SingleCellProtocols.hpp b/include/SingleCellProtocols.hpp index d62aaa0f0..bd93cc256 100644 --- a/include/SingleCellProtocols.hpp +++ b/include/SingleCellProtocols.hpp @@ -179,12 +179,12 @@ namespace alevin{ struct SplitSeqV2 : Rule{ SplitSeqV2(): Rule(24, 10, BarcodeEnd::FIVE, 4294967295){} - std::size_t const bcLen = 8, bc1Pos = 78, bc2Pos = 48, bc3Pos = 10; + std::size_t const bcLen = 8, bc1Pos = 10, bc2Pos = 48, bc3Pos = 78; }; struct SplitSeqV1 : Rule{ SplitSeqV1(): Rule(24, 10, BarcodeEnd::FIVE, 4294967295){} - std::size_t const bcLen = 8, bc1Pos = 86, bc2Pos = 48, bc3Pos = 10; + std::size_t const bcLen = 8, bc1Pos = 10, bc2Pos = 48, bc3Pos = 86; }; //dummy class diff --git a/src/AlevinUtils.cpp b/src/AlevinUtils.cpp index 6f2e72344..a5bad4a9f 100644 --- a/src/AlevinUtils.cpp +++ b/src/AlevinUtils.cpp @@ -368,7 +368,7 @@ namespace alevin { std::string& bc){ (void)read; - return (read2.length() >= pt.bc1Pos + pt.bcLen) ? + return (read2.length() >= pt.bc3Pos + pt.bcLen) ? (bc.assign(read2.substr(pt.bc1Pos, pt.bcLen) + read2.substr(pt.bc2Pos, pt.bcLen) + read2.substr(pt.bc3Pos, pt.bcLen), 0, pt.barcodeLength), true) : false; } @@ -379,7 +379,7 @@ ... - return (read2.length() >= pt.bc1Pos + pt.bcLen) ? + return (read2.length() >= pt.bc3Pos + pt.bcLen) ? ... From 90e348ff94aa3bf6940f4f211a05ddf8d325e466 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Wed, 29 Dec 2021 17:30:45 -0500 Subject: [PATCH 06/13] do valid value of k checking before calling out to indexer --- src/BuildSalmonIndex.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/BuildSalmonIndex.cpp b/src/BuildSalmonIndex.cpp index a9fc758fb..96e49f860 100644 --- a/src/BuildSalmonIndex.cpp +++ b/src/BuildSalmonIndex.cpp @@ -200,12 +200,21 @@ Creates a salmon index. // Build a quasi-mapping index if (usePuff) { idxOpt.outdir = indexDirectory.string(); - if (idxOpt.k == 0) { + uint32_t k = idxOpt.k; + if (k == 0) { jointLog->info( "You cannot have a k-mer length of 0 with the pufferfish index."); jointLog->info("Setting to the default value of 31."); idxOpt.k = 31; + } else if (k % 2 == 0) { + jointLog->critical("Error: k must be an odd value, you chose {}.", k); + return 1; + } else if (k > 31) { + jointLog->critical("Error: k must not be larger than 31, you chose {}.", k); + return 1; } + // if we reach here, k is OK, either by virtue + // of the value passed, or of us setting it to 31. 
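The new check is easy to exercise from the command line. A few illustrative invocations (transcriptome file and index directory names here are placeholders):

  salmon index -t txome.fa -i txome_idx -k 31   # accepted
  salmon index -t txome.fa -i txome_idx -k 32   # rejected: k must be odd
  salmon index -t txome.fa -i txome_idx -k 33   # rejected: k must not exceed 31

The constraints are the usual ones for this style of index: an odd k guarantees that no k-mer can be its own reverse complement (which canonical k-mer handling relies on), and capping k at 31 lets a 2-bit-encoded k-mer fit comfortably in a single 64-bit word.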
// give the user a warning if they are not using any decoy file if (idxOpt.decoy_file.empty()) { From fbd32b32d67104eb82ad612a9d6cb0d331213871 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Sun, 9 Jan 2022 00:54:44 -0500 Subject: [PATCH 07/13] optimization of sketch filtering --- src/SalmonAlevin.cpp | 86 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp index 6009353d1..99a5d1db0 100644 --- a/src/SalmonAlevin.cpp +++ b/src/SalmonAlevin.cpp @@ -541,6 +541,10 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re return added; } + inline uint32_t max_hits_for_target() { + return std::max(fw_hits, rc_hits); + } + // true if forward, false if rc // second element is score inline HitDirection best_hit_direction() { @@ -721,7 +725,8 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re uint64_t largest_occ{0}; float perfect_score{0.0}; auto& raw_hits = memCollector.get_left_hits(); - + + // SANITY decltype(raw_hits[0].first) prev_read_pos = -1; // the maximum span the supporting k-mers of a @@ -751,11 +756,11 @@ ... if (read_pos <= prev_read_pos) { salmonOpts.jointLog->warn("read_pos : {}, prev_read_pos : {}", read_pos, prev_read_pos); } - + + bool still_have_valid_target = false; prev_read_pos = read_pos; if (num_occ < salmonOpts.maxReadOccs) { - ++num_valid_hits; total_occs += num_occ; largest_occ = (num_occ > largest_occ) ? num_occ : largest_occ; float score_inc = 1.0 / num_occ; @@ -766,18 +771,44 @@ ... uint32_t tid = static_cast<uint32_t>(qidx->getRefId(pos_it.transcript_id())); int32_t pos = static_cast<int32_t>(ref_pos_ori.pos); bool ori = ref_pos_ori.isFW; - if (ori) { - hit_map[tid].add_fw(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); - } else { - hit_map[tid].add_rc(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); + auto& target = hit_map[tid]; + + // why >= here instead of ==? + // Because hits can happen on the same target in both the forward + // and rc orientations, it is possible that we start the loop with + // the target having num_valid_hits hits in a given orientation (o); + // we see a new hit for this target in orientation o (now it has num_valid_hits + 1), + // then we see a hit for this target in orientation rc(o). We still want to + // add / consider this hit, but max_hits_for_target() > num_valid_hits. + // So, we must allow for that here. + if (target.max_hits_for_target() >= num_valid_hits) { + //if (target.max_hits_for_target() > num_valid_hits) { salmonOpts.jointLog->info("WTF : mhft {}, nvh {}", target.max_hits_for_target(), num_valid_hits); } + if (ori) { + target.add_fw(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); + } else { + target.add_rc(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); + } + + still_have_valid_target |= (target.max_hits_for_target() >= num_valid_hits + 1); } + } // DONE: for (auto &pos_it : refs) + + ++num_valid_hits; + + // if there are no targets reaching the valid hit threshold, then break early + if (!still_have_valid_target) { + break; + } + + + } // DONE : if (static_cast<uint64_t>(refs.size()) < salmonOpts.maxReadOccs) } // DONE : for (auto& raw_hit : raw_hits) // If our default threshold was too stringent, then set a more liberal // threshold and look up the k-mers that occur the least frequently. 
- // Specifically, if the min occuring hits have frequency < min_thresh_prime (2500 by default) + // Specifically, if the min occurring hits have frequency < max_allowed_occ (2500 by default) // times, then collect the min occuring hits to get the mapping. // TODO: deal with code duplication below. size_t max_allowed_occ = 2500; if ((min_occ >= salmonOpts.maxReadOccs) and (min_occ < max_allowed_occ)) { prev_read_pos = -1; max_allowed_occ = min_occ; @@ -799,10 +830,11 @@ ... prev_read_pos); } + bool still_have_valid_target = false; + prev_read_pos = read_pos; if (num_occ <= max_allowed_occ) { - ++num_valid_hits; total_occs += num_occ; largest_occ = (num_occ > largest_occ) ? num_occ : largest_occ; @@ -815,18 +847,37 @@ ... qidx->getRefId(pos_it.transcript_id())); int32_t pos = static_cast<int32_t>(ref_pos_ori.pos); bool ori = ref_pos_ori.isFW; + auto& target = hit_map[tid]; + + // why >= here instead of ==? + // Because hits can happen on the same target in both the forward + // and rc orientations, it is possible that we start the loop with + // the target having num_valid_hits hits in a given orientation (o); + // we see a new hit for this target in orientation o (now it has num_valid_hits + 1), + // then we see a hit for this target in orientation rc(o). We still want to + // add / consider this hit, but max_hits_for_target() > num_valid_hits. + // So, we must allow for that here. + if (target.max_hits_for_target() >= num_valid_hits) { if (ori) { - hit_map[tid].add_fw(pos, - static_cast<int32_t>(read_pos), - max_stretch, score_inc); + target.add_fw(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); } else { - hit_map[tid].add_rc(pos, - static_cast<int32_t>(read_pos), - max_stretch, score_inc); + target.add_rc(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); } + + still_have_valid_target |= (target.max_hits_for_target() >= num_valid_hits + 1); + } + + } // DONE: for (auto &pos_it : refs) + + ++num_valid_hits; + // if there are no targets reaching the valid hit threshold, then break early + if (!still_have_valid_target) { + break; + } + + } // DONE : if (num_occ <= max_allowed_occ) - } // DONE : if (static_cast<uint64_t>(refs.size()) < - // salmonOpts.maxReadOccs) } // DONE : for (auto& raw_hit : raw_hits) } @@ -876,7 +927,6 @@ ... } } */ - } // DONE : if (rh) } else { From 5166707dbcd276d16b3da5c1dd3a20dff98c3bc4 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Mon, 10 Jan 2022 17:13:28 -0500 Subject: [PATCH 08/13] Slight refactor and added an option * Refactor sketch hit -> mapping code in alevin by breaking out redundant code into a lambda. * Added a new command line parameter --maxRecoverReadOcc to set the threshold for the number of places a read can occur when we attempt recovery in alevin with --sketch. This was previously hard-coded; the default value is now the old hard-coded value. 
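To illustrate the new option (the index, read-file, and output names below are placeholders):

  salmon alevin -l ISR -i txome_idx --chromiumV3 --sketch \
    -1 reads_1.fastq.gz -2 reads_2.fastq.gz -p 8 -o alevin_out \
    --maxRecoverReadOcc 2500

With the defaults (maxReadOcc 200, maxRecoverReadOcc 2500), a read whose least-frequent seed match occurs in 200 or more places, but fewer than 2500, now has its mappings recovered rather than discarded. Setting --maxRecoverReadOcc at or below maxReadOcc disables the recovery pass entirely (attempt_occ_recover becomes false in the code below).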
--- include/SalmonDefaults.hpp | 1 + include/SalmonOpts.hpp | 4 + src/ProgramOptionsGenerator.cpp | 6 ++ src/SalmonAlevin.cpp | 186 +++++++++++--------------------- 4 files changed, 75 insertions(+), 122 deletions(-) diff --git a/include/SalmonDefaults.hpp b/include/SalmonDefaults.hpp index 6016d8bd4..c68ce4660 100644 --- a/include/SalmonDefaults.hpp +++ b/include/SalmonDefaults.hpp @@ -61,6 +61,7 @@ namespace defaults { constexpr const uint32_t maxSMEMOccs{200}; constexpr const bool initUniform{false}; constexpr const uint32_t maxReadOccs{200}; + constexpr const uint32_t maxRecoverReadOccs{2500}; constexpr const uint32_t maxOccsPerHit{1000}; constexpr const bool noLengthCorrection{false}; constexpr const bool noEffectiveLengthCorrection{false}; diff --git a/include/SalmonOpts.hpp b/include/SalmonOpts.hpp index a1d4c9e47..3d0be1989 100644 --- a/include/SalmonOpts.hpp +++ b/include/SalmonOpts.hpp @@ -146,6 +146,10 @@ struct SalmonOpts { uint32_t maxReadOccs; // Discard reads mapping to more than this many places. + uint32_t maxRecoverReadOccs; // If a read had hits but maps to >= maxReadOccs loci, then + // try to recover mappings up to this many loci (currently only + // affects behavior in alevin --sketch mode). + uint32_t maxExpectedReadLen; // Maximum expected length of an observed read. // hidden / for extreme control diff --git a/src/ProgramOptionsGenerator.cpp b/src/ProgramOptionsGenerator.cpp index 58b699e31..e7f4da381 100644 --- a/src/ProgramOptionsGenerator.cpp +++ b/src/ProgramOptionsGenerator.cpp @@ -675,6 +675,12 @@ namespace salmon { po::value<uint32_t>(&(sopt.maxReadOccs))->default_value(salmon::defaults::maxReadOccs), "Reads \"mapping\" to more than this many places won't be " "considered.") + ("maxRecoverReadOcc", + po::value<uint32_t>(&(sopt.maxRecoverReadOccs))->default_value(salmon::defaults::maxRecoverReadOccs), + "Relevant for alevin with \'--sketch\' mode only: if a read has valid seed matches, but none of them " + "leads to fewer than \"maxReadOcc\" mappings, then try to recover mappings for this read as long as there are " + "fewer than \"maxRecoverReadOcc\" mappings." + ) ("noLengthCorrection", po::bool_switch(&(sopt.noLengthCorrection))->default_value(salmon::defaults::noLengthCorrection), "[experimental] : Entirely disables length correction when " diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp index 99a5d1db0..35816c503 100644 --- a/src/SalmonAlevin.cpp +++ b/src/SalmonAlevin.cpp @@ -602,7 +602,14 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re ////////////////////// // NOTE: validation mapping based new parameters std::string rc1; rc1.reserve(300); - + + + // check the frequency and decide here if we should + // be attempting recovery of highly-multimapping reads + const size_t max_occ_default = salmonOpts.maxReadOccs; + const size_t max_occ_recover = salmonOpts.maxRecoverReadOccs; + const bool attempt_occ_recover = (max_occ_recover > max_occ_default); + size_t numMappingsDropped{0}; size_t numDecoyFrags{0}; const double decoyThreshold = salmonOpts.decoyThreshold; 
- // NOTE this is still > read_length b/c the stretch is measured wrt the + // NOTE this is still > read_length b/c the stretch is measured wrt the // START of the terminal k-mer. int32_t max_stretch = static_cast(readSubSeq->length() * 1.0); @@ -740,81 +745,17 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re // the least frequent hit for this fragment. uint64_t min_occ = std::numeric_limits::max(); - // this is false by default and will be set to true - // if *every* collected hit for this fragment occurs + // this is false by default and will be set to true + // if *every* collected hit for this fragment occurs // salmonOpts.maxReadOccs times or more. bool had_alt_max_occ = false; - for (auto& raw_hit : raw_hits) { - auto& read_pos = raw_hit.first; - auto& proj_hits = raw_hit.second; - auto& refs = proj_hits.refRange; - uint64_t num_occ = static_cast(refs.size()); - min_occ = std::min(min_occ, num_occ); - - // SANITY - if (read_pos <= prev_read_pos) { - salmonOpts.jointLog->warn("read_pos : {}, prev_read_pos : {}", read_pos, prev_read_pos); - } - - bool still_have_valid_target = false; - prev_read_pos = read_pos; - if (num_occ < salmonOpts.maxReadOccs) { - - total_occs += num_occ; - largest_occ = (num_occ > largest_occ) ? num_occ : largest_occ; - float score_inc = 1.0 / num_occ; - perfect_score += score_inc; - - for (auto &pos_it : refs) { - const auto& ref_pos_ori = proj_hits.decodeHit(pos_it); - uint32_t tid = static_cast(qidx->getRefId(pos_it.transcript_id())); - int32_t pos = static_cast(ref_pos_ori.pos); - bool ori = ref_pos_ori.isFW; - auto& target = hit_map[tid]; - - // why >= here instead of ==? - // Because hits can happen on the same target in both the forward - // and rc orientations, it is possible that we start the loop with - // the target having num_valid_hits hits in a given orientation (o) - // we see a new hit for this target in oriention o (now it has num_valid_hits + 1) - // then we see a hit for this target in orientation rc(o). We still want to - // add / consider this hit, but max_hits_for_target() > num_valid_hits. - // So, we must allow for that here. - if (target.max_hits_for_target() >= num_valid_hits) { - //if (target.max_hits_for_target() > num_valid_hits) { salmonOpts.jointLog->info("WTF : mhft {}, nvh {}", target.max_hits_for_target(), num_valid_hits); } - if (ori) { - target.add_fw(pos, static_cast(read_pos), max_stretch, score_inc); - } else { - target.add_rc(pos, static_cast(read_pos), max_stretch, score_inc); - } - - still_have_valid_target |= (target.max_hits_for_target() >= num_valid_hits + 1); - } - - } // DONE: for (auto &pos_it : refs) - - ++num_valid_hits; - - // if there are no targets reaching the valid hit threshold, then break early - if (!still_have_valid_target) { - break; - } - - - - } // DONE : if (static_cast(refs.size()) < salmonOpts.maxReadOccs) - } // DONE : for (auto& raw_hit : raw_hits) - - // If our default threshold was too stringent, then set a more liberal - // threshold and look up the k-mers that occur the least frequently. - // Specifically, if the min occuring hits have frequency < max_allowed_occ (2500 by default) - // times, then collect the min occuring hits to get the mapping. - // TODO: deal with code duplication below. 
- size_t max_allowed_occ = 2500; - if ((min_occ >= salmonOpts.maxReadOccs) and (min_occ < max_allowed_occ)) { - prev_read_pos = -1; - max_allowed_occ = min_occ; + auto collect_mappings_from_hits = [&max_stretch, &min_occ, &hit_map, + &salmonOpts, &num_valid_hits, &total_occs, + &largest_occ, &qidx]( + auto& raw_hits, auto& prev_read_pos, + auto& max_allowed_occ, auto& had_alt_max_occ + ) -> bool { for (auto& raw_hit : raw_hits) { auto& read_pos = raw_hit.first; auto& proj_hits = raw_hit.second; auto& refs = proj_hits.refRange; uint64_t num_occ = static_cast<uint64_t>(refs.size()); min_occ = std::min(min_occ, num_occ); had_alt_max_occ = true; - // SANITY - if (read_pos <= prev_read_pos) { - salmonOpts.jointLog->warn( - "read_pos : {}, prev_read_pos : {}", read_pos, - prev_read_pos); - } + bool still_have_valid_target = false; prev_read_pos = read_pos; if (num_occ <= max_allowed_occ) { total_occs += num_occ; largest_occ = (num_occ > largest_occ) ? num_occ : largest_occ; float score_inc = 1.0; for (auto &pos_it : refs) { const auto& ref_pos_ori = proj_hits.decodeHit(pos_it); uint32_t tid = static_cast<uint32_t>(qidx->getRefId(pos_it.transcript_id())); int32_t pos = static_cast<int32_t>(ref_pos_ori.pos); bool ori = ref_pos_ori.isFW; auto& target = hit_map[tid]; + + // Why >= here instead of == ? + // Because hits can happen on the same target in both the forward + // and rc orientations, it is possible that we start the loop with + // the target having num_valid_hits hits in a given orientation (o); + // we see a new hit for this target in orientation o (now it has num_valid_hits + 1), + // then we see a hit for this target in orientation rc(o). We still want to + // add / consider this hit, but max_hits_for_target() > num_valid_hits. + // So, we must allow for that here. 
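To make the invariant described in the comment concrete, here is a short illustrative trace (the numbers are invented): suppose three k-mer hits have already been processed, so num_valid_hits = 3, and a still-viable target t has fw_hits = 3. While processing the fourth k-mer, a forward-orientation position on t arrives first, so fw_hits becomes 4 and max_hits_for_target() = 4 > num_valid_hits = 3. If the same k-mer then hits t in the reverse-complement orientation, the >= comparison (4 >= 3) still admits that hit and rc_hits is incremented, whereas a strict == would silently drop a hit on a target that is still viable.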
+ if (target.max_hits_for_target() >= num_valid_hits) { if (ori) { - target.add_fw(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); - } else { - target.add_rc(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); + target.add_fw(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); + } else { + target.add_rc(pos, static_cast<int32_t>(read_pos), max_stretch, score_inc); + } + + still_have_valid_target |= (target.max_hits_for_target() >= num_valid_hits + 1); } - - } // DONE: for (auto &pos_it : refs) ++num_valid_hits; + // if there are no targets reaching the valid hit threshold, then break early - if (!still_have_valid_target) { - break; - } + if (!still_have_valid_target) { return true; } + + } // DONE : if (num_occ <= max_allowed_occ) + } // DONE : for (auto& raw_hit : raw_hits) + + return false; + }; bool _discard = false; auto mao_first_pass = max_occ_default - 1; bool early_stop = collect_mappings_from_hits(raw_hits, prev_read_pos, mao_first_pass, _discard); // If our default threshold was too stringent, then fallback to a more liberal // threshold and look up the k-mers that occur the least frequently. // Specifically, if the min occurring hits have frequency < max_occ_recover (2500 by default) // times, then collect the min occurring hits to get the mapping. if (attempt_occ_recover and (min_occ >= max_occ_default) and (min_occ < max_occ_recover)) { prev_read_pos = -1; uint64_t max_allowed_occ = min_occ; early_stop = collect_mappings_from_hits(raw_hits, prev_read_pos, max_allowed_occ, had_alt_max_occ); } - //float perfect_score = static_cast<float>(num_valid_hits) / total_occs; - float acceptable_score = (num_valid_hits == 1) ? perfect_score : - perfect_score - (1.0f / largest_occ); uint32_t best_alt_hits = 0; int32_t signed_read_len = static_cast<int32_t>(readSubSeq->length()); - bool saw_acceptable_score = false; for (auto& kv : hit_map) { auto best_hit_dir = kv.second.best_hit_direction(); // if the best direction is FW or BOTH, add the fw hit @@ -914,7 +853,10 @@ alt_max_occ = had_alt_max_occ ? accepted_hits.size() : salmonOpts.maxReadOccs; /* + * This rule, if enabled, allows through mappings missing a single hit, if there + * was no mapping with all hits. NOTE: this won't work with the current early-exit + * optimization however. 
if (accepted_hits.empty() and (num_valid_hits > 1) and (best_alt_hits >= num_valid_hits - 1)) { for (auto& kv : hit_map) { auto simple_hit = kv.second.get_best_hit(); if (simple_hit.num_hits >= num_valid_hits - 1) { simple_hit.tid = kv.first; accepted_hits.emplace_back(simple_hit); } } } - */ + */ } // DONE : if (rh) } else { From 575a2fe19aff95f98904980694ee74a2eadb64a5 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Fri, 21 Jan 2022 14:06:26 -0500 Subject: [PATCH 09/13] bump fqfeeder and phmap --- include/FastxParser.hpp | 31 +- include/FastxParserThreadUtils.hpp | 13 +- include/SalmonUtils.hpp | 8 + include/kseq++.hpp | 740 ++++ include/parallel_hashmap/btree.h | 4050 +++++++++++++++++++++ include/parallel_hashmap/conanfile.py | 36 + include/parallel_hashmap/phmap.h | 960 +++-- include/parallel_hashmap/phmap_base.h | 52 +- include/parallel_hashmap/phmap_bits.h | 18 +- include/parallel_hashmap/phmap_config.h | 25 +- include/parallel_hashmap/phmap_dump.h | 260 ++ include/parallel_hashmap/phmap_fwd_decl.h | 68 +- include/parallel_hashmap/phmap_utils.h | 2 + scripts/fetchPufferfish.sh | 4 +- src/FastxParser.cpp | 117 +- src/SalmonAlevin.cpp | 10 +- src/SalmonUtils.cpp | 15 + 17 files changed, 5960 insertions(+), 449 deletions(-) create mode 100644 include/kseq++.hpp create mode 100644 include/parallel_hashmap/btree.h create mode 100644 include/parallel_hashmap/conanfile.py create mode 100644 include/parallel_hashmap/phmap_dump.h diff --git a/include/FastxParser.hpp b/include/FastxParser.hpp index 8a9b65a5d..7b141a58c 100644 --- a/include/FastxParser.hpp +++ b/include/FastxParser.hpp @@ -10,9 +10,7 @@ #include #include -extern "C" { -#include "kseq.h" -} +#include "kseq++.hpp" #include "concurrentqueue.h" @@ -59,27 +57,24 @@ typename _Unique_if<T>::_Known_bound make_unique(Args&&...) = delete; #endif //__FASTX_PARSER_PRECXX14_MAKE_UNIQUE__ namespace fastx_parser { -struct ReadSeq { - std::string seq; - std::string name; - ~ReadSeq() {} -}; -struct ReadQual { - std::string seq; - std::string name; - std::string qual; - ~ReadQual() {} -}; +using ReadSeq = klibpp::KSeq; +using ReadQual = klibpp::KSeq; +// The ReadPair and ReadQualPair are obviously +// redundant. But, having them as separate types +// here would allow us to say something at compile +// time about if we expect to be able to look +// at qualities etc. Think more about if we +// really want to keep both of these. 
struct ReadPair { - ReadSeq first; - ReadSeq second; + klibpp::KSeq first; + klibpp::KSeq second; }; struct ReadQualPair { - ReadQual first; - ReadQual second; + klibpp::KSeq first; + klibpp::KSeq second; }; template <typename T> class ReadChunk { diff --git a/include/FastxParserThreadUtils.hpp b/include/FastxParserThreadUtils.hpp index 413e17d2a..0a18c4b19 100644 --- a/include/FastxParserThreadUtils.hpp +++ b/include/FastxParserThreadUtils.hpp @@ -6,8 +6,13 @@ #include #include #include + #if defined(__SSE2__) -#include "simde/x86/sse2.h" + #if defined(HAVE_SIMDE) + #include "simde/x86/sse2.h" + #else + #include <emmintrin.h> + #endif #endif // Most of this code is taken directly from @@ -23,7 +28,11 @@ static const size_t MAX_BACKOFF_ITERS = 1024; ALWAYS_INLINE static void cpuRelax() { #if defined(__SSE2__) // AMD and Intel - simde_mm_pause(); + #if defined(HAVE_SIMDE) + simde_mm_pause(); + #else + _mm_pause(); + #endif #elif defined(__i386__) || defined(__x86_64__) asm volatile("pause"); #elif defined(__aarch64__) diff --git a/include/SalmonUtils.hpp b/include/SalmonUtils.hpp index 8615a36a2..a5507732c 100644 --- a/include/SalmonUtils.hpp +++ b/include/SalmonUtils.hpp @@ -189,6 +189,14 @@ inline void incLoop(std::atomic<double>& val, double inc) { std::string getCurrentTimeAsString(); +// encodes the heuristic for guessing how threads should +// be allocated based on the available reads +// returns true if input was modified and false otherwise. +bool configure_parsing(size_t nfiles, // input param + size_t& worker_threads, // input/output param + uint32_t& parse_threads // input/output param +); + bool validateOptionsAlignment_(SalmonOpts& sopt); bool validateOptionsMapping_(SalmonOpts& sopt); diff --git a/include/kseq++.hpp b/include/kseq++.hpp new file mode 100644 index 000000000..7cebd636f --- /dev/null +++ b/include/kseq++.hpp @@ -0,0 +1,740 @@ +/** + * @file kseq++.hpp + * @brief C++ implementation of kseq library. + * + * This is a header-only library re-implementing the original kseq library. + * + * @author Ali Ghaffaari (\@cartoonist), + * + * @internal + * Created: Sun Jul 15, 2018 19:15 + * Organization: Max-Planck-Institut fuer Informatik + * Copyright: Copyright (c) 2018, Ali Ghaffaari + * + * This source code is released under the terms of the MIT License. + * See LICENSE file for more information. 
+ */ + +#ifndef KSEQPP_KSEQPP_HPP__ +#define KSEQPP_KSEQPP_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#include "config.hpp" + +namespace klibpp { + template< typename TFile, + typename TFunc, + typename TSpec > + class KStream; + + class KStreamBase_ { + protected: + /* Typedefs */ + using size_type = long int; + using char_type = char; + }; + + struct KSeq { // kseq_t + std::string name; + std::string comment; + std::string seq; + std::string qual; + inline void clear( ) { + name.clear(); + comment.clear(); + seq.clear(); + qual.clear(); + } + }; + + namespace mode { + struct In_ { }; + struct Out_ { }; + + constexpr In_ in; + constexpr Out_ out; + } /* ----- end of namespace mode ----- */ + + namespace format { + enum Format { mix, fasta, fastq }; + } + + struct KEnd_ {}; + constexpr KEnd_ kend; + + template< typename TFile, + typename TFunc > + class KStream< TFile, TFunc, mode::Out_ > : public KStreamBase_ { + public: + /* Typedefs */ + using base_type = KStreamBase_; + using spec_type = mode::Out_; + using size_type = base_type::size_type; + using char_type = base_type::char_type; + using close_type = int(*)( TFile ); + protected: + /* Consts */ + constexpr static std::make_unsigned_t< size_type > DEFAULT_BUFSIZE = 131072; + constexpr static unsigned int DEFAULT_WRAPLEN = 60; + constexpr static format::Format DEFAULT_FORMAT = format::mix; + /* Data members */ + char_type* m_buf; /**< @brief character buffer */ + char_type* w_buf; /**< @brief second character buffer */ + size_type bufsize; /**< @brief buffer size */ + std::thread worker; /**< @brief worker thread */ + std::unique_ptr< std::mutex > bufslock; /**< @brief buffers mutex */ + std::unique_ptr< std::condition_variable > cv; /**< @brief consumer/producer condition variable */ + bool terminate; /**< @brief thread terminate flag XXX: set before notify */ + bool produced; /**< @brief produced flag. 
XXX: SHARED (data race) */ + size_type m_begin; /**< @brief begin buffer index */ + size_type m_end; /**< @brief end buffer index or error flag if -1 */ + size_type w_end; /**< @brief end second buffer index or error flag if -1 */ + unsigned int wraplen; /**< @brief line wrap length */ + unsigned long int counter; /**< @brief number of records written so far */ + format::Format fmt; /**< @brief format of the output records */ + TFile f; /**< @brief file handler */ + TFunc func; /**< @brief write function */ + close_type close; /**< @brief close function */ + public: + KStream( TFile f_, + TFunc func_, + spec_type=mode::out, + format::Format fmt_=DEFAULT_FORMAT, + std::make_unsigned_t< size_type > bs_=DEFAULT_BUFSIZE, + close_type cfunc_=nullptr ) + : m_buf( new char_type[ bs_ ] ), w_buf( new char_type[ bs_ ] ), + bufsize( bs_ ), bufslock( new std::mutex ), cv( new std::condition_variable ), + wraplen( DEFAULT_WRAPLEN ), fmt( fmt_ ), f( std::move( f_ ) ), + func( std::move( func_ ) ), close( cfunc_ ) + { + this->m_begin = 0; + this->m_end = 0; + this->terminate = false; + this->produced = false; + this->w_end = 0; + this->counter = 0; + this->worker_start(); + } + + KStream( TFile f_, + TFunc func_, + format::Format fmt_, + std::make_unsigned_t< size_type > bs_=DEFAULT_BUFSIZE, + close_type cfunc_=nullptr ) + : KStream( std::move( f_ ), std::move( func_ ), mode::out, fmt_, bs_, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + spec_type, + format::Format fmt_, + close_type cfunc_ ) + : KStream( std::move( f_ ), std::move( func_ ), mode::out, fmt_, DEFAULT_BUFSIZE, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + format::Format fmt_, + close_type cfunc_ ) + : KStream( std::move( f_ ), std::move( func_ ), mode::out, fmt_, DEFAULT_BUFSIZE, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + spec_type, + std::make_unsigned_t< size_type > bs_, + close_type cfunc_=nullptr ) + : KStream( std::move( f_ ), std::move( func_ ), mode::out, DEFAULT_FORMAT, bs_, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + std::make_unsigned_t< size_type > bs_, + close_type cfunc_=nullptr ) + : KStream( std::move( f_ ), std::move( func_ ), mode::out, DEFAULT_FORMAT, bs_, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + spec_type, + close_type cfunc_ ) + : KStream( std::move( f_ ), std::move( func_ ), mode::out, DEFAULT_FORMAT, DEFAULT_BUFSIZE, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + close_type cfunc_ ) + : KStream( std::move( f_ ), std::move( func_ ), mode::out, DEFAULT_FORMAT, DEFAULT_BUFSIZE, cfunc_ ) + { } + + KStream( KStream const& ) = delete; + KStream& operator=( KStream const& ) = delete; + + KStream( KStream&& other ) noexcept + { + other.worker_join(); + this->m_buf = other.m_buf; + this->w_buf = other.w_buf; + other.m_buf = nullptr; + other.w_buf = nullptr; + this->bufsize = other.bufsize; + this->bufslock = std::move( other.bufslock ); + this->cv = std::move( other.cv ); + this->terminate = false; + this->produced = other.produced; + this->m_begin = other.m_begin; + this->m_end = other.m_end; + this->w_end = other.w_end; + this->wraplen = other.wraplen; + this->counter = other.counter; + this->fmt = other.fmt; + this->f = std::move( other.f ); + this->func = std::move( other.func ); + this->close = other.close; + this->worker_start(); + } + + KStream& operator=( KStream&& other ) noexcept + { + if ( this == &other ) return *this; + other.worker_join(); + delete[] this->m_buf; + delete[] this->w_buf; + this->m_buf = other.m_buf; + this->w_buf = other.w_buf; + 
other.m_buf = nullptr; + other.w_buf = nullptr; + this->bufsize = other.bufsize; + this->bufslock = std::move( other.bufslock ); + this->cv = std::move( other.cv ); + this->terminate = false; + this->produced = other.produced; + this->m_begin = other.m_begin; + this->m_end = other.m_end; + this->w_end = other.w_end; + this->wraplen = other.wraplen; + this->counter = other.counter; + this->fmt = other.fmt; + this->f = std::move( other.f ); + this->func = std::move( other.func ); + this->close = other.close; + this->worker_start(); + return *this; + } + + ~KStream( ) noexcept + { + this->worker_join(); + delete[] this->m_buf; + delete[] this->w_buf; + if ( this->close != nullptr ) this->close( this->f ); + } + /* Accessors */ + inline unsigned long int + counts( ) const + { + return this->counter; + } + + inline format::Format + get_format( ) const + { + return this->fmt; + } + /* Mutators */ + inline void + set_wraplen( unsigned int len ) + { + this->wraplen = len; + } + + inline void + set_format( format::Format fmt_ ) + { + this->fmt = fmt_; + } + /* Methods */ + inline bool + fail( ) const + { + return this->m_end == -1; + } + + inline KStream& + operator<<( const KSeq& rec ) + { + if ( ( this->fmt == format::mix && rec.qual.empty() ) || // FASTA record + ( this->fmt == format::fasta ) ) this->puts( '>' ); // Forced FASTA + else { + if ( rec.qual.size() != rec.seq.size() ) { + throw std::runtime_error( "the sequence length doesn't match with" + " the length of its quality string."); + } + this->puts( '@' ); // FASTQ record + } + this->puts( rec.name ); + if ( !rec.comment.empty() ) { + this->puts( ' ' ); + this->puts( rec.comment ); + } + this->puts( '\n' ); + this->puts( rec.seq, true ); + if ( ( this->fmt == format::mix && !rec.qual.empty() ) || // FASTQ record + ( this->fmt == format::fastq ) ) { // Forced FASTQ + this->puts( '\n' ); + this->puts( '+' ); + this->puts( '\n' ); + this->puts( rec.qual, true ); + } + this->puts( '\n' ); + if ( *this ) this->counter++; + return *this; + } + + inline KStream& + operator<<( format::Format fmt_ ) + { + this->fmt = fmt_; + return *this; + } + + inline KStream& + operator<<( KEnd_ ) + { + this->flush(); + return *this; + } + + operator bool( ) const + { + return !this->fail(); + } + /* Low-level methods */ + inline bool + puts( std::string const& s, bool wrap=false ) noexcept + { + if ( this->fail() ) return false; + + std::string::size_type cursor = 0; + std::string::size_type len = 0; + while ( cursor != s.size() ) { + assert( cursor < s.size() ); + if ( this->m_begin >= this->bufsize ) this->async_write(); + if ( this->fail() ) break; + if ( wrap && cursor != 0 && cursor % this->wraplen == 0 ) { + this->m_buf[ this->m_begin++ ] = '\n'; + } + len = std::min( s.size() - cursor, + static_cast< std::string::size_type >( this->bufsize - this->m_begin ) ); + if ( wrap ) + len = std::min( len, this->wraplen - cursor % this->wraplen ); + std::copy( &s[ cursor ], &s[ cursor ] + len, this->m_buf + this->m_begin ); + this->m_begin += len; + cursor += len; + } + return !this->fail(); + } + + inline bool + puts( char_type c ) noexcept + { + if ( this->fail() ) return false; + if ( this->m_begin >= this->bufsize ) this->async_write(); + this->m_buf[ this->m_begin++ ] = c; + return !this->fail(); + } + + inline void + flush( ) noexcept + { + this->async_write( ); + { + // wait until it is actually written to the file. 
+ std::unique_lock< std::mutex > lock( *this->bufslock ); + this->cv->wait( lock, [this]{ return !this->produced; } ); + } + } + private: + /* Methods */ + inline void + async_write( bool term=false ) noexcept + { + if ( this->fail() || this->terminate ) return; + + { + std::unique_lock< std::mutex > lock( *this->bufslock ); + this->cv->wait( lock, [this]{ return !this->produced; } ); + this->m_end = this->w_end; + if ( !this->fail() ) { + this->w_end = this->m_begin; + std::copy( this->m_buf, this->m_buf + this->m_begin, this->w_buf ); + this->produced = true; + if ( term ) this->terminate = true; /**< XXX: only set here! */ + } + this->m_begin = 0; + } + this->cv->notify_one(); + } + + inline void + writer( ) noexcept + { + bool term = false; + do { + { + std::unique_lock< std::mutex > lock( *this->bufslock ); + this->cv->wait( lock, [this]{ return this->produced; } ); + if ( !this->func( this->f, this->w_buf, this->w_end ) && this->w_end ) { + this->w_end = -1; + } + this->produced = false; + if ( this->terminate || this->w_end < 0 ) term = true; + } + this->cv->notify_one(); + } while ( !term ); + } + + inline void + worker_join( ) + { + this->async_write( true ); + if ( this->worker.joinable() ) this->worker.join(); + } + + inline void + worker_start( ) + { + this->worker = std::thread( &KStream::writer, this ); + } + }; + + template< typename TFile, + typename TFunc > + class KStream< TFile, TFunc, mode::In_ > : public KStreamBase_ { // kstream_t + public: + /* Typedefs */ + using base_type = KStreamBase_; + using spec_type = mode::In_; + using size_type = base_type::size_type; + using char_type = base_type::char_type; + using close_type = int(*)( TFile ); + protected: + /* Separators */ + constexpr static char_type SEP_SPACE = 0; // isspace(): \t, \n, \v, \f, \r + constexpr static char_type SEP_TAB = 1; // isspace() && !' 
' + constexpr static char_type SEP_LINE = 2; // line separator: "\n" (Unix) or "\r\n" (Windows) + constexpr static char_type SEP_MAX = 2; + /* Consts */ + constexpr static std::make_unsigned_t< size_type > DEFAULT_BUFSIZE = 16384; + /* Data members */ + char_type* buf; /**< @brief character buffer */ + size_type bufsize; /**< @brief buffer size */ + size_type begin; /**< @brief begin buffer index */ + size_type end; /**< @brief end buffer index or error flag if -1 */ + bool is_eof; /**< @brief eof flag */ + bool is_tqs; /**< @brief truncated quality string flag */ + bool is_ready; /**< @brief next record ready flag */ + bool last; /**< @brief last read was successful */ + unsigned long int counter; /**< @brief number of parsed records so far */ + TFile f; /**< @brief file handler */ + TFunc func; /**< @brief read function */ + close_type close; /**< @brief close function */ + public: + KStream( TFile f_, + TFunc func_, + spec_type=mode::in, + std::make_unsigned_t< size_type > bs_=DEFAULT_BUFSIZE, + close_type cfunc_=nullptr ) // ks_init + : buf( new char_type[ bs_ ] ), bufsize( bs_ ), + f( std::move( f_ ) ), func( std::move( func_ ) ), close( cfunc_ ) + { + this->begin = 0; + this->end = 0; + this->is_eof = false; + this->is_tqs = false; + this->is_ready = false; + this->last = false; + this->counter = 0; + } + + KStream( TFile f_, + TFunc func_, + std::make_unsigned_t< size_type > bs_, + close_type cfunc_=nullptr ) + : KStream( std::move( f_ ), std::move( func_ ), mode::in, bs_, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + spec_type, + close_type cfunc_ ) + : KStream( std::move( f_ ), std::move( func_ ), mode::in, DEFAULT_BUFSIZE, cfunc_ ) + { } + + KStream( TFile f_, + TFunc func_, + close_type cfunc_ ) + : KStream( std::move( f_ ), std::move( func_ ), mode::in, DEFAULT_BUFSIZE, cfunc_ ) + { } + + KStream( KStream const& ) = delete; + KStream& operator=( KStream const& ) = delete; + + KStream( KStream&& other ) noexcept + { + this->buf = other.buf; + other.buf = nullptr; + this->bufsize = other.bufsize; + this->begin = other.begin; + this->end = other.end; + this->is_eof = other.is_eof; + this->is_tqs = other.is_tqs; + this->is_ready = other.is_ready; + this->last = other.last; + this->counter = other.counter; + this->f = std::move( other.f ); + this->func = std::move( other.func ); + this->close = other.close; + } + + KStream& operator=( KStream&& other ) noexcept + { + if ( this == &other ) return *this; + delete[] this->buf; + this->buf = other.buf; + other.buf = nullptr; + this->bufsize = other.bufsize; + this->begin = other.begin; + this->end = other.end; + this->is_eof = other.is_eof; + this->is_tqs = other.is_tqs; + this->is_ready = other.is_ready; + this->last = other.last; + this->counter = other.counter; + this->f = std::move( other.f ); + this->func = std::move( other.func ); + this->close = other.close; + return *this; + } + + ~KStream( ) noexcept + { + delete[] this->buf; + if ( this->close != nullptr ) this->close( this->f ); + } + /* Accessors */ + inline unsigned long int + counts( ) const + { + return this->counter; + } + /* Methods */ + inline bool + err( ) const // ks_err + { + return this->end == -1; + } + + inline bool + eof( ) const // ks_eof + { + return this->is_eof && this->begin >= this->end; + } + + inline bool + tqs( ) const + { + return this->is_tqs; + } + + inline bool + fail( ) const + { + return this->err() || this->tqs() || ( this->eof() && !this->last ); + } + + inline KStream& + operator>>( KSeq& rec ) // kseq_read + { + char_type c; + 
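+      // Parsing sketch (mirrors kseq): scan for a '>' or '@' header if one has not
+      // already been consumed, read the name up to the first whitespace and an
+      // optional comment up to end of line, then accumulate sequence lines until
+      // the next header or a '+' line; for FASTQ, the quality string is read until
+      // it is at least as long as the sequence.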
this->last = false; + if ( !this->is_ready ) { // then jump to the next header line + while ( ( c = this->getc( ) ) && c != '>' && c != '@' ); + if ( this->fail() ) return *this; + this->is_ready = true; + } // else: the first header char has been read in the previous call + rec.clear(); // reset all members + if ( !this->getuntil( KStream::SEP_SPACE, rec.name, &c ) ) return *this; + if ( c != '\n' ) { // read FASTA/Q comment + this->getuntil( KStream::SEP_LINE, rec.comment, nullptr ); + } + while ( ( c = this->getc( ) ) && c != '>' && c != '@' && c != '+' ) { + if ( c == '\n' ) continue; // skip empty lines + rec.seq += c; + this->getuntil( KStream::SEP_LINE, rec.seq, nullptr, true ); // read the rest of the line + } + this->last = true; + ++this->counter; + if ( c == '>' || c == '@' ) this->is_ready = true; // the first header char has been read + if ( c != '+' ) return *this; // FASTA + while ( ( c = this->getc( ) ) && c != '\n' ); // skip the rest of '+' line + if ( this->eof() ) { // error: no quality string + this->is_tqs = true; + return *this; + } + while ( this->getuntil( KStream::SEP_LINE, rec.qual, nullptr, true ) && + rec.qual.size() < rec.seq.size() ); + if ( this->err() ) return *this; + this->is_ready = false; // we have not come to the next header line + if ( rec.seq.size() != rec.qual.size() ) { // error: qual string is of a different length + this->is_tqs = true; // should return here + } + + return *this; + } + + operator bool( ) const + { + return !this->fail(); + } + + inline std::vector< KSeq > + read( std::vector< KSeq >::size_type const size ) + { + std::vector< KSeq > ret; + ret.reserve( size ); + for ( std::vector< KSeq >::size_type i = 0; i < size; ++i ) { + ret.emplace_back(); + *this >> ret.back(); + if ( !( *this ) ) { + ret.pop_back(); + break; + } + } + return ret; + } + + inline std::vector< KSeq > + read( ) + { + std::vector< KSeq > ret; + while ( ( ret.emplace_back(), true ) && *this >> ret.back() ); + ret.pop_back(); + return ret; + } + /* Low-level methods */ + inline char_type + getc( ) noexcept // ks_getc + { + // error + if ( this->err() || this->eof() ) return 0; + // fetch + if ( this->begin >= this->end ) { + this->begin = 0; + this->end = this->func( this->f, this->buf, this->bufsize ); + if ( this->end <= 0 ) { // err if end == -1 and eof if 0 + this->is_eof = true; + return 0; + } + } + // ready + return this->buf[ this->begin++ ]; + } + + inline bool + getuntil( char_type delimiter, std::string& str, char_type *dret, bool append=false ) // ks_getuntil + noexcept + { + char_type c; + bool gotany = false; + if ( dret ) *dret = 0; + if ( !append ) str.clear(); + size_type i = -1; + do { + if ( !( c = this->getc( ) ) ) break; + --this->begin; + if ( delimiter == KStream::SEP_LINE ) { + for ( i = this->begin; i < this->end; ++i ) { + if ( this->buf[ i ] == '\n' ) break; + } + } + else if ( delimiter > KStream::SEP_MAX ) { + for ( i = this->begin; i < this->end; ++i ) { + if ( this->buf[ i ] == delimiter ) break; + } + } + else if ( delimiter == KStream::SEP_SPACE ) { + for ( i = this->begin; i < this->end; ++i ) { + if ( std::isspace( this->buf[ i ] ) ) break; + } + } + else if ( delimiter == KStream::SEP_TAB ) { + for ( i = this->begin; i < this->end; ++i ) { + if ( std::isspace( this->buf[ i ] ) && this->buf[ i ] != ' ' ) break; + } + } + else { + assert( false ); // it should not reach here + return false; // when assert is replaced by NOOP + } + + gotany = true; + str.append( this->buf + this->begin, i - this->begin ); + this->begin = i + 1; + 
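+        // if no delimiter was found in the current chunk (`i` ran off `end`), loop
+        // again: getc() refills the buffer and the scan resumes in the next chunk.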
} while ( i >= this->end ); + + if ( this->err() || ( this->eof() && !gotany ) ) return false; + + assert( i != -1 ); + if ( !this->eof() && dret ) *dret = this->buf[ i ]; + if ( delimiter == KStream::SEP_LINE && !str.empty() && str.back() == '\r' ) { + str.pop_back(); + } + return true; + } + }; + + template< typename TFile, typename TFunc > + using KStreamIn = KStream< TFile, TFunc, mode::In_ >; + + template< typename TFile, typename TFunc > + using KStreamOut = KStream< TFile, TFunc, mode::Out_ >; + + template< typename TFile, typename TFunc, typename TSpec, typename... Args > + inline KStream< std::decay_t< TFile >, std::decay_t< TFunc >, TSpec > + make_kstream( TFile&& file, TFunc&& func, TSpec, Args&&... args ) + { + return KStream< std::decay_t< TFile >, std::decay_t< TFunc >, TSpec >( + std::forward< TFile >( file ), std::forward< TFunc >( func ), TSpec(), + std::forward< Args >( args )... ); + } + + template< typename TFile, typename TFunc, typename... Args > + inline KStream< std::decay_t< TFile >, std::decay_t< TFunc >, mode::In_ > + make_ikstream( TFile&& file, TFunc&& func, Args&&... args ) + { + return KStream< std::decay_t< TFile >, std::decay_t< TFunc >, mode::In_ >( + std::forward< TFile >( file ), std::forward< TFunc >( func ), mode::in, + std::forward< Args >( args )... ); + } + + template< typename TFile, typename TFunc, typename... Args > + inline KStream< std::decay_t< TFile >, std::decay_t< TFunc >, mode::Out_ > + make_okstream( TFile&& file, TFunc&& func, Args&&... args ) + { + return KStream< std::decay_t< TFile >, std::decay_t< TFunc >, mode::Out_ >( + std::forward< TFile >( file ), std::forward< TFunc >( func ), mode::out, + std::forward< Args >( args )... ); + } +} /* ----- end of namespace klibpp ----- */ +#endif /* ----- #ifndef KSEQPP_KSEQPP_HPP__ ----- */ diff --git a/include/parallel_hashmap/btree.h b/include/parallel_hashmap/btree.h new file mode 100644 index 000000000..cbfb8eeb3 --- /dev/null +++ b/include/parallel_hashmap/btree.h @@ -0,0 +1,4050 @@ +// --------------------------------------------------------------------------- +// Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp) +// with modifications. +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// ---------------------------------------------------------------------------
+
+#ifndef PHMAP_BTREE_BTREE_CONTAINER_H_
+#define PHMAP_BTREE_BTREE_CONTAINER_H_
+
+#ifdef _MSC_VER
+    #pragma warning(push)
+
+    #pragma warning(disable : 4127) // conditional expression is constant
+    #pragma warning(disable : 4324) // structure was padded due to alignment specifier
+    #pragma warning(disable : 4355) // 'this': used in base member initializer list
+    #pragma warning(disable : 4365) // conversion from 'int' to 'const unsigned __int64', signed/unsigned mismatch
+    #pragma warning(disable : 4514) // unreferenced inline function has been removed
+    #pragma warning(disable : 4623) // default constructor was implicitly defined as deleted
+    #pragma warning(disable : 4625) // copy constructor was implicitly defined as deleted
+    #pragma warning(disable : 4626) // assignment operator was implicitly defined as deleted
+    #pragma warning(disable : 4710) // function not inlined
+    #pragma warning(disable : 4711) // selected for automatic inline expansion
+    #pragma warning(disable : 4820) // '6' bytes padding added after data member
+    #pragma warning(disable : 4868) // compiler may not enforce left-to-right evaluation order in braced initializer list
+    #pragma warning(disable : 5026) // move constructor was implicitly defined as deleted
+    #pragma warning(disable : 5027) // move assignment operator was implicitly defined as deleted
+    #pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
+#endif
+
+
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <new>
+#include <type_traits>
+
+#include "phmap_fwd_decl.h"
+#include "phmap_base.h"
+
+#if PHMAP_HAVE_STD_STRING_VIEW
+    #include <string_view>
+#endif
+
+// MSVC constructibility traits do not detect destructor properties and so our
+// implementations should not use them as a source-of-truth.
+#if defined(_MSC_VER) && !defined(__clang__) && !defined(__GNUC__)
+    #define PHMAP_META_INTERNAL_STD_CONSTRUCTION_TRAITS_DONT_CHECK_DESTRUCTION 1
+#endif
+
+namespace phmap {
+
+    // Defined and documented later on in this file.
+    template <typename T>
+    struct is_trivially_destructible;
+
+    // Defined and documented later on in this file.
+    template <typename T>
+    struct is_trivially_move_assignable;
+
+    namespace type_traits_internal {
+
+        // Silence MSVC warnings about the destructor being defined as deleted.
+#if defined(_MSC_VER) && !defined(__GNUC__)
+    #pragma warning(push)
+    #pragma warning(disable : 4624)
+#endif // defined(_MSC_VER) && !defined(__GNUC__)
+
+        template <class T>
+        union SingleMemberUnion {
+            T t;
+        };
+
+        // Restore the state of the destructor warning that was silenced above.
+#if defined(_MSC_VER) && !defined(__GNUC__)
+    #pragma warning(pop)
+#endif // defined(_MSC_VER) && !defined(__GNUC__)
+
+        template <class T>
+        struct IsTriviallyMoveConstructibleObject
+            : std::integral_constant<
+                  bool, std::is_move_constructible<
+                            type_traits_internal::SingleMemberUnion<T>>::value &&
+                            phmap::is_trivially_destructible<T>::value> {};
+
+        template <class T>
+        struct IsTriviallyCopyConstructibleObject
+            : std::integral_constant<
+                  bool, std::is_copy_constructible<
+                            type_traits_internal::SingleMemberUnion<T>>::value &&
+                            phmap::is_trivially_destructible<T>::value> {};
+
+        template <typename T>
+        struct IsTriviallyMoveAssignableReference : std::false_type {};
+
+        template <typename T>
+        struct IsTriviallyMoveAssignableReference<T&>
+            : phmap::is_trivially_move_assignable<T>::type {};
+
+        template <typename T>
+        struct IsTriviallyMoveAssignableReference<T&&>
+            : phmap::is_trivially_move_assignable<T>::type {};
+
+    } // namespace type_traits_internal
+
+
+    template <typename... Ts>
+    using void_t = typename type_traits_internal::VoidTImpl<Ts...>::type;
+
+
+    template <typename T>
+    struct is_function
+        : std::integral_constant<
+              bool, !(std::is_reference<T>::value ||
+                      std::is_const<typename std::add_const<T>::type>::value)> {};
+
+
+    namespace type_traits_internal {
+
+        template <typename T>
+        class is_trivially_copyable_impl {
+            using ExtentsRemoved = typename std::remove_all_extents<T>::type;
+            static constexpr bool kIsCopyOrMoveConstructible =
+                std::is_copy_constructible<ExtentsRemoved>::value ||
+                std::is_move_constructible<ExtentsRemoved>::value;
+            static constexpr bool kIsCopyOrMoveAssignable =
+                phmap::is_copy_assignable<ExtentsRemoved>::value ||
+                phmap::is_move_assignable<ExtentsRemoved>::value;
+
+        public:
+            static constexpr bool kValue =
+                (__has_trivial_copy(ExtentsRemoved) || !kIsCopyOrMoveConstructible) &&
+                (__has_trivial_assign(ExtentsRemoved) || !kIsCopyOrMoveAssignable) &&
+                (kIsCopyOrMoveConstructible || kIsCopyOrMoveAssignable) &&
+                is_trivially_destructible<ExtentsRemoved>::value &&
+                // We need to check for this explicitly because otherwise we'll say
+                // references are trivial copyable when compiled by MSVC.
+                !std::is_reference<ExtentsRemoved>::value;
+        };
+
+        template <typename T>
+        struct is_trivially_copyable
+            : std::integral_constant<
+                  bool, type_traits_internal::is_trivially_copyable_impl<T>::kValue> {};
+    } // namespace type_traits_internal
+
+    namespace swap_internal {
+
+        // Necessary for the traits.
+        using std::swap;
+
+        // This declaration prevents global `swap` and `phmap::swap` overloads from being
+        // considered unless ADL picks them up.
+        void swap();
+
+        template <class T>
+        using IsSwappableImpl = decltype(swap(std::declval<T&>(), std::declval<T&>()));
+
+        // NOTE: This dance with the default template parameter is for MSVC.
+        template <class T,
+                  class IsNoexcept = std::integral_constant<
+                      bool, noexcept(swap(std::declval<T&>(), std::declval<T&>()))>>
+        using IsNothrowSwappableImpl = typename std::enable_if<IsNoexcept::value>::type;
+
+        template <class T>
+        struct IsSwappable
+            : phmap::type_traits_internal::is_detected<IsSwappableImpl, T> {};
+
+        template <class T>
+        struct IsNothrowSwappable
+            : phmap::type_traits_internal::is_detected<IsNothrowSwappableImpl, T> {};
+
+        template <class T, phmap::enable_if_t<IsSwappable<T>::value, int> = 0>
+        void Swap(T& lhs, T& rhs) noexcept(IsNothrowSwappable<T>::value) {
+            swap(lhs, rhs);
+        }
+
+        using StdSwapIsUnconstrained = IsSwappable<void()>;
+
+    } // namespace swap_internal
+
+    namespace type_traits_internal {
+
+        // Make the swap-related traits/function accessible from this namespace.
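+        // (the `using std::swap;` two-step inside swap_internal above is what lets
+        // these traits observe ADL-found swap overloads as well as std::swap.)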
+        using swap_internal::IsNothrowSwappable;
+        using swap_internal::IsSwappable;
+        using swap_internal::Swap;
+        using swap_internal::StdSwapIsUnconstrained;
+
+    } // namespace type_traits_internal
+
+    namespace compare_internal {
+
+        using value_type = int8_t;
+
+        template <typename T>
+        struct Fail {
+            static_assert(sizeof(T) < 0, "Only literal `0` is allowed.");
+        };
+
+        template <typename NullPtrT = std::nullptr_t>
+        struct OnlyLiteralZero {
+            constexpr OnlyLiteralZero(NullPtrT) noexcept {} // NOLINT
+
+            template <
+                typename T,
+                typename = typename std::enable_if<
+                    std::is_same<T, std::nullptr_t>::value ||
+                    (std::is_integral<T>::value && !std::is_same<T, int>::value)>::type,
+                typename = typename Fail<T>::type>
+            OnlyLiteralZero(T); // NOLINT
+        };
+
+        enum class eq : value_type {
+            equal = 0,
+            equivalent = equal,
+            nonequal = 1,
+            nonequivalent = nonequal,
+        };
+
+        enum class ord : value_type { less = -1, greater = 1 };
+
+        enum class ncmp : value_type { unordered = -127 };
+
+#if defined(__cpp_inline_variables) && !defined(_MSC_VER)
+
+#define PHMAP_COMPARE_INLINE_BASECLASS_DECL(name)
+
+#define PHMAP_COMPARE_INLINE_SUBCLASS_DECL(type, name) \
+    static const type name;
+
+#define PHMAP_COMPARE_INLINE_INIT(type, name, init) \
+    inline constexpr type type::name(init)
+
+#else // __cpp_inline_variables
+
+#define PHMAP_COMPARE_INLINE_BASECLASS_DECL(name) \
+    static const T name;
+
+#define PHMAP_COMPARE_INLINE_SUBCLASS_DECL(type, name)
+
+#define PHMAP_COMPARE_INLINE_INIT(type, name, init) \
+    template <typename T>                           \
+    const T compare_internal::type##_base<T>::name(init)
+
+#endif // __cpp_inline_variables
+
+        // These template base classes allow for defining the values of the constants
+        // in the header file (for performance) without using inline variables (which
+        // aren't available in C++11).
+        template <typename T>
+        struct weak_equality_base {
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(nonequivalent)
+        };
+
+        template <typename T>
+        struct strong_equality_base {
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(equal)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(nonequal)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(nonequivalent)
+        };
+
+        template <typename T>
+        struct partial_ordering_base {
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(less)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(greater)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(unordered)
+        };
+
+        template <typename T>
+        struct weak_ordering_base {
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(less)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(greater)
+        };
+
+        template <typename T>
+        struct strong_ordering_base {
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(less)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(equal)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(equivalent)
+            PHMAP_COMPARE_INLINE_BASECLASS_DECL(greater)
+        };
+
+    } // namespace compare_internal
+
+    class weak_equality
+        : public compare_internal::weak_equality_base<weak_equality> {
+        explicit constexpr weak_equality(compare_internal::eq v) noexcept
+            : value_(static_cast<compare_internal::value_type>(v)) {}
+        friend struct compare_internal::weak_equality_base<weak_equality>;
+
+    public:
+        PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_equality, equivalent)
+        PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_equality, nonequivalent)
+
+        // Comparisons
+        friend constexpr bool operator==(
+            weak_equality v, compare_internal::OnlyLiteralZero<>) noexcept {
+            return v.value_ == 0;
+        }
+        friend constexpr bool operator!=(
+            weak_equality v, compare_internal::OnlyLiteralZero<>) noexcept {
+            return v.value_ != 0;
+        }
+        friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>,
+
weak_equality v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + weak_equality v) noexcept { + return 0 != v.value_; + } + + private: + compare_internal::value_type value_; + }; + PHMAP_COMPARE_INLINE_INIT(weak_equality, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(weak_equality, nonequivalent, + compare_internal::eq::nonequivalent); + + class strong_equality + : public compare_internal::strong_equality_base { + explicit constexpr strong_equality(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::strong_equality_base; + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, equal) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, nonequal) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_equality, nonequivalent) + + // Conversion + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? weak_equality::equivalent + : weak_equality::nonequivalent; + } + // Comparisons + friend constexpr bool operator==( + strong_equality v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ == 0; + } + friend constexpr bool operator!=( + strong_equality v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ != 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + strong_equality v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + strong_equality v) noexcept { + return 0 != v.value_; + } + + private: + compare_internal::value_type value_; + }; + + PHMAP_COMPARE_INLINE_INIT(strong_equality, equal, compare_internal::eq::equal); + PHMAP_COMPARE_INLINE_INIT(strong_equality, nonequal, + compare_internal::eq::nonequal); + PHMAP_COMPARE_INLINE_INIT(strong_equality, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(strong_equality, nonequivalent, + compare_internal::eq::nonequivalent); + + class partial_ordering + : public compare_internal::partial_ordering_base { + explicit constexpr partial_ordering(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + explicit constexpr partial_ordering(compare_internal::ord v) noexcept + : value_(static_cast(v)) {} + explicit constexpr partial_ordering(compare_internal::ncmp v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::partial_ordering_base; + + constexpr bool is_ordered() const noexcept { + return value_ != + compare_internal::value_type(compare_internal::ncmp::unordered); + } + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, less) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, greater) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(partial_ordering, unordered) + + // Conversion + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? 
weak_equality::equivalent + : weak_equality::nonequivalent; + } + // Comparisons + friend constexpr bool operator==( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ == 0; + } + friend constexpr bool operator!=( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return !v.is_ordered() || v.value_ != 0; + } + friend constexpr bool operator<( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ < 0; + } + friend constexpr bool operator<=( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ <= 0; + } + friend constexpr bool operator>( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ > 0; + } + friend constexpr bool operator>=( + partial_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.is_ordered() && v.value_ >= 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return !v.is_ordered() || 0 != v.value_; + } + friend constexpr bool operator<(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 < v.value_; + } + friend constexpr bool operator<=(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 <= v.value_; + } + friend constexpr bool operator>(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 > v.value_; + } + friend constexpr bool operator>=(compare_internal::OnlyLiteralZero<>, + partial_ordering v) noexcept { + return v.is_ordered() && 0 >= v.value_; + } + + private: + compare_internal::value_type value_; + }; + + PHMAP_COMPARE_INLINE_INIT(partial_ordering, less, compare_internal::ord::less); + PHMAP_COMPARE_INLINE_INIT(partial_ordering, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(partial_ordering, greater, + compare_internal::ord::greater); + PHMAP_COMPARE_INLINE_INIT(partial_ordering, unordered, + compare_internal::ncmp::unordered); + + class weak_ordering + : public compare_internal::weak_ordering_base { + explicit constexpr weak_ordering(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + explicit constexpr weak_ordering(compare_internal::ord v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::weak_ordering_base; + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_ordering, less) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_ordering, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(weak_ordering, greater) + + // Conversions + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? weak_equality::equivalent + : weak_equality::nonequivalent; + } + constexpr operator partial_ordering() const noexcept { // NOLINT + return value_ == 0 ? partial_ordering::equivalent + : (value_ < 0 ? 
partial_ordering::less + : partial_ordering::greater); + } + // Comparisons + friend constexpr bool operator==( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ == 0; + } + friend constexpr bool operator!=( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ != 0; + } + friend constexpr bool operator<( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ < 0; + } + friend constexpr bool operator<=( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ <= 0; + } + friend constexpr bool operator>( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ > 0; + } + friend constexpr bool operator>=( + weak_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ >= 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 != v.value_; + } + friend constexpr bool operator<(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 < v.value_; + } + friend constexpr bool operator<=(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 <= v.value_; + } + friend constexpr bool operator>(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 > v.value_; + } + friend constexpr bool operator>=(compare_internal::OnlyLiteralZero<>, + weak_ordering v) noexcept { + return 0 >= v.value_; + } + + private: + compare_internal::value_type value_; + }; + + PHMAP_COMPARE_INLINE_INIT(weak_ordering, less, compare_internal::ord::less); + PHMAP_COMPARE_INLINE_INIT(weak_ordering, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(weak_ordering, greater, + compare_internal::ord::greater); + + class strong_ordering + : public compare_internal::strong_ordering_base { + explicit constexpr strong_ordering(compare_internal::eq v) noexcept + : value_(static_cast(v)) {} + explicit constexpr strong_ordering(compare_internal::ord v) noexcept + : value_(static_cast(v)) {} + friend struct compare_internal::strong_ordering_base; + + public: + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, less) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, equal) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, equivalent) + PHMAP_COMPARE_INLINE_SUBCLASS_DECL(strong_ordering, greater) + + // Conversions + constexpr operator weak_equality() const noexcept { // NOLINT + return value_ == 0 ? weak_equality::equivalent + : weak_equality::nonequivalent; + } + constexpr operator strong_equality() const noexcept { // NOLINT + return value_ == 0 ? strong_equality::equal : strong_equality::nonequal; + } + constexpr operator partial_ordering() const noexcept { // NOLINT + return value_ == 0 ? partial_ordering::equivalent + : (value_ < 0 ? partial_ordering::less + : partial_ordering::greater); + } + constexpr operator weak_ordering() const noexcept { // NOLINT + return value_ == 0 + ? weak_ordering::equivalent + : (value_ < 0 ? 
weak_ordering::less : weak_ordering::greater); + } + // Comparisons + friend constexpr bool operator==( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ == 0; + } + friend constexpr bool operator!=( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ != 0; + } + friend constexpr bool operator<( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ < 0; + } + friend constexpr bool operator<=( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ <= 0; + } + friend constexpr bool operator>( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ > 0; + } + friend constexpr bool operator>=( + strong_ordering v, compare_internal::OnlyLiteralZero<>) noexcept { + return v.value_ >= 0; + } + friend constexpr bool operator==(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 == v.value_; + } + friend constexpr bool operator!=(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 != v.value_; + } + friend constexpr bool operator<(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 < v.value_; + } + friend constexpr bool operator<=(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 <= v.value_; + } + friend constexpr bool operator>(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 > v.value_; + } + friend constexpr bool operator>=(compare_internal::OnlyLiteralZero<>, + strong_ordering v) noexcept { + return 0 >= v.value_; + } + + private: + compare_internal::value_type value_; + }; + PHMAP_COMPARE_INLINE_INIT(strong_ordering, less, compare_internal::ord::less); + PHMAP_COMPARE_INLINE_INIT(strong_ordering, equal, compare_internal::eq::equal); + PHMAP_COMPARE_INLINE_INIT(strong_ordering, equivalent, + compare_internal::eq::equivalent); + PHMAP_COMPARE_INLINE_INIT(strong_ordering, greater, + compare_internal::ord::greater); + +#undef PHMAP_COMPARE_INLINE_BASECLASS_DECL +#undef PHMAP_COMPARE_INLINE_SUBCLASS_DECL +#undef PHMAP_COMPARE_INLINE_INIT + + namespace compare_internal { + // We also provide these comparator adapter functions for internal phmap use. + + // Helper functions to do a boolean comparison of two keys given a boolean + // or three-way comparator. + // SFINAE prevents implicit conversions to bool (such as from int). + template ::value, int> = 0> + constexpr bool compare_result_as_less_than(const BoolType r) { return r; } + constexpr bool compare_result_as_less_than(const phmap::weak_ordering r) { + return r < 0; + } + + template + constexpr bool do_less_than_comparison(const Compare &compare, const K &x, + const LK &y) { + return compare_result_as_less_than(compare(x, y)); + } + + // Helper functions to do a three-way comparison of two keys given a boolean or + // three-way comparator. + // SFINAE prevents implicit conversions to int (such as from bool). + template ::value, int> = 0> + constexpr phmap::weak_ordering compare_result_as_ordering(const Int c) { + return c < 0 ? phmap::weak_ordering::less + : c == 0 ? 
phmap::weak_ordering::equivalent
+                              : phmap::weak_ordering::greater;
+    }
+    constexpr phmap::weak_ordering compare_result_as_ordering(
+        const phmap::weak_ordering c) {
+        return c;
+    }
+
+    template <
+        typename Compare, typename K, typename LK,
+        phmap::enable_if_t<!std::is_same<bool, phmap::result_of_t<Compare(
+                                                   const K &, const LK &)>>::value,
+                           int> = 0>
+    constexpr phmap::weak_ordering do_three_way_comparison(const Compare &compare,
+                                                           const K &x, const LK &y) {
+        return compare_result_as_ordering(compare(x, y));
+    }
+    template <
+        typename Compare, typename K, typename LK,
+        phmap::enable_if_t<std::is_same<bool, phmap::result_of_t<Compare(
+                                                  const K &, const LK &)>>::value,
+                           int> = 0>
+    constexpr phmap::weak_ordering do_three_way_comparison(const Compare &compare,
+                                                           const K &x, const LK &y) {
+        return compare(x, y) ? phmap::weak_ordering::less
+               : compare(y, x) ? phmap::weak_ordering::greater
+                               : phmap::weak_ordering::equivalent;
+    }
+
+    } // namespace compare_internal
+}
+
+
+namespace phmap {
+
+namespace priv {
+
+    // A helper class that indicates if the Compare parameter is a key-compare-to
+    // comparator.
+    template <typename Compare, typename T>
+    using btree_is_key_compare_to =
+        std::is_convertible<phmap::result_of_t<Compare(const T &, const T &)>,
+                            phmap::weak_ordering>;
+
+    struct StringBtreeDefaultLess {
+        using is_transparent = void;
+
+        StringBtreeDefaultLess() = default;
+
+        // Compatibility constructor.
+        StringBtreeDefaultLess(std::less<std::string>) {} // NOLINT
+#if PHMAP_HAVE_STD_STRING_VIEW
+        StringBtreeDefaultLess(std::less<std::string_view>) {}   // NOLINT
+        StringBtreeDefaultLess(phmap::Less<std::string_view>) {} // NOLINT
+
+        phmap::weak_ordering operator()(std::string_view lhs,
+                                        std::string_view rhs) const {
+            return compare_internal::compare_result_as_ordering(lhs.compare(rhs));
+        }
+#else
+        phmap::weak_ordering operator()(std::string lhs,
+                                        std::string rhs) const {
+            return compare_internal::compare_result_as_ordering(lhs.compare(rhs));
+        }
+#endif
+    };
+
+    struct StringBtreeDefaultGreater {
+        using is_transparent = void;
+
+        StringBtreeDefaultGreater() = default;
+
+        StringBtreeDefaultGreater(std::greater<std::string>) {} // NOLINT
+#if PHMAP_HAVE_STD_STRING_VIEW
+        StringBtreeDefaultGreater(std::greater<std::string_view>) {} // NOLINT
+
+        phmap::weak_ordering operator()(std::string_view lhs,
+                                        std::string_view rhs) const {
+            return compare_internal::compare_result_as_ordering(rhs.compare(lhs));
+        }
+#else
+        phmap::weak_ordering operator()(std::string lhs,
+                                        std::string rhs) const {
+            return compare_internal::compare_result_as_ordering(rhs.compare(lhs));
+        }
+#endif
+    };
+
+    // A helper class to convert a boolean comparison into a three-way "compare-to"
+    // comparison that returns a negative value to indicate less-than, zero to
+    // indicate equality and a positive value to indicate greater-than. This helper
+    // class is specialized for less<std::string>, greater<std::string>,
+    // less<std::string_view>, and greater<std::string_view>.
+    //
+    // key_compare_to_adapter is provided so that btree users
+    // automatically get the more efficient compare-to code when using common
+    // google string types with common comparison functors.
+    // These string-like specializations also turn on heterogeneous lookup by
+    // default.
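+    // For example, a btree map keyed by std::string and declared with
+    // std::less<std::string> is adapted to StringBtreeDefaultLess via the
+    // specializations below, so each comparison is a single three-way
+    // lhs.compare(rhs) instead of up to two less-than calls.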
+    template <typename Compare>
+    struct key_compare_to_adapter {
+        using type = Compare;
+    };
+
+    template <>
+    struct key_compare_to_adapter<std::less<std::string>> {
+        using type = StringBtreeDefaultLess;
+    };
+
+    template <>
+    struct key_compare_to_adapter<phmap::Less<std::string>> {
+        using type = StringBtreeDefaultLess;
+    };
+
+    template <>
+    struct key_compare_to_adapter<std::greater<std::string>> {
+        using type = StringBtreeDefaultGreater;
+    };
+
+#if PHMAP_HAVE_STD_STRING_VIEW
+    template <>
+    struct key_compare_to_adapter<std::less<std::string_view>> {
+        using type = StringBtreeDefaultLess;
+    };
+
+    template <>
+    struct key_compare_to_adapter<phmap::Less<std::string_view>> {
+        using type = StringBtreeDefaultLess;
+    };
+
+    template <>
+    struct key_compare_to_adapter<std::greater<std::string_view>> {
+        using type = StringBtreeDefaultGreater;
+    };
+#endif
+
+    template <typename Key, typename Compare, typename Alloc, int TargetNodeSize,
+              bool Multi, typename SlotPolicy>
+    struct common_params {
+        // If Compare is a common comparator for a std::string-like type, then we adapt it
+        // to use heterogeneous lookup and to be a key-compare-to comparator.
+        using key_compare = typename key_compare_to_adapter<Compare>::type;
+        // A type which indicates if we have a key-compare-to functor or a plain old
+        // key-compare functor.
+        using is_key_compare_to = btree_is_key_compare_to<key_compare, Key>;
+
+        using allocator_type = Alloc;
+        using key_type = Key;
+        using size_type = std::size_t;
+        using difference_type = ptrdiff_t;
+
+        // True if this is a multiset or multimap.
+        using is_multi_container = std::integral_constant<bool, Multi>;
+
+        using slot_policy = SlotPolicy;
+        using slot_type = typename slot_policy::slot_type;
+        using value_type = typename slot_policy::value_type;
+        using init_type = typename slot_policy::mutable_value_type;
+        using pointer = value_type *;
+        using const_pointer = const value_type *;
+        using reference = value_type &;
+        using const_reference = const value_type &;
+
+        enum {
+            kTargetNodeSize = TargetNodeSize,
+
+            // Upper bound for the available space for values. This is largest for leaf
+            // nodes, which have overhead of at least a pointer + 4 bytes (for storing
+            // 3 field_types and an enum).
+            kNodeValueSpace =
+                TargetNodeSize - /*minimum overhead=*/(sizeof(void *) + 4),
+        };
+
+        // This is an integral type large enough to hold as many
+        // ValueSize-values as will fit a node of TargetNodeSize bytes.
+        using node_count_type =
+            phmap::conditional_t<(kNodeValueSpace / sizeof(value_type) >
+                                  (std::numeric_limits<uint8_t>::max)()),
+                                 uint16_t, uint8_t>; // NOLINT
+
+        // The following methods are necessary for passing this struct as PolicyTraits
+        // for node_handle and/or are used within btree.
+        static value_type &element(slot_type *slot) {
+            return slot_policy::element(slot);
+        }
+        static const value_type &element(const slot_type *slot) {
+            return slot_policy::element(slot);
+        }
+        template <class... Args>
+        static void construct(Alloc *alloc, slot_type *slot, Args &&... args) {
+            slot_policy::construct(alloc, slot, std::forward<Args>(args)...);
+        }
+        static void construct(Alloc *alloc, slot_type *slot, slot_type *other) {
+            slot_policy::construct(alloc, slot, other);
+        }
+        static void destroy(Alloc *alloc, slot_type *slot) {
+            slot_policy::destroy(alloc, slot);
+        }
+        static void transfer(Alloc *alloc, slot_type *new_slot, slot_type *old_slot) {
+            construct(alloc, new_slot, old_slot);
+            destroy(alloc, old_slot);
+        }
+        static void swap(Alloc *alloc, slot_type *a, slot_type *b) {
+            slot_policy::swap(alloc, a, b);
+        }
+        static void move(Alloc *alloc, slot_type *src, slot_type *dest) {
+            slot_policy::move(alloc, src, dest);
+        }
+        static void move(Alloc *alloc, slot_type *first, slot_type *last,
+                         slot_type *result) {
+            slot_policy::move(alloc, first, last, result);
+        }
+    };
+
+    // A parameters structure for holding the type parameters for a btree_map.
+    // Compare and Alloc should be nothrow copy-constructible.
+    template <typename Key, typename Data, typename Compare, typename Alloc,
+              int TargetNodeSize, bool Multi>
+    struct map_params : common_params<Key, Compare, Alloc, TargetNodeSize, Multi,
+                                      map_slot_policy<Key, Data>> {
+        using super_type = typename map_params::common_params;
+        using mapped_type = Data;
+        // This type allows us to move keys when it is safe to do so. It is safe
+        // for maps in which value_type and mutable_value_type are layout compatible.
+        using slot_policy = typename super_type::slot_policy;
+        using slot_type = typename super_type::slot_type;
+        using value_type = typename super_type::value_type;
+        using init_type = typename super_type::init_type;
+
+        using key_compare = typename super_type::key_compare;
+        // Inherit from key_compare for empty base class optimization.
+        struct value_compare : private key_compare {
+            value_compare() = default;
+            explicit value_compare(const key_compare &cmp) : key_compare(cmp) {}
+
+            template <typename T, typename U>
+            auto operator()(const T &left, const U &right) const
+                -> decltype(std::declval<key_compare>()(left.first, right.first)) {
+                return key_compare::operator()(left.first, right.first);
+            }
+        };
+        using is_map_container = std::true_type;
+
+        static const Key &key(const value_type &x) { return x.first; }
+        static const Key &key(const init_type &x) { return x.first; }
+        static const Key &key(const slot_type *x) { return slot_policy::key(x); }
+        static mapped_type &value(value_type *value) { return value->second; }
+    };
+
+    // This type implements the necessary functions from the
+    // btree::priv::slot_type interface.
+    template <typename Key>
+    struct set_slot_policy {
+        using slot_type = Key;
+        using value_type = Key;
+        using mutable_value_type = Key;
+
+        static value_type &element(slot_type *slot) { return *slot; }
+        static const value_type &element(const slot_type *slot) { return *slot; }
+
+        template <typename Alloc, class... Args>
+        static void construct(Alloc *alloc, slot_type *slot, Args &&... args) {
+            phmap::allocator_traits<Alloc>::construct(*alloc, slot,
+                                                      std::forward<Args>(args)...);
+        }
+
+        template <typename Alloc>
+        static void construct(Alloc *alloc, slot_type *slot, slot_type *other) {
+            phmap::allocator_traits<Alloc>::construct(*alloc, slot, std::move(*other));
+        }
+
+        template <typename Alloc>
+        static void destroy(Alloc *alloc, slot_type *slot) {
+            phmap::allocator_traits<Alloc>::destroy(*alloc, slot);
+        }
+
+        template <typename Alloc>
+        static void swap(Alloc * /*alloc*/, slot_type *a, slot_type *b) {
+            using std::swap;
+            swap(*a, *b);
+        }
+
+        template <typename Alloc>
+        static void move(Alloc * /*alloc*/, slot_type *src, slot_type *dest) {
+            *dest = std::move(*src);
+        }
+
+        template <typename Alloc>
+        static void move(Alloc *alloc, slot_type *first, slot_type *last,
+                         slot_type *result) {
+            for (slot_type *src = first, *dest = result; src != last; ++src, ++dest)
+                move(alloc, src, dest);
+        }
+    };
+
+    // A parameters structure for holding the type parameters for a btree_set.
+    // Compare and Alloc should be nothrow copy-constructible.
+    template <typename Key, typename Compare, typename Alloc, int TargetNodeSize,
+              bool Multi>
+    struct set_params : common_params<Key, Compare, Alloc, TargetNodeSize, Multi,
+                                      set_slot_policy<Key>> {
+        using value_type = Key;
+        using slot_type = typename set_params::common_params::slot_type;
+        using value_compare = typename set_params::common_params::key_compare;
+        using is_map_container = std::false_type;
+
+        static const Key &key(const value_type &x) { return x; }
+        static const Key &key(const slot_type *x) { return *x; }
+    };
+
+    // An adapter class that converts a lower-bound compare into an upper-bound
+    // compare. Note: there is no need to make a version of this adapter specialized
+    // for key-compare-to functors because the upper-bound (the first value greater
+    // than the input) is never an exact match.
+    template <typename Compare>
+    struct upper_bound_adapter {
+        explicit upper_bound_adapter(const Compare &c) : comp(c) {}
+        template <typename K, typename LK>
+        bool operator()(const K &a, const LK &b) const {
+            // Returns true when a is not greater than b.
+            return !phmap::compare_internal::compare_result_as_less_than(comp(b, a));
+        }
+
+    private:
+        Compare comp;
+    };
+
+    enum class MatchKind : uint8_t { kEq, kNe };
+
+    template <typename V, bool IsCompareTo>
+    struct SearchResult {
+        V value;
+        MatchKind match;
+
+        static constexpr bool HasMatch() { return true; }
+        bool IsEq() const { return match == MatchKind::kEq; }
+    };
+
+    // When we don't use CompareTo, `match` is not present.
+    // This ensures that callers can't use it accidentally when it provides no
+    // useful information.
+    template <typename V>
+    struct SearchResult<V, false> {
+        V value;
+
+        static constexpr bool HasMatch() { return false; }
+        static constexpr bool IsEq() { return false; }
+    };
+
+    // A node in the btree holding. The same node type is used for both internal
+    // and leaf nodes in the btree, though the nodes are allocated in such a way
+    // that the children array is only valid in internal nodes.
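+    // Conceptual memory layout of a node, as encoded by LeafLayout()/InternalLayout()
+    // below (a sketch, not literal member declarations):
+    //   btree_node *parent;
+    //   field_type  position, start, count, max_count;  // 4 metadata fields
+    //   slot_type   values[kNodeValues];                // key/value slots
+    //   btree_node *children[kNodeValues + 1];          // internal nodes only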
+    template <typename Params>
+    class btree_node {
+        using is_key_compare_to = typename Params::is_key_compare_to;
+        using is_multi_container = typename Params::is_multi_container;
+        using field_type = typename Params::node_count_type;
+        using allocator_type = typename Params::allocator_type;
+        using slot_type = typename Params::slot_type;
+
+    public:
+        using params_type = Params;
+        using key_type = typename Params::key_type;
+        using value_type = typename Params::value_type;
+        using pointer = typename Params::pointer;
+        using const_pointer = typename Params::const_pointer;
+        using reference = typename Params::reference;
+        using const_reference = typename Params::const_reference;
+        using key_compare = typename Params::key_compare;
+        using size_type = typename Params::size_type;
+        using difference_type = typename Params::difference_type;
+
+        // Btree decides whether to use linear node search as follows:
+        //   - If the key is arithmetic and the comparator is std::less or
+        //     std::greater, choose linear.
+        //   - Otherwise, choose binary.
+        // TODO(ezb): Might make sense to add condition(s) based on node-size.
+        using use_linear_search = std::integral_constant<
+            bool,
+            std::is_arithmetic<key_type>::value &&
+                (std::is_same<std::less<key_type>, key_compare>::value ||
+                 std::is_same<std::greater<key_type>, key_compare>::value ||
+                 std::is_same<phmap::Less<key_type>, key_compare>::value)>;
+
+
+        ~btree_node() = default;
+        btree_node(btree_node const &) = delete;
+        btree_node &operator=(btree_node const &) = delete;
+
+        // Public for EmptyNodeType.
+        constexpr static size_type Alignment() {
+            static_assert(LeafLayout(1).Alignment() == InternalLayout().Alignment(),
+                          "Alignment of all nodes must be equal.");
+            return (size_type)InternalLayout().Alignment();
+        }
+
+    protected:
+        btree_node() = default;
+
+    private:
+        using layout_type = phmap::priv::Layout<btree_node *, field_type,
+                                                slot_type, btree_node *>;
+        constexpr static size_type SizeWithNValues(size_type n) {
+            return (size_type)layout_type(/*parent*/ 1,
+                                          /*position, start, count, max_count*/ 4,
+                                          /*values*/ (size_t)n,
+                                          /*children*/ 0)
+                .AllocSize();
+        }
+        // A lower bound for the overhead of fields other than values in a leaf node.
+        constexpr static size_type MinimumOverhead() {
+            return (size_type)(SizeWithNValues(1) - sizeof(value_type));
+        }
+
+        // Compute how many values we can fit onto a leaf node taking into account
+        // padding.
+        constexpr static size_type NodeTargetValues(const int begin, const int end) {
+            return begin == end ? begin
+                   : SizeWithNValues((begin + end) / 2 + 1) >
+                           params_type::kTargetNodeSize
+                       ? NodeTargetValues(begin, (begin + end) / 2)
+                       : NodeTargetValues((begin + end) / 2 + 1, end);
+        }
+
+        enum {
+            kTargetNodeSize = params_type::kTargetNodeSize,
+            kNodeTargetValues = NodeTargetValues(0, params_type::kTargetNodeSize),
+
+            // We need a minimum of 3 values per internal node in order to perform
+            // splitting (1 value for the two nodes involved in the split and 1 value
+            // propagated to the parent as the delimiter for the split).
+            kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3,
+
+            // The node is internal (i.e. is not a leaf node) if and only if `max_count`
+            // has this value.
+            kInternalNodeMaxCount = 0,
+        };
+
+        // Leaves can have less than kNodeValues values.
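+        // Illustrative arithmetic (assumed numbers, not taken from this file): with
+        // a 256-byte target node, 8-byte slots and a 1-byte field_type, the overhead
+        // is roughly sizeof(void *) + 4 = 12 bytes, so NodeTargetValues(0, 256)
+        // settles near (256 - 12) / 8 = 30 slots per leaf; kNodeValues then clamps
+        // this to at least 3.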
+ constexpr static layout_type LeafLayout(const int max_values = kNodeValues) { + return layout_type(/*parent*/ 1, + /*position, start, count, max_count*/ 4, + /*values*/ (size_t)max_values, + /*children*/ 0); + } + constexpr static layout_type InternalLayout() { + return layout_type(/*parent*/ 1, + /*position, start, count, max_count*/ 4, + /*values*/ kNodeValues, + /*children*/ kNodeValues + 1); + } + constexpr static size_type LeafSize(const int max_values = kNodeValues) { + return (size_type)LeafLayout(max_values).AllocSize(); + } + constexpr static size_type InternalSize() { + return (size_type)InternalLayout().AllocSize(); + } + + // N is the index of the type in the Layout definition. + // ElementType is the Nth type in the Layout definition. + template + inline typename layout_type::template ElementType *GetField() { + // We assert that we don't read from values that aren't there. + assert(N < 3 || !leaf()); + return InternalLayout().template Pointer(reinterpret_cast(this)); + } + + template + inline const typename layout_type::template ElementType *GetField() const { + assert(N < 3 || !leaf()); + return InternalLayout().template Pointer( + reinterpret_cast(this)); + } + + void set_parent(btree_node *p) { *GetField<0>() = p; } + field_type &mutable_count() { return GetField<1>()[2]; } + slot_type *slot(size_type i) { return &GetField<2>()[i]; } + const slot_type *slot(size_type i) const { return &GetField<2>()[i]; } + void set_position(field_type v) { GetField<1>()[0] = v; } + void set_start(field_type v) { GetField<1>()[1] = v; } + void set_count(field_type v) { GetField<1>()[2] = v; } + void set_max_count(field_type v) { GetField<1>()[3] = v; } + + public: + // Whether this is a leaf node or not. This value doesn't change after the + // node is created. + bool leaf() const { return GetField<1>()[3] != kInternalNodeMaxCount; } + + // Getter for the position of this node in its parent. + field_type position() const { return GetField<1>()[0]; } + + // Getter for the offset of the first value in the `values` array. + field_type start() const { return GetField<1>()[1]; } + + // Getters for the number of values stored in this node. + field_type count() const { return GetField<1>()[2]; } + field_type max_count() const { + // Internal nodes have max_count==kInternalNodeMaxCount. + // Leaf nodes have max_count in [1, kNodeValues]. + const field_type max_cnt = GetField<1>()[3]; + return max_cnt == field_type{kInternalNodeMaxCount} + ? field_type{kNodeValues} + : max_cnt; + } + + // Getter for the parent of this node. + btree_node *parent() const { return *GetField<0>(); } + // Getter for whether the node is the root of the tree. The parent of the + // root of the tree is the leftmost node in the tree which is guaranteed to + // be a leaf. + bool is_root() const { return parent()->leaf(); } + void make_root() { + assert(parent()->is_root()); + set_parent(parent()->parent()); + } + + // Getters for the key/value at position i in the node. + const key_type &key(size_type i) const { return params_type::key(slot(i)); } + reference value(size_type i) { return params_type::element(slot(i)); } + const_reference value(size_type i) const { return params_type::element(slot(i)); } + + // Getters/setter for the child at position i in the node. 
+ btree_node *child(size_type i) const { return GetField<3>()[i]; } + btree_node *&mutable_child(size_type i) { return GetField<3>()[i]; } + void clear_child(size_type i) { + phmap::priv::SanitizerPoisonObject(&mutable_child(i)); + } + void set_child(size_type i, btree_node *c) { + phmap::priv::SanitizerUnpoisonObject(&mutable_child(i)); + mutable_child(i) = c; + c->set_position((field_type)i); + } + void init_child(int i, btree_node *c) { + set_child(i, c); + c->set_parent(this); + } + + // Returns the position of the first value whose key is not less than k. + template + SearchResult lower_bound( + const K &k, const key_compare &comp) const { + return use_linear_search::value ? linear_search(k, comp) + : binary_search(k, comp); + } + // Returns the position of the first value whose key is greater than k. + template + int upper_bound(const K &k, const key_compare &comp) const { + auto upper_compare = upper_bound_adapter(comp); + return use_linear_search::value ? linear_search(k, upper_compare).value + : binary_search(k, upper_compare).value; + } + + template + SearchResult::value> + linear_search(const K &k, const Compare &comp) const { + return linear_search_impl(k, 0, count(), comp, + btree_is_key_compare_to()); + } + + template + SearchResult::value> + binary_search(const K &k, const Compare &comp) const { + return binary_search_impl(k, 0, count(), comp, + btree_is_key_compare_to()); + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using plain compare. + template + SearchResult linear_search_impl( + const K &k, int s, const int e, const Compare &comp, + std::false_type /* IsCompareTo */) const { + while (s < e) { + if (!comp(key(s), k)) { + break; + } + ++s; + } + return {s}; + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using compare-to. + template + SearchResult linear_search_impl( + const K &k, int s, const int e, const Compare &comp, + std::true_type /* IsCompareTo */) const { + while (s < e) { + const phmap::weak_ordering c = comp(key(s), k); + if (c == 0) { + return {s, MatchKind::kEq}; + } else if (c > 0) { + break; + } + ++s; + } + return {s, MatchKind::kNe}; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using plain compare. + template + SearchResult binary_search_impl( + const K &k, int s, int e, const Compare &comp, + std::false_type /* IsCompareTo */) const { + while (s != e) { + const int mid = (s + e) >> 1; + if (comp(key(mid), k)) { + s = mid + 1; + } else { + e = mid; + } + } + return {s}; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using compare-to. + template + SearchResult binary_search_impl( + const K &k, int s, int e, const CompareTo &comp, + std::true_type /* IsCompareTo */) const { + if (is_multi_container::value) { + MatchKind exact_match = MatchKind::kNe; + while (s != e) { + const int mid = (s + e) >> 1; + const phmap::weak_ordering c = comp(key(mid), k); + if (c < 0) { + s = mid + 1; + } else { + e = mid; + if (c == 0) { + // Need to return the first value whose key is not less than k, + // which requires continuing the binary search if this is a + // multi-container. + exact_match = MatchKind::kEq; + } + } + } + return {s, exact_match}; + } else { // Not a multi-container. 
+ while (s != e) { + const int mid = (s + e) >> 1; + const phmap::weak_ordering c = comp(key(mid), k); + if (c < 0) { + s = mid + 1; + } else if (c > 0) { + e = mid; + } else { + return {mid, MatchKind::kEq}; + } + } + return {s, MatchKind::kNe}; + } + } + + // Emplaces a value at position i, shifting all existing values and + // children at positions >= i to the right by 1. + template + void emplace_value(size_type i, allocator_type *alloc, Args &&... args); + + // Removes the value at position i, shifting all existing values and children + // at positions > i to the left by 1. + void remove_value(int i, allocator_type *alloc); + + // Removes the values at positions [i, i + to_erase), shifting all values + // after that range to the left by to_erase. Does not change children at all. + void remove_values_ignore_children(int i, size_type to_erase, + allocator_type *alloc); + + // Rebalances a node with its right sibling. + void rebalance_right_to_left(int to_move, btree_node *right, + allocator_type *alloc); + void rebalance_left_to_right(int to_move, btree_node *right, + allocator_type *alloc); + + // Splits a node, moving a portion of the node's values to its right sibling. + void split(int insert_position, btree_node *dest, allocator_type *alloc); + + // Merges a node with its right sibling, moving all of the values and the + // delimiting key in the parent node onto itself. + void merge(btree_node *sibling, allocator_type *alloc); + + // Swap the contents of "this" and "src". + void swap(btree_node *src, allocator_type *alloc); + + // Node allocation/deletion routines. + static btree_node *init_leaf(btree_node *n, btree_node *parent, + int max_cnt) { + n->set_parent(parent); + n->set_position(0); + n->set_start(0); + n->set_count(0); + n->set_max_count((field_type)max_cnt); + phmap::priv::SanitizerPoisonMemoryRegion( + n->slot(0), max_cnt * sizeof(slot_type)); + return n; + } + static btree_node *init_internal(btree_node *n, btree_node *parent) { + init_leaf(n, parent, kNodeValues); + // Set `max_count` to a sentinel value to indicate that this node is + // internal. + n->set_max_count(kInternalNodeMaxCount); + phmap::priv::SanitizerPoisonMemoryRegion( + &n->mutable_child(0), (kNodeValues + 1) * sizeof(btree_node *)); + return n; + } + void destroy(allocator_type *alloc) { + for (int i = 0; i < count(); ++i) { + value_destroy(i, alloc); + } + } + + public: + // Exposed only for tests. + static bool testonly_uses_linear_node_search() { + return use_linear_search::value; + } + + private: + template + void value_init(const size_type i, allocator_type *alloc, Args &&... args) { + phmap::priv::SanitizerUnpoisonObject(slot(i)); + params_type::construct(alloc, slot(i), std::forward(args)...); + } + void value_destroy(const size_type i, allocator_type *alloc) { + params_type::destroy(alloc, slot(i)); + phmap::priv::SanitizerPoisonObject(slot(i)); + } + + // Move n values starting at value i in this node into the values starting at + // value j in node x. + void uninitialized_move_n(const size_type n, const size_type i, + const size_type j, btree_node *x, + allocator_type *alloc) { + phmap::priv::SanitizerUnpoisonMemoryRegion( + x->slot(j), n * sizeof(slot_type)); + for (slot_type *src = slot(i), *end = src + n, *dest = x->slot(j); + src != end; ++src, ++dest) { + params_type::construct(alloc, dest, src); + } + } + + // Destroys a range of n values, starting at index i. 
+ void value_destroy_n(const size_type i, const size_type n, + allocator_type *alloc) { + for (size_type j = 0; j < n; ++j) { + value_destroy(i + j, alloc); + } + } + + template + friend class btree; + template + friend struct btree_iterator; + friend class BtreeNodePeer; + }; + + template + struct btree_iterator { + private: + using key_type = typename Node::key_type; + using size_type = typename Node::size_type; + using params_type = typename Node::params_type; + + using node_type = Node; + using normal_node = typename std::remove_const::type; + using const_node = const Node; + using normal_pointer = typename params_type::pointer; + using normal_reference = typename params_type::reference; + using const_pointer = typename params_type::const_pointer; + using const_reference = typename params_type::const_reference; + using slot_type = typename params_type::slot_type; + + using iterator = + btree_iterator; + using const_iterator = + btree_iterator; + + public: + // These aliases are public for std::iterator_traits. + using difference_type = typename Node::difference_type; + using value_type = typename params_type::value_type; + using pointer = Pointer; + using reference = Reference; + using iterator_category = std::bidirectional_iterator_tag; + + btree_iterator() : node(nullptr), position(-1) {} + btree_iterator(Node *n, int p) : node(n), position(p) {} + + // NOTE: this SFINAE allows for implicit conversions from iterator to + // const_iterator, but it specifically avoids defining copy constructors so + // that btree_iterator can be trivially copyable. This is for performance and + // binary size reasons. + template , iterator>::value && + std::is_same::value, + int> = 0> + btree_iterator(const btree_iterator &x) // NOLINT + : node(x.node), position(x.position) {} + + private: + // This SFINAE allows explicit conversions from const_iterator to + // iterator, but also avoids defining a copy constructor. + // NOTE: the const_cast is safe because this constructor is only called by + // non-const methods and the container owns the nodes. + template , const_iterator>::value && + std::is_same::value, + int> = 0> + explicit btree_iterator(const btree_iterator &x) + : node(const_cast(x.node)), position(x.position) {} + + // Increment/decrement the iterator. + void increment() { + if (node->leaf() && ++position < node->count()) { + return; + } + increment_slow(); + } + void increment_slow(); + + void decrement() { + if (node->leaf() && --position >= 0) { + return; + } + decrement_slow(); + } + void decrement_slow(); + + public: + bool operator==(const const_iterator &x) const { + return node == x.node && position == x.position; + } + bool operator!=(const const_iterator &x) const { + return node != x.node || position != x.position; + } + + // Accessors for the key/value the iterator is pointing at. 
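+ // Usage sketch (assuming `it` is a valid, dereferenceable iterator):
+ //   const auto &v = *it;        // node->value(position)
+ //   auto *p = it.operator->();  // &node->value(position)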
+ reference operator*() const { + return node->value(position); + } + pointer operator->() const { + return &node->value(position); + } + + btree_iterator& operator++() { + increment(); + return *this; + } + btree_iterator& operator--() { + decrement(); + return *this; + } + btree_iterator operator++(int) { + btree_iterator tmp = *this; + ++*this; + return tmp; + } + btree_iterator operator--(int) { + btree_iterator tmp = *this; + --*this; + return tmp; + } + + private: + template + friend class btree; + template + friend class btree_container; + template + friend class btree_set_container; + template + friend class btree_map_container; + template + friend class btree_multiset_container; + template + friend struct btree_iterator; + template + friend class base_checker; + + const key_type &key() const { return node->key(position); } + slot_type *slot() { return node->slot(position); } + + // The node in the tree the iterator is pointing at. + Node *node; + // The position within the node of the tree the iterator is pointing at. + // TODO(ezb): make this a field_type + int position; + }; + + template + class btree { + using node_type = btree_node; + using is_key_compare_to = typename Params::is_key_compare_to; + + // We use a static empty node for the root/leftmost/rightmost of empty btrees + // in order to avoid branching in begin()/end(). + struct alignas(node_type::Alignment()) EmptyNodeType : node_type { + using field_type = typename node_type::field_type; + node_type *parent; + field_type position = 0; + field_type start = 0; + field_type count = 0; + // max_count must be != kInternalNodeMaxCount (so that this node is regarded + // as a leaf node). max_count() is never called when the tree is empty. + field_type max_count = node_type::kInternalNodeMaxCount + 1; + +#ifdef _MSC_VER + // MSVC has constexpr code generations bugs here. + EmptyNodeType() : parent(this) {} +#else + constexpr EmptyNodeType(node_type *p) : parent(p) {} +#endif + }; + + static node_type *EmptyNode() { +#ifdef _MSC_VER + static EmptyNodeType empty_node; + // This assert fails on some other construction methods. 
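+ // On MSVC the empty node is a default-constructed function-local static,
+ // so its self-parent invariant is established at run time; the assert
+ // below documents that other construction paths may not preserve it.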
+ assert(empty_node.parent == &empty_node); + return &empty_node; +#else + static constexpr EmptyNodeType empty_node( + const_cast(&empty_node)); + return const_cast(&empty_node); +#endif + } + + enum { + kNodeValues = node_type::kNodeValues, + kMinNodeValues = kNodeValues / 2, + }; + + struct node_stats { + using size_type = typename Params::size_type; + + node_stats(size_type l, size_type i) + : leaf_nodes(l), + internal_nodes(i) { + } + + node_stats& operator+=(const node_stats &x) { + leaf_nodes += x.leaf_nodes; + internal_nodes += x.internal_nodes; + return *this; + } + + size_type leaf_nodes; + size_type internal_nodes; + }; + + public: + using key_type = typename Params::key_type; + using value_type = typename Params::value_type; + using size_type = typename Params::size_type; + using difference_type = typename Params::difference_type; + using key_compare = typename Params::key_compare; + using value_compare = typename Params::value_compare; + using allocator_type = typename Params::allocator_type; + using reference = typename Params::reference; + using const_reference = typename Params::const_reference; + using pointer = typename Params::pointer; + using const_pointer = typename Params::const_pointer; + using iterator = btree_iterator; + using const_iterator = typename iterator::const_iterator; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using node_handle_type = node_handle; + + // Internal types made public for use by btree_container types. + using params_type = Params; + using slot_type = typename Params::slot_type; + + private: + // For use in copy_or_move_values_in_order. + const value_type &maybe_move_from_iterator(const_iterator x) { return *x; } + value_type &&maybe_move_from_iterator(iterator x) { return std::move(*x); } + + // Copies or moves (depending on the template parameter) the values in + // x into this btree in their order in x. This btree must be empty before this + // method is called. This method is used in copy construction, copy + // assignment, and move assignment. + template + void copy_or_move_values_in_order(Btree *x); + + // Validates that various assumptions/requirements are true at compile time. + constexpr static bool static_assert_validation(); + + public: + btree(const key_compare &comp, const allocator_type &alloc); + + btree(const btree &x); + btree(btree &&x) noexcept + : root_(std::move(x.root_)), + rightmost_(phmap::exchange(x.rightmost_, EmptyNode())), + size_(phmap::exchange(x.size_, 0)) { + x.mutable_root() = EmptyNode(); + } + + ~btree() { + // Put static_asserts in destructor to avoid triggering them before the type + // is complete. + static_assert(static_assert_validation(), "This call must be elided."); + clear(); + } + + // Assign the contents of x to *this. 
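+ // Copy assignment clears this tree and re-inserts x's values in order via
+ // copy_or_move_values_in_order(), so it costs O(x.size()) without any key
+ // comparisons.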
+ btree &operator=(const btree &x); + btree &operator=(btree &&x) noexcept; + + iterator begin() { + return iterator(leftmost(), 0); + } + const_iterator begin() const { + return const_iterator(leftmost(), 0); + } + iterator end() { return iterator(rightmost_, rightmost_->count()); } + const_iterator end() const { + return const_iterator(rightmost_, rightmost_->count()); + } + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + // Finds the first element whose key is not less than key. + template + iterator lower_bound(const K &key) { + return internal_end(internal_lower_bound(key)); + } + template + const_iterator lower_bound(const K &key) const { + return internal_end(internal_lower_bound(key)); + } + + // Finds the first element whose key is greater than key. + template + iterator upper_bound(const K &key) { + return internal_end(internal_upper_bound(key)); + } + template + const_iterator upper_bound(const K &key) const { + return internal_end(internal_upper_bound(key)); + } + + // Finds the range of values which compare equal to key. The first member of + // the returned pair is equal to lower_bound(key). The second member pair of + // the pair is equal to upper_bound(key). + template + std::pair equal_range(const K &key) { + return {lower_bound(key), upper_bound(key)}; + } + template + std::pair equal_range(const K &key) const { + return {lower_bound(key), upper_bound(key)}; + } + + // Inserts a value into the btree only if it does not already exist. The + // boolean return value indicates whether insertion succeeded or failed. + // Requirement: if `key` already exists in the btree, does not consume `args`. + // Requirement: `key` is never referenced after consuming `args`. + template + std::pair insert_unique(const key_type &key, Args &&... args); + + // Inserts with hint. Checks to see if the value should be placed immediately + // before `position` in the tree. If so, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_unique() were made. + // Requirement: if `key` already exists in the btree, does not consume `args`. + // Requirement: `key` is never referenced after consuming `args`. + template + std::pair insert_hint_unique(iterator position, + const key_type &key, + Args &&... args); + + // Insert a range of values into the btree. + template + void insert_iterator_unique(InputIterator b, InputIterator e); + + // Inserts a value into the btree. + template + iterator insert_multi(const key_type &key, ValueType &&v); + + // Inserts a value into the btree. + template + iterator insert_multi(ValueType &&v) { + return insert_multi(params_type::key(v), std::forward(v)); + } + + // Insert with hint. Check to see if the value should be placed immediately + // before position in the tree. If it does, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_multi(v) were made. + template + iterator insert_hint_multi(iterator position, ValueType &&v); + + // Insert a range of values into the btree. + template + void insert_iterator_multi(InputIterator b, InputIterator e); + + // Erase the specified iterator from the btree. 
The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + // Requirement: does not read the value at `*iter`. + iterator erase(iterator iter); + + // Erases range. Returns the number of keys erased and an iterator pointing + // to the element after the last erased element. + std::pair erase(iterator begin, iterator end); + + // Erases the specified key from the btree. Returns 1 if an element was + // erased and 0 otherwise. + template + size_type erase_unique(const K &key); + + // Erases all of the entries matching the specified key from the + // btree. Returns the number of elements erased. + template + size_type erase_multi(const K &key); + + // Finds the iterator corresponding to a key or returns end() if the key is + // not present. + template + iterator find(const K &key) { + return internal_end(internal_find(key)); + } + template + const_iterator find(const K &key) const { + return internal_end(internal_find(key)); + } + + // Returns a count of the number of times the key appears in the btree. + template + size_type count_unique(const K &key) const { + const iterator beg = internal_find(key); + if (beg.node == nullptr) { + // The key doesn't exist in the tree. + return 0; + } + return 1; + } + // Returns a count of the number of times the key appears in the btree. + template + size_type count_multi(const K &key) const { + const auto range = equal_range(key); + return std::distance(range.first, range.second); + } + + // Clear the btree, deleting all of the values it contains. + void clear(); + + // Swap the contents of *this and x. + void swap(btree &x); + + const key_compare &key_comp() const noexcept { + return root_.template get<0>(); + } + template + bool compare_keys(const K &x, const LK &y) const { + return compare_internal::compare_result_as_less_than(key_comp()(x, y)); + } + + value_compare value_comp() const { return value_compare(key_comp()); } + + // Verifies the structure of the btree. + void verify() const; + + // Size routines. + size_type size() const { return size_; } + size_type max_size() const { return (std::numeric_limits::max)(); } + bool empty() const { return size_ == 0; } + + // The height of the btree. An empty tree will have height 0. + size_type height() const { + size_type h = 0; + if (!empty()) { + // Count the length of the chain from the leftmost node up to the + // root. We actually count from the root back around to the level below + // the root, but the calculation is the same because of the circularity + // of that traversal. + const node_type *n = root(); + do { + ++h; + n = n->parent(); + } while (n != root()); + } + return h; + } + + // The number of internal, leaf and total nodes used by the btree. + size_type leaf_nodes() const { + return internal_stats(root()).leaf_nodes; + } + size_type internal_nodes() const { + return internal_stats(root()).internal_nodes; + } + size_type nodes() const { + node_stats stats = internal_stats(root()); + return stats.leaf_nodes + stats.internal_nodes; + } + + // The total number of bytes used by the btree. 
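+ // When the tree is a single leaf root this is sizeof(*this) plus that
+ // leaf's LeafSize(); otherwise it sums LeafSize()/InternalSize() over all
+ // nodes counted by internal_stats().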
+ size_type bytes_used() const { + node_stats stats = internal_stats(root()); + if (stats.leaf_nodes == 1 && stats.internal_nodes == 0) { + return sizeof(*this) + + node_type::LeafSize(root()->max_count()); + } else { + return sizeof(*this) + + stats.leaf_nodes * node_type::LeafSize() + + stats.internal_nodes * node_type::InternalSize(); + } + } + + // The average number of bytes used per value stored in the btree. + static double average_bytes_per_value() { + // Returns the number of bytes per value on a leaf node that is 75% + // full. Experimentally, this matches up nicely with the computed number of + // bytes per value in trees that had their values inserted in random order. + return node_type::LeafSize() / (kNodeValues * 0.75); + } + + // The fullness of the btree. Computed as the number of elements in the btree + // divided by the maximum number of elements a tree with the current number + // of nodes could hold. A value of 1 indicates perfect space + // utilization. Smaller values indicate space wastage. + // Returns 0 for empty trees. + double fullness() const { + if (empty()) return 0.0; + return static_cast(size()) / (nodes() * kNodeValues); + } + // The overhead of the btree structure in bytes per node. Computed as the + // total number of bytes used by the btree minus the number of bytes used for + // storing elements divided by the number of elements. + // Returns 0 for empty trees. + double overhead() const { + if (empty()) return 0.0; + return (bytes_used() - size() * sizeof(value_type)) / + static_cast(size()); + } + + // The allocator used by the btree. + allocator_type get_allocator() const { + return allocator(); + } + + private: + // Internal accessor routines. + node_type *root() { return root_.template get<2>(); } + const node_type *root() const { return root_.template get<2>(); } + node_type *&mutable_root() noexcept { return root_.template get<2>(); } + key_compare *mutable_key_comp() noexcept { return &root_.template get<0>(); } + + // The leftmost node is stored as the parent of the root node. + node_type *leftmost() { return root()->parent(); } + const node_type *leftmost() const { return root()->parent(); } + + // Allocator routines. + allocator_type *mutable_allocator() noexcept { + return &root_.template get<1>(); + } + const allocator_type &allocator() const noexcept { + return root_.template get<1>(); + } + + // Allocates a correctly aligned node of at least size bytes using the + // allocator. + node_type *allocate(const size_type sz) { + return reinterpret_cast( + phmap::priv::Allocate( + mutable_allocator(), (size_t)sz)); + } + + // Node creation/deletion routines. + node_type* new_internal_node(node_type *parent) { + node_type *p = allocate(node_type::InternalSize()); + return node_type::init_internal(p, parent); + } + node_type* new_leaf_node(node_type *parent) { + node_type *p = allocate(node_type::LeafSize()); + return node_type::init_leaf(p, parent, kNodeValues); + } + node_type *new_leaf_root_node(const int max_count) { + node_type *p = allocate(node_type::LeafSize(max_count)); + return node_type::init_leaf(p, p, max_count); + } + + // Deletion helper routines. + void erase_same_node(iterator begin, iterator end); + iterator erase_from_leaf_node(iterator begin, size_type to_erase); + iterator rebalance_after_delete(iterator iter); + + // Deallocates a node of a certain size in bytes using the allocator. 
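+ // The size passed here must match the size used when the node was
+ // allocated: node_type::LeafSize(node->max_count()) for leaf nodes and
+ // node_type::InternalSize() for internal nodes (see delete_leaf_node and
+ // delete_internal_node below).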
+ void deallocate(const size_type sz, node_type *node) { + phmap::priv::Deallocate( + mutable_allocator(), node, (size_t)sz); + } + + void delete_internal_node(node_type *node) { + node->destroy(mutable_allocator()); + deallocate(node_type::InternalSize(), node); + } + void delete_leaf_node(node_type *node) { + node->destroy(mutable_allocator()); + deallocate(node_type::LeafSize(node->max_count()), node); + } + + // Rebalances or splits the node iter points to. + void rebalance_or_split(iterator *iter); + + // Merges the values of left, right and the delimiting key on their parent + // onto left, removing the delimiting key and deleting right. + void merge_nodes(node_type *left, node_type *right); + + // Tries to merge node with its left or right sibling, and failing that, + // rebalance with its left or right sibling. Returns true if a merge + // occurred, at which point it is no longer valid to access node. Returns + // false if no merging took place. + bool try_merge_or_rebalance(iterator *iter); + + // Tries to shrink the height of the tree by 1. + void try_shrink(); + + iterator internal_end(iterator iter) { + return iter.node != nullptr ? iter : end(); + } + const_iterator internal_end(const_iterator iter) const { + return iter.node != nullptr ? iter : end(); + } + + // Emplaces a value into the btree immediately before iter. Requires that + // key(v) <= iter.key() and (--iter).key() <= key(v). + template + iterator internal_emplace(iterator iter, Args &&... args); + + // Returns an iterator pointing to the first value >= the value "iter" is + // pointing at. Note that "iter" might be pointing to an invalid location as + // iter.position == iter.node->count(). This routine simply moves iter up in + // the tree to a valid location. + // Requires: iter.node is non-null. + template + static IterType internal_last(IterType iter); + + // Returns an iterator pointing to the leaf position at which key would + // reside in the tree. We provide 2 versions of internal_locate. The first + // version uses a less-than comparator and is incapable of distinguishing when + // there is an exact match. The second version is for the key-compare-to + // specialization and distinguishes exact matches. The key-compare-to + // specialization allows the caller to avoid a subsequent comparison to + // determine if an exact match was made, which is important for keys with + // expensive comparison, such as strings. + template + SearchResult internal_locate( + const K &key) const; + + template + SearchResult internal_locate_impl( + const K &key, std::false_type /* IsCompareTo */) const; + + template + SearchResult internal_locate_impl( + const K &key, std::true_type /* IsCompareTo */) const; + + // Internal routine which implements lower_bound(). + template + iterator internal_lower_bound(const K &key) const; + + // Internal routine which implements upper_bound(). + template + iterator internal_upper_bound(const K &key) const; + + // Internal routine which implements find(). + template + iterator internal_find(const K &key) const; + + // Deletes a node and all of its children. + void internal_clear(node_type *node); + + // Verifies the tree structure of node. + int internal_verify(const node_type *node, + const key_type *lo, const key_type *hi) const; + + node_stats internal_stats(const node_type *node) const { + // The root can be a static empty node. 
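+ // (EmptyNode() is a shared static sentinel rather than an allocated leaf,
+ // so an empty tree reports zero nodes.)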
+ if (node == nullptr || (node == root() && empty())) { + return node_stats(0, 0); + } + if (node->leaf()) { + return node_stats(1, 0); + } + node_stats res(0, 1); + for (int i = 0; i <= node->count(); ++i) { + res += internal_stats(node->child(i)); + } + return res; + } + + public: + // Exposed only for tests. + static bool testonly_uses_linear_node_search() { + return node_type::testonly_uses_linear_node_search(); + } + + private: + // We use compressed tuple in order to save space because key_compare and + // allocator_type are usually empty. + phmap::priv::CompressedTuple + root_; + + // A pointer to the rightmost node. Note that the leftmost node is stored as + // the root's parent. + node_type *rightmost_; + + // Number of values. + size_type size_; + }; + + //// + // btree_node methods + template + template + inline void btree_node
<P>
::emplace_value(const size_type i, + allocator_type *alloc, + Args &&... args) { + assert(i <= count()); + // Shift old values to create space for new value and then construct it in + // place. + if (i < count()) { + value_init(count(), alloc, slot(count() - 1)); + for (size_type j = count() - 1; j > i; --j) + params_type::move(alloc, slot(j - 1), slot(j)); + value_destroy(i, alloc); + } + value_init(i, alloc, std::forward(args)...); + set_count((field_type)(count() + 1)); + + if (!leaf() && count() > i + 1) { + for (int j = count(); j > (int)(i + 1); --j) { + set_child(j, child(j - 1)); + } + clear_child(i + 1); + } + } + + template + inline void btree_node
<P>
::remove_value(const int i, allocator_type *alloc) { + if (!leaf() && count() > i + 1) { + assert(child(i + 1)->count() == 0); + for (size_type j = i + 1; j < count(); ++j) { + set_child(j, child(j + 1)); + } + clear_child(count()); + } + + remove_values_ignore_children(i, /*to_erase=*/1, alloc); + } + + template + inline void btree_node
<P>
::remove_values_ignore_children( + int i, size_type to_erase, allocator_type *alloc) { + params_type::move(alloc, slot(i + to_erase), slot(count()), slot(i)); + value_destroy_n(count() - to_erase, to_erase, alloc); + set_count((field_type)(count() - to_erase)); + } + + template + void btree_node
<P>
::rebalance_right_to_left(const int to_move, + btree_node *right, + allocator_type *alloc) { + assert(parent() == right->parent()); + assert(position() + 1 == right->position()); + assert(right->count() >= count()); + assert(to_move >= 1); + assert(to_move <= right->count()); + + // 1) Move the delimiting value in the parent to the left node. + value_init(count(), alloc, parent()->slot(position())); + + // 2) Move the (to_move - 1) values from the right node to the left node. + right->uninitialized_move_n(to_move - 1, 0, count() + 1, this, alloc); + + // 3) Move the new delimiting value to the parent from the right node. + params_type::move(alloc, right->slot(to_move - 1), + parent()->slot(position())); + + // 4) Shift the values in the right node to their correct position. + params_type::move(alloc, right->slot(to_move), right->slot(right->count()), + right->slot(0)); + + // 5) Destroy the now-empty to_move entries in the right node. + right->value_destroy_n(right->count() - to_move, to_move, alloc); + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i < to_move; ++i) { + init_child(count() + i + 1, right->child(i)); + } + for (int i = 0; i <= right->count() - to_move; ++i) { + assert(i + to_move <= right->max_count()); + right->init_child(i, right->child(i + to_move)); + right->clear_child(i + to_move); + } + } + + // Fixup the counts on the left and right nodes. + set_count((field_type)(count() + to_move)); + right->set_count((field_type)(right->count() - to_move)); + } + + template + void btree_node
<P>
::rebalance_left_to_right(const int to_move, + btree_node *right, + allocator_type *alloc) { + assert(parent() == right->parent()); + assert(position() + 1 == right->position()); + assert(count() >= right->count()); + assert(to_move >= 1); + assert(to_move <= count()); + + // Values in the right node are shifted to the right to make room for the + // new to_move values. Then, the delimiting value in the parent and the + // other (to_move - 1) values in the left node are moved into the right node. + // Lastly, a new delimiting value is moved from the left node into the + // parent, and the remaining empty left node entries are destroyed. + + if (right->count() >= to_move) { + // The original location of the right->count() values are sufficient to hold + // the new to_move entries from the parent and left node. + + // 1) Shift existing values in the right node to their correct positions. + right->uninitialized_move_n(to_move, right->count() - to_move, + right->count(), right, alloc); + for (slot_type *src = right->slot(right->count() - to_move - 1), + *dest = right->slot(right->count() - 1), + *end = right->slot(0); + src >= end; --src, --dest) { + params_type::move(alloc, src, dest); + } + + // 2) Move the delimiting value in the parent to the right node. + params_type::move(alloc, parent()->slot(position()), + right->slot(to_move - 1)); + + // 3) Move the (to_move - 1) values from the left node to the right node. + params_type::move(alloc, slot(count() - (to_move - 1)), slot(count()), + right->slot(0)); + } else { + // The right node does not have enough initialized space to hold the new + // to_move entries, so part of them will move to uninitialized space. + + // 1) Shift existing values in the right node to their correct positions. + right->uninitialized_move_n(right->count(), 0, to_move, right, alloc); + + // 2) Move the delimiting value in the parent to the right node. + right->value_init(to_move - 1, alloc, parent()->slot(position())); + + // 3) Move the (to_move - 1) values from the left node to the right node. + const size_type uninitialized_remaining = to_move - right->count() - 1; + uninitialized_move_n(uninitialized_remaining, + count() - uninitialized_remaining, right->count(), + right, alloc); + params_type::move(alloc, slot(count() - (to_move - 1)), + slot(count() - uninitialized_remaining), right->slot(0)); + } + + // 4) Move the new delimiting value to the parent from the left node. + params_type::move(alloc, slot(count() - to_move), parent()->slot(position())); + + // 5) Destroy the now-empty to_move entries in the left node. + value_destroy_n(count() - to_move, to_move, alloc); + + if (!leaf()) { + // Move the child pointers from the left to the right node. + for (int i = right->count(); i >= 0; --i) { + right->init_child(i + to_move, right->child(i)); + right->clear_child(i); + } + for (int i = 1; i <= to_move; ++i) { + right->init_child(i - 1, child(count() - to_move + i)); + clear_child(count() - to_move + i); + } + } + + // Fixup the counts on the left and right nodes. + set_count((field_type)(count() - to_move)); + right->set_count((field_type)(right->count() + to_move)); + } + + template + void btree_node
<P>
::split(const int insert_position, btree_node *dest, + allocator_type *alloc) { + assert(dest->count() == 0); + assert(max_count() == kNodeValues); + + // We bias the split based on the position being inserted. If we're + // inserting at the beginning of the left node then bias the split to put + // more values on the right node. If we're inserting at the end of the + // right node then bias the split to put more values on the left node. + if (insert_position == 0) { + dest->set_count((field_type)(count() - 1)); + } else if (insert_position == kNodeValues) { + dest->set_count(0); + } else { + dest->set_count((field_type)(count() / 2)); + } + set_count((field_type)(count() - dest->count())); + assert(count() >= 1); + + // Move values from the left sibling to the right sibling. + uninitialized_move_n(dest->count(), count(), 0, dest, alloc); + + // Destroy the now-empty entries in the left node. + value_destroy_n(count(), dest->count(), alloc); + + // The split key is the largest value in the left sibling. + set_count((field_type)(count() - 1)); + parent()->emplace_value(position(), alloc, slot(count())); + value_destroy(count(), alloc); + parent()->init_child(position() + 1, dest); + + if (!leaf()) { + for (int i = 0; i <= dest->count(); ++i) { + assert(child(count() + i + 1) != nullptr); + dest->init_child(i, child(count() + i + 1)); + clear_child(count() + i + 1); + } + } + } + + template + void btree_node
<P>
::merge(btree_node *src, allocator_type *alloc) { + assert(parent() == src->parent()); + assert(position() + 1 == src->position()); + + // Move the delimiting value to the left node. + value_init(count(), alloc, parent()->slot(position())); + + // Move the values from the right to the left node. + src->uninitialized_move_n(src->count(), 0, count() + 1, this, alloc); + + // Destroy the now-empty entries in the right node. + src->value_destroy_n(0, src->count(), alloc); + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i <= src->count(); ++i) { + init_child(count() + i + 1, src->child(i)); + src->clear_child(i); + } + } + + // Fixup the counts on the src and dest nodes. + set_count((field_type)(1 + count() + src->count())); + src->set_count(0); + + // Remove the value on the parent node. + parent()->remove_value(position(), alloc); + } + + template + void btree_node
<P>
::swap(btree_node *x, allocator_type *alloc) { + using std::swap; + assert(leaf() == x->leaf()); + + // Determine which is the smaller/larger node. + btree_node *smaller = this, *larger = x; + if (smaller->count() > larger->count()) { + swap(smaller, larger); + } + + // Swap the values. + for (slot_type *a = smaller->slot(0), *b = larger->slot(0), + *end = a + smaller->count(); + a != end; ++a, ++b) { + params_type::swap(alloc, a, b); + } + + // Move values that can't be swapped. + const size_type to_move = larger->count() - smaller->count(); + larger->uninitialized_move_n(to_move, smaller->count(), smaller->count(), + smaller, alloc); + larger->value_destroy_n(smaller->count(), to_move, alloc); + + if (!leaf()) { + // Swap the child pointers. + std::swap_ranges(&smaller->mutable_child(0), + &smaller->mutable_child(smaller->count() + 1), + &larger->mutable_child(0)); + // Update swapped children's parent pointers. + int i = 0; + for (; i <= smaller->count(); ++i) { + smaller->child(i)->set_parent(smaller); + larger->child(i)->set_parent(larger); + } + // Move the child pointers that couldn't be swapped. + for (; i <= larger->count(); ++i) { + smaller->init_child(i, larger->child(i)); + larger->clear_child(i); + } + } + + // Swap the counts. + swap(mutable_count(), x->mutable_count()); + } + + //// + // btree_iterator methods + template + void btree_iterator::increment_slow() { + if (node->leaf()) { + assert(position >= node->count()); + btree_iterator save(*this); + while (position == node->count() && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position(); + node = node->parent(); + } + if (position == node->count()) { + *this = save; + } + } else { + assert(position < node->count()); + node = node->child(position + 1); + while (!node->leaf()) { + node = node->child(0); + } + position = 0; + } + } + + template + void btree_iterator::decrement_slow() { + if (node->leaf()) { + assert(position <= -1); + btree_iterator save(*this); + while (position < 0 && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position() - 1; + node = node->parent(); + } + if (position < 0) { + *this = save; + } + } else { + assert(position >= 0); + node = node->child(position); + while (!node->leaf()) { + node = node->child(node->count()); + } + position = node->count() - 1; + } + } + + //// + // btree methods + template + template + void btree
<P>
::copy_or_move_values_in_order(Btree *x) { + static_assert(std::is_same::value || + std::is_same::value, + "Btree type must be same or const."); + assert(empty()); + + // We can avoid key comparisons because we know the order of the + // values is the same order we'll store them in. + auto iter = x->begin(); + if (iter == x->end()) return; + insert_multi(maybe_move_from_iterator(iter)); + ++iter; + for (; iter != x->end(); ++iter) { + // If the btree is not empty, we can just insert the new value at the end + // of the tree. + internal_emplace(end(), maybe_move_from_iterator(iter)); + } + } + + template + constexpr bool btree
<P>
::static_assert_validation() { + static_assert(std::is_nothrow_copy_constructible::value, + "Key comparison must be nothrow copy constructible"); + static_assert(std::is_nothrow_copy_constructible::value, + "Allocator must be nothrow copy constructible"); + static_assert(type_traits_internal::is_trivially_copyable::value, + "iterator not trivially copyable."); + + // Note: We assert that kTargetValues, which is computed from + // Params::kTargetNodeSize, must fit the node_type::field_type. + static_assert( + kNodeValues < (1 << (8 * sizeof(typename node_type::field_type))), + "target node size too large"); + + // Verify that key_compare returns an phmap::{weak,strong}_ordering or bool. + using compare_result_type = + phmap::invoke_result_t; + static_assert( + std::is_same::value || + std::is_convertible::value, + "key comparison function must return phmap::{weak,strong}_ordering or " + "bool."); + + // Test the assumption made in setting kNodeValueSpace. + static_assert(node_type::MinimumOverhead() >= sizeof(void *) + 4, + "node space assumption incorrect"); + + return true; + } + + template + btree
<P>
::btree(const key_compare &comp, const allocator_type &alloc) + : root_(comp, alloc, EmptyNode()), rightmost_(EmptyNode()), size_(0) {} + + template + btree
<P>
::btree(const btree &x) : btree(x.key_comp(), x.allocator()) { + copy_or_move_values_in_order(&x); + } + + template + template + auto btree
<P>
::insert_unique(const key_type &key, Args &&... args) + -> std::pair { + if (empty()) { + mutable_root() = rightmost_ = new_leaf_root_node(1); + } + + auto res = internal_locate(key); + iterator &iter = res.value; + + if (res.HasMatch()) { + if (res.IsEq()) { + // The key already exists in the tree, do nothing. + return {iter, false}; + } + } else { + iterator last = internal_last(iter); + if (last.node && !compare_keys(key, last.key())) { + // The key already exists in the tree, do nothing. + return {last, false}; + } + } + return {internal_emplace(iter, std::forward(args)...), true}; + } + + template + template + inline auto btree
<P>
::insert_hint_unique(iterator position, const key_type &key, + Args &&... args) + -> std::pair { + if (!empty()) { + if (position == end() || compare_keys(key, position.key())) { + iterator prev = position; + if (position == begin() || compare_keys((--prev).key(), key)) { + // prev.key() < key < position.key() + return {internal_emplace(position, std::forward(args)...), true}; + } + } else if (compare_keys(position.key(), key)) { + ++position; + if (position == end() || compare_keys(key, position.key())) { + // {original `position`}.key() < key < {current `position`}.key() + return {internal_emplace(position, std::forward(args)...), true}; + } + } else { + // position.key() == key + return {position, false}; + } + } + return insert_unique(key, std::forward(args)...); + } + + template + template + void btree
<P>
::insert_iterator_unique(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_hint_unique(end(), params_type::key(*b), *b); + } + } + + template + template + auto btree
<P>
::insert_multi(const key_type &key, ValueType &&v) -> iterator { + if (empty()) { + mutable_root() = rightmost_ = new_leaf_root_node(1); + } + + iterator iter = internal_upper_bound(key); + if (iter.node == nullptr) { + iter = end(); + } + return internal_emplace(iter, std::forward(v)); + } + + template + template + auto btree
<P>
::insert_hint_multi(iterator position, ValueType &&v) -> iterator { + if (!empty()) { + const key_type &key = params_type::key(v); + if (position == end() || !compare_keys(position.key(), key)) { + iterator prev = position; + if (position == begin() || !compare_keys(key, (--prev).key())) { + // prev.key() <= key <= position.key() + return internal_emplace(position, std::forward(v)); + } + } else { + iterator next = position; + ++next; + if (next == end() || !compare_keys(next.key(), key)) { + // position.key() < key <= next.key() + return internal_emplace(next, std::forward(v)); + } + } + } + return insert_multi(std::forward(v)); + } + + template + template + void btree
<P>
::insert_iterator_multi(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_hint_multi(end(), *b); + } + } + + template + auto btree
<P>
::operator=(const btree &x) -> btree & { + if (this != &x) { + clear(); + + *mutable_key_comp() = x.key_comp(); + if (phmap::allocator_traits< + allocator_type>::propagate_on_container_copy_assignment::value) { + *mutable_allocator() = x.allocator(); + } + + copy_or_move_values_in_order(&x); + } + return *this; + } + + template + auto btree
<P>
::operator=(btree &&x) noexcept -> btree & { + if (this != &x) { + clear(); + + using std::swap; + if (phmap::allocator_traits< + allocator_type>::propagate_on_container_copy_assignment::value) { + // Note: `root_` also contains the allocator and the key comparator. + swap(root_, x.root_); + swap(rightmost_, x.rightmost_); + swap(size_, x.size_); + } else { + if (allocator() == x.allocator()) { + swap(mutable_root(), x.mutable_root()); + swap(*mutable_key_comp(), *x.mutable_key_comp()); + swap(rightmost_, x.rightmost_); + swap(size_, x.size_); + } else { + // We aren't allowed to propagate the allocator and the allocator is + // different so we can't take over its memory. We must move each element + // individually. We need both `x` and `this` to have `x`s key comparator + // while moving the values so we can't swap the key comparators. + *mutable_key_comp() = x.key_comp(); + copy_or_move_values_in_order(&x); + } + } + } + return *this; + } + + template + auto btree
<P>
::erase(iterator iter) -> iterator { + bool internal_delete = false; + if (!iter.node->leaf()) { + // Deletion of a value on an internal node. First, move the largest value + // from our left child here, then delete that position (in remove_value() + // below). We can get to the largest value from our left child by + // decrementing iter. + iterator internal_iter(iter); + --iter; + assert(iter.node->leaf()); + params_type::move(mutable_allocator(), iter.node->slot(iter.position), + internal_iter.node->slot(internal_iter.position)); + internal_delete = true; + } + + // Delete the key from the leaf. + iter.node->remove_value(iter.position, mutable_allocator()); + --size_; + + // We want to return the next value after the one we just erased. If we + // erased from an internal node (internal_delete == true), then the next + // value is ++(++iter). If we erased from a leaf node (internal_delete == + // false) then the next value is ++iter. Note that ++iter may point to an + // internal node and the value in the internal node may move to a leaf node + // (iter.node) when rebalancing is performed at the leaf level. + + iterator res = rebalance_after_delete(iter); + + // If we erased from an internal node, advance the iterator. + if (internal_delete) { + ++res; + } + return res; + } + + template + auto btree
<P>
::rebalance_after_delete(iterator iter) -> iterator { + // Merge/rebalance as we walk back up the tree. + iterator res(iter); + bool first_iteration = true; + for (;;) { + if (iter.node == root()) { + try_shrink(); + if (empty()) { + return end(); + } + break; + } + if (iter.node->count() >= kMinNodeValues) { + break; + } + bool merged = try_merge_or_rebalance(&iter); + // On the first iteration, we should update `res` with `iter` because `res` + // may have been invalidated. + if (first_iteration) { + res = iter; + first_iteration = false; + } + if (!merged) { + break; + } + iter.position = iter.node->position(); + iter.node = iter.node->parent(); + } + + // Adjust our return value. If we're pointing at the end of a node, advance + // the iterator. + if (res.position == res.node->count()) { + res.position = res.node->count() - 1; + ++res; + } + + return res; + } + + template + auto btree
<P>
::erase(iterator _begin, iterator _end) + -> std::pair { + difference_type count = std::distance(_begin, _end); + assert(count >= 0); + + if (count == 0) { + return {0, _begin}; + } + + if (count == (difference_type)size_) { + clear(); + return {count, this->end()}; + } + + if (_begin.node == _end.node) { + erase_same_node(_begin, _end); + size_ -= count; + return {count, rebalance_after_delete(_begin)}; + } + + const size_type target_size = size_ - count; + while (size_ > target_size) { + if (_begin.node->leaf()) { + const size_type remaining_to_erase = size_ - target_size; + const size_type remaining_in_node = _begin.node->count() - _begin.position; + _begin = erase_from_leaf_node( + _begin, (std::min)(remaining_to_erase, remaining_in_node)); + } else { + _begin = erase(_begin); + } + } + return {count, _begin}; + } + + template + void btree
<P>
::erase_same_node(iterator _begin, iterator _end) { + assert(_begin.node == _end.node); + assert(_end.position > _begin.position); + + node_type *node = _begin.node; + size_type to_erase = _end.position - _begin.position; + if (!node->leaf()) { + // Delete all children between _begin and _end. + for (size_type i = 0; i < to_erase; ++i) { + internal_clear(node->child(_begin.position + i + 1)); + } + // Rotate children after _end into new positions. + for (size_type i = _begin.position + to_erase + 1; i <= node->count(); ++i) { + node->set_child(i - to_erase, node->child(i)); + node->clear_child(i); + } + } + node->remove_values_ignore_children(_begin.position, to_erase, + mutable_allocator()); + + // Do not need to update rightmost_, because + // * either _end == this->end(), and therefore node == rightmost_, and still + // exists + // * or _end != this->end(), and therefore rightmost_ hasn't been erased, since + // it wasn't covered in [_begin, _end) + } + + template + auto btree
<P>
::erase_from_leaf_node(iterator _begin, size_type to_erase) + -> iterator { + node_type *node = _begin.node; + assert(node->leaf()); + assert(node->count() > _begin.position); + assert(_begin.position + to_erase <= node->count()); + + node->remove_values_ignore_children(_begin.position, to_erase, + mutable_allocator()); + + size_ -= to_erase; + + return rebalance_after_delete(_begin); + } + + template + template + auto btree
<P>
::erase_unique(const K &key) -> size_type { + const iterator iter = internal_find(key); + if (iter.node == nullptr) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + erase(iter); + return 1; + } + + template + template + auto btree
<P>
::erase_multi(const K &key) -> size_type { + const iterator _begin = internal_lower_bound(key); + if (_begin.node == nullptr) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + // Delete all of the keys between _begin and upper_bound(key). + const iterator _end = internal_end(internal_upper_bound(key)); + return erase(_begin, _end).first; + } + + template + void btree
<P>
::clear() { + if (!empty()) { + internal_clear(root()); + } + mutable_root() = EmptyNode(); + rightmost_ = EmptyNode(); + size_ = 0; + } + + template + void btree
<P>
::swap(btree &x) { + using std::swap; + if (phmap::allocator_traits< + allocator_type>::propagate_on_container_swap::value) { + // Note: `root_` also contains the allocator and the key comparator. + swap(root_, x.root_); + } else { + // It's undefined behavior if the allocators are unequal here. + assert(allocator() == x.allocator()); + swap(mutable_root(), x.mutable_root()); + swap(*mutable_key_comp(), *x.mutable_key_comp()); + } + swap(rightmost_, x.rightmost_); + swap(size_, x.size_); + } + + template + void btree
<P>
::verify() const { + assert(root() != nullptr); + assert(leftmost() != nullptr); + assert(rightmost_ != nullptr); + assert(empty() || size() == internal_verify(root(), nullptr, nullptr)); + assert(leftmost() == (++const_iterator(root(), -1)).node); + assert(rightmost_ == (--const_iterator(root(), root()->count())).node); + assert(leftmost()->leaf()); + assert(rightmost_->leaf()); + } + + template + void btree
<P>
::rebalance_or_split(iterator *iter) { + node_type *&node = iter->node; + int &insert_position = iter->position; + assert(node->count() == node->max_count()); + assert(kNodeValues == node->max_count()); + + // First try to make room on the node by rebalancing. + node_type *parent = node->parent(); + if (node != root()) { + if (node->position() > 0) { + // Try rebalancing with our left sibling. + node_type *left = parent->child(node->position() - 1); + assert(left->max_count() == kNodeValues); + if (left->count() < kNodeValues) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the end of the right node then we bias rebalancing to + // fill up the left node. + int to_move = (kNodeValues - left->count()) / + (1 + (insert_position < kNodeValues)); + to_move = (std::max)(1, to_move); + + if (((insert_position - to_move) >= 0) || + ((left->count() + to_move) < kNodeValues)) { + left->rebalance_right_to_left(to_move, node, mutable_allocator()); + + assert(node->max_count() - node->count() == to_move); + insert_position = insert_position - to_move; + if (insert_position < 0) { + insert_position = insert_position + left->count() + 1; + node = left; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + if (node->position() < parent->count()) { + // Try rebalancing with our right sibling. + node_type *right = parent->child(node->position() + 1); + assert(right->max_count() == kNodeValues); + if (right->count() < kNodeValues) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the _beginning of the left node then we bias rebalancing + // to fill up the right node. + int to_move = + (kNodeValues - right->count()) / (1 + (insert_position > 0)); + to_move = (std::max)(1, to_move); + + if ((insert_position <= (node->count() - to_move)) || + ((right->count() + to_move) < kNodeValues)) { + node->rebalance_left_to_right(to_move, right, mutable_allocator()); + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = right; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + // Rebalancing failed, make sure there is room on the parent node for a new + // value. + assert(parent->max_count() == kNodeValues); + if (parent->count() == kNodeValues) { + iterator parent_iter(node->parent(), node->position()); + rebalance_or_split(&parent_iter); + } + } else { + // Rebalancing not possible because this is the root node. + // Create a new root node and set the current root node as the child of the + // new root. + parent = new_internal_node(parent); + parent->init_child(0, root()); + mutable_root() = parent; + // If the former root was a leaf node, then it's now the rightmost node. + assert(!parent->child(0)->leaf() || parent->child(0) == rightmost_); + } + + // Split the node. + node_type *split_node; + if (node->leaf()) { + split_node = new_leaf_node(parent); + node->split(insert_position, split_node, mutable_allocator()); + if (rightmost_ == node) rightmost_ = split_node; + } else { + split_node = new_internal_node(parent); + node->split(insert_position, split_node, mutable_allocator()); + } + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = split_node; + } + } + + template + void btree
<P>
::merge_nodes(node_type *left, node_type *right) { + left->merge(right, mutable_allocator()); + if (right->leaf()) { + if (rightmost_ == right) rightmost_ = left; + delete_leaf_node(right); + } else { + delete_internal_node(right); + } + } + + template + bool btree
<P>
::try_merge_or_rebalance(iterator *iter) { + node_type *parent = iter->node->parent(); + if (iter->node->position() > 0) { + // Try merging with our left sibling. + node_type *left = parent->child(iter->node->position() - 1); + assert(left->max_count() == kNodeValues); + if ((1 + left->count() + iter->node->count()) <= kNodeValues) { + iter->position += 1 + left->count(); + merge_nodes(left, iter->node); + iter->node = left; + return true; + } + } + if (iter->node->position() < parent->count()) { + // Try merging with our right sibling. + node_type *right = parent->child(iter->node->position() + 1); + assert(right->max_count() == kNodeValues); + if ((1 + iter->node->count() + right->count()) <= kNodeValues) { + merge_nodes(iter->node, right); + return true; + } + // Try rebalancing with our right sibling. We don't perform rebalancing if + // we deleted the first element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the front of the tree. + if ((right->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position > 0))) { + int to_move = (right->count() - iter->node->count()) / 2; + to_move = (std::min)(to_move, right->count() - 1); + iter->node->rebalance_right_to_left(to_move, right, mutable_allocator()); + return false; + } + } + if (iter->node->position() > 0) { + // Try rebalancing with our left sibling. We don't perform rebalancing if + // we deleted the last element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the back of the tree. + node_type *left = parent->child(iter->node->position() - 1); + if ((left->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position < iter->node->count()))) { + int to_move = (left->count() - iter->node->count()) / 2; + to_move = (std::min)(to_move, left->count() - 1); + left->rebalance_left_to_right(to_move, iter->node, mutable_allocator()); + iter->position += to_move; + return false; + } + } + return false; + } + + template + void btree
<P>
::try_shrink() { + if (root()->count() > 0) { + return; + } + // Deleted the last item on the root node, shrink the height of the tree. + if (root()->leaf()) { + assert(size() == 0); + delete_leaf_node(root()); + mutable_root() = EmptyNode(); + rightmost_ = EmptyNode(); + } else { + node_type *child = root()->child(0); + child->make_root(); + delete_internal_node(root()); + mutable_root() = child; + } + } + + template + template + inline IterType btree
<P>
::internal_last(IterType iter) { + assert(iter.node != nullptr); + while (iter.position == iter.node->count()) { + iter.position = iter.node->position(); + iter.node = iter.node->parent(); + if (iter.node->leaf()) { + iter.node = nullptr; + break; + } + } + return iter; + } + + template + template + inline auto btree
<P>
::internal_emplace(iterator iter, Args &&... args) + -> iterator { + if (!iter.node->leaf()) { + // We can't insert on an internal node. Instead, we'll insert after the + // previous value which is guaranteed to be on a leaf node. + --iter; + ++iter.position; + } + const int max_count = iter.node->max_count(); + if (iter.node->count() == max_count) { + // Make room in the leaf for the new item. + if (max_count < kNodeValues) { + // Insertion into the root where the root is smaller than the full node + // size. Simply grow the size of the root node. + assert(iter.node == root()); + iter.node = + new_leaf_root_node((std::min)(kNodeValues, 2 * max_count)); + iter.node->swap(root(), mutable_allocator()); + delete_leaf_node(root()); + mutable_root() = iter.node; + rightmost_ = iter.node; + } else { + rebalance_or_split(&iter); + } + } + iter.node->emplace_value(iter.position, mutable_allocator(), + std::forward(args)...); + ++size_; + return iter; + } + + template + template + inline auto btree
<P>
::internal_locate(const K &key) const + -> SearchResult { + return internal_locate_impl(key, is_key_compare_to()); + } + + template + template + inline auto btree
<P>
::internal_locate_impl( + const K &key, std::false_type /* IsCompareTo */) const + -> SearchResult { + iterator iter(const_cast(root()), 0); + for (;;) { + iter.position = iter.node->lower_bound(key, key_comp()).value; + // NOTE: we don't need to walk all the way down the tree if the keys are + // equal, but determining equality would require doing an extra comparison + // on each node on the way down, and we will need to go all the way to the + // leaf node in the expected case. + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return {iter}; + } + + template + template + inline auto btree
<P>
::internal_locate_impl( + const K &key, std::true_type /* IsCompareTo */) const + -> SearchResult { + iterator iter(const_cast(root()), 0); + for (;;) { + SearchResult res = iter.node->lower_bound(key, key_comp()); + iter.position = res.value; + if (res.match == MatchKind::kEq) { + return {iter, MatchKind::kEq}; + } + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return {iter, MatchKind::kNe}; + } + + template + template + auto btree
<P>
::internal_lower_bound(const K &key) const -> iterator { + iterator iter(const_cast(root()), 0); + for (;;) { + iter.position = iter.node->lower_bound(key, key_comp()).value; + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return internal_last(iter); + } + + template + template + auto btree
<P>
::internal_upper_bound(const K &key) const -> iterator { + iterator iter(const_cast(root()), 0); + for (;;) { + iter.position = iter.node->upper_bound(key, key_comp()); + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return internal_last(iter); + } + + template + template + auto btree
<P>
::internal_find(const K &key) const -> iterator { + auto res = internal_locate(key); + if (res.HasMatch()) { + if (res.IsEq()) { + return res.value; + } + } else { + const iterator iter = internal_last(res.value); + if (iter.node != nullptr && !compare_keys(key, iter.key())) { + return iter; + } + } + return {nullptr, 0}; + } + + template + void btree
<P>
::internal_clear(node_type *node) { + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + internal_clear(node->child(i)); + } + delete_internal_node(node); + } else { + delete_leaf_node(node); + } + } + + template + int btree
<P>
::internal_verify( + const node_type *node, const key_type *lo, const key_type *hi) const { + assert(node->count() > 0); + assert(node->count() <= node->max_count()); + if (lo) { + assert(!compare_keys(node->key(0), *lo)); + } + if (hi) { + assert(!compare_keys(*hi, node->key(node->count() - 1))); + } + for (int i = 1; i < node->count(); ++i) { + assert(!compare_keys(node->key(i), node->key(i - 1))); + } + int count = node->count(); + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + assert(node->child(i) != nullptr); + assert(node->child(i)->parent() == node); + assert(node->child(i)->position() == i); + count += internal_verify( + node->child(i), + (i == 0) ? lo : &node->key(i - 1), + (i == node->count()) ? hi : &node->key(i)); + } + } + return count; + } + + // A common base class for btree_set, btree_map, btree_multiset, and btree_multimap. + // --------------------------------------------------------------------------------- + template + class btree_container { + using params_type = typename Tree::params_type; + + protected: + // Alias used for heterogeneous lookup functions. + // `key_arg` evaluates to `K` when the functors are transparent and to + // `key_type` otherwise. It permits template argument deduction on `K` for the + // transparent case. + template + using key_arg = + typename KeyArg::value>:: + template type; + + public: + using key_type = typename Tree::key_type; + using value_type = typename Tree::value_type; + using size_type = typename Tree::size_type; + using difference_type = typename Tree::difference_type; + using key_compare = typename Tree::key_compare; + using value_compare = typename Tree::value_compare; + using allocator_type = typename Tree::allocator_type; + using reference = typename Tree::reference; + using const_reference = typename Tree::const_reference; + using pointer = typename Tree::pointer; + using const_pointer = typename Tree::const_pointer; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + using reverse_iterator = typename Tree::reverse_iterator; + using const_reverse_iterator = typename Tree::const_reverse_iterator; + using node_type = typename Tree::node_handle_type; + + // Constructors/assignments. + btree_container() : tree_(key_compare(), allocator_type()) {} + explicit btree_container(const key_compare &comp, + const allocator_type &alloc = allocator_type()) + : tree_(comp, alloc) {} + btree_container(const btree_container &x) = default; + btree_container(btree_container &&x) noexcept = default; + btree_container &operator=(const btree_container &x) = default; + btree_container &operator=(btree_container &&x) noexcept( + std::is_nothrow_move_assignable::value) = default; + + // Iterator routines. + iterator begin() { return tree_.begin(); } + const_iterator begin() const { return tree_.begin(); } + const_iterator cbegin() const { return tree_.begin(); } + iterator end() { return tree_.end(); } + const_iterator end() const { return tree_.end(); } + const_iterator cend() const { return tree_.end(); } + reverse_iterator rbegin() { return tree_.rbegin(); } + const_reverse_iterator rbegin() const { return tree_.rbegin(); } + const_reverse_iterator crbegin() const { return tree_.rbegin(); } + reverse_iterator rend() { return tree_.rend(); } + const_reverse_iterator rend() const { return tree_.rend(); } + const_reverse_iterator crend() const { return tree_.rend(); } + + // Lookup routines. 
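+ // These forward to the underlying btree and support heterogeneous lookup
+ // through key_arg. Sketch (editor's note, assuming a transparent comparator
+ // such as std::less<>): c.find(std::string_view("abc")) searches without
+ // materializing a key_type temporary.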
+ template + iterator find(const key_arg &key) { + return tree_.find(key); + } + template + const_iterator find(const key_arg &key) const { return tree_.find(key); } + + template + bool contains(const key_arg &key) const { return find(key) != end(); } + + template + iterator lower_bound(const key_arg &key) { return tree_.lower_bound(key); } + + template + const_iterator lower_bound(const key_arg &key) const { return tree_.lower_bound(key); } + + template + iterator upper_bound(const key_arg &key) { return tree_.upper_bound(key); } + + template + const_iterator upper_bound(const key_arg &key) const { return tree_.upper_bound(key); } + + template + std::pair equal_range(const key_arg &key) { return tree_.equal_range(key); } + + template + std::pair equal_range( + const key_arg &key) const { + return tree_.equal_range(key); + } + + iterator erase(const_iterator iter) { return tree_.erase(iterator(iter)); } + iterator erase(iterator iter) { return tree_.erase(iter); } + iterator erase(const_iterator first, const_iterator last) { + return tree_.erase(iterator(first), iterator(last)).second; + } + + node_type extract(iterator position) { + // Use Move instead of Transfer, because the rebalancing code expects to + // have a valid object to scribble metadata bits on top of. + auto node = CommonAccess::Move(get_allocator(), position.slot()); + erase(position); + return node; + } + + node_type extract(const_iterator position) { + return extract(iterator(position)); + } + + public: + void clear() { tree_.clear(); } + void swap(btree_container &x) { tree_.swap(x.tree_); } + void verify() const { tree_.verify(); } + + size_type size() const { return tree_.size(); } + size_type max_size() const { return tree_.max_size(); } + bool empty() const { return tree_.empty(); } + + friend bool operator==(const btree_container &x, const btree_container &y) { + if (x.size() != y.size()) return false; + return std::equal(x.begin(), x.end(), y.begin()); + } + + friend bool operator!=(const btree_container &x, const btree_container &y) { return !(x == y); } + + friend bool operator<(const btree_container &x, const btree_container &y) { + return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end()); + } + + friend bool operator>(const btree_container &x, const btree_container &y) { return y < x; } + + friend bool operator<=(const btree_container &x, const btree_container &y) { return !(y < x); } + + friend bool operator>=(const btree_container &x, const btree_container &y) { return !(x < y); } + + // The allocator used by the btree. + allocator_type get_allocator() const { return tree_.get_allocator(); } + + // The key comparator used by the btree. + key_compare key_comp() const { return tree_.key_comp(); } + value_compare value_comp() const { return tree_.value_comp(); } + + // Support absl::Hash. + template + friend State AbslHashValue(State h, const btree_container &b) { + for (const auto &v : b) { + h = State::combine(std::move(h), v); + } + return State::combine(std::move(h), b.size()); + } + + protected: + Tree tree_; + }; + + // A common base class for btree_set and btree_map. 
+ // ----------------------------------------------- + template + class btree_set_container : public btree_container { + using super_type = btree_container; + using params_type = typename Tree::params_type; + using init_type = typename params_type::init_type; + using is_key_compare_to = typename params_type::is_key_compare_to; + friend class BtreeNodePeer; + + protected: + template + using key_arg = typename super_type::template key_arg; + + public: + using key_type = typename Tree::key_type; + using value_type = typename Tree::value_type; + using size_type = typename Tree::size_type; + using key_compare = typename Tree::key_compare; + using allocator_type = typename Tree::allocator_type; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + using node_type = typename super_type::node_type; + using insert_return_type = InsertReturnType; + using super_type::super_type; + btree_set_container() {} + + template + btree_set_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + btree_set_container(std::initializer_list init, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : btree_set_container(init.begin(), init.end(), comp, alloc) {} + + // Lookup routines. + template + size_type count(const key_arg &key) const { + return this->tree_.count_unique(key); + } + + // Insertion routines. + std::pair insert(const value_type &x) { + return this->tree_.insert_unique(params_type::key(x), x); + } + std::pair insert(value_type &&x) { + return this->tree_.insert_unique(params_type::key(x), std::move(x)); + } + template + std::pair emplace(Args &&... args) { + init_type v(std::forward(args)...); + return this->tree_.insert_unique(params_type::key(v), std::move(v)); + } + iterator insert(const_iterator position, const value_type &x) { + return this->tree_ + .insert_hint_unique(iterator(position), params_type::key(x), x) + .first; + } + iterator insert(const_iterator position, value_type &&x) { + return this->tree_ + .insert_hint_unique(iterator(position), params_type::key(x), + std::move(x)) + .first; + } + + template + iterator emplace_hint(const_iterator position, Args &&... 
args) { + init_type v(std::forward(args)...); + return this->tree_ + .insert_hint_unique(iterator(position), params_type::key(v), + std::move(v)) + .first; + } + + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_iterator_unique(b, e); + } + + void insert(std::initializer_list init) { + this->tree_.insert_iterator_unique(init.begin(), init.end()); + } + + insert_return_type insert(node_type &&node) { + if (!node) return {this->end(), false, node_type()}; + std::pair res = + this->tree_.insert_unique(params_type::key(CommonAccess::GetSlot(node)), + CommonAccess::GetSlot(node)); + if (res.second) { + CommonAccess::Destroy(&node); + return {res.first, true, node_type()}; + } else { + return {res.first, false, std::move(node)}; + } + } + + iterator insert(const_iterator hint, node_type &&node) { + if (!node) return this->end(); + std::pair res = this->tree_.insert_hint_unique( + iterator(hint), params_type::key(CommonAccess::GetSlot(node)), + CommonAccess::GetSlot(node)); + if (res.second) CommonAccess::Destroy(&node); + return res.first; + } + + template + size_type erase(const key_arg &key) { return this->tree_.erase_unique(key); } + using super_type::erase; + + template + node_type extract(const key_arg &key) { + auto it = this->find(key); + return it == this->end() ? node_type() : extract(it); + } + + using super_type::extract; + + // Merge routines. + // Moves elements from `src` into `this`. If the element already exists in + // `this`, it is left unmodified in `src`. + template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &src) { // NOLINT + for (auto src_it = src.begin(); src_it != src.end();) { + if (insert(std::move(*src_it)).second) { + src_it = src.erase(src_it); + } else { + ++src_it; + } + } + } + + template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &&src) { + merge(src); + } + }; + + // Base class for btree_map. + // ------------------------- + template + class btree_map_container : public btree_set_container { + using super_type = btree_set_container; + using params_type = typename Tree::params_type; + + protected: + template + using key_arg = typename super_type::template key_arg; + + public: + using key_type = typename Tree::key_type; + using mapped_type = typename params_type::mapped_type; + using value_type = typename Tree::value_type; + using key_compare = typename Tree::key_compare; + using allocator_type = typename Tree::allocator_type; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + + // Inherit constructors. + using super_type::super_type; + btree_map_container() {} + + // Insertion routines. + template + std::pair try_emplace(const key_type &k, Args &&... args) { + return this->tree_.insert_unique( + k, std::piecewise_construct, std::forward_as_tuple(k), + std::forward_as_tuple(std::forward(args)...)); + } + template + std::pair try_emplace(key_type &&k, Args &&... args) { + // Note: `key_ref` exists to avoid a ClangTidy warning about moving from `k` + // and then using `k` unsequenced. This is safe because the move is into a + // forwarding reference and insert_unique guarantees that `key` is never + // referenced after consuming `args`. 
+ const key_type& key_ref = k; + return this->tree_.insert_unique( + key_ref, std::piecewise_construct, std::forward_as_tuple(std::move(k)), + std::forward_as_tuple(std::forward(args)...)); + } + template + iterator try_emplace(const_iterator hint, const key_type &k, + Args &&... args) { + return this->tree_ + .insert_hint_unique(iterator(hint), k, std::piecewise_construct, + std::forward_as_tuple(k), + std::forward_as_tuple(std::forward(args)...)) + .first; + } + template + iterator try_emplace(const_iterator hint, key_type &&k, Args &&... args) { + // Note: `key_ref` exists to avoid a ClangTidy warning about moving from `k` + // and then using `k` unsequenced. This is safe because the move is into a + // forwarding reference and insert_hint_unique guarantees that `key` is + // never referenced after consuming `args`. + const key_type& key_ref = k; + return this->tree_ + .insert_hint_unique(iterator(hint), key_ref, std::piecewise_construct, + std::forward_as_tuple(std::move(k)), + std::forward_as_tuple(std::forward(args)...)) + .first; + } + mapped_type &operator[](const key_type &k) { + return try_emplace(k).first->second; + } + mapped_type &operator[](key_type &&k) { + return try_emplace(std::move(k)).first->second; + } + + template + mapped_type &at(const key_arg &key) { + auto it = this->find(key); + if (it == this->end()) + base_internal::ThrowStdOutOfRange("phmap::btree_map::at"); + return it->second; + } + template + const mapped_type &at(const key_arg &key) const { + auto it = this->find(key); + if (it == this->end()) + base_internal::ThrowStdOutOfRange("phmap::btree_map::at"); + return it->second; + } + }; + + // A common base class for btree_multiset and btree_multimap. + template + class btree_multiset_container : public btree_container { + using super_type = btree_container; + using params_type = typename Tree::params_type; + using init_type = typename params_type::init_type; + using is_key_compare_to = typename params_type::is_key_compare_to; + + template + using key_arg = typename super_type::template key_arg; + + public: + using key_type = typename Tree::key_type; + using value_type = typename Tree::value_type; + using size_type = typename Tree::size_type; + using key_compare = typename Tree::key_compare; + using allocator_type = typename Tree::allocator_type; + using iterator = typename Tree::iterator; + using const_iterator = typename Tree::const_iterator; + using node_type = typename super_type::node_type; + + // Inherit constructors. + using super_type::super_type; + btree_multiset_container() {} + + // Range constructor. + template + btree_multiset_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Initializer list constructor. + btree_multiset_container(std::initializer_list init, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : btree_multiset_container(init.begin(), init.end(), comp, alloc) {} + + // Lookup routines. + template + size_type count(const key_arg &key) const { + return this->tree_.count_multi(key); + } + + // Insertion routines. 
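Before the multiset insertion routines below, a quick usage sketch for the `btree_map_container` API defined just above: `try_emplace` constructs the mapped value only when the key is absent, and `operator[]` is layered on top of it. This is a hedged illustration with made-up keys, not part of the patch; it uses the public `phmap::btree_map` alias and C++17 structured bindings:

    #include <string>
    #include "parallel_hashmap/btree.h"

    int main() {
        phmap::btree_map<int, std::string> m;
        auto [it, inserted] = m.try_emplace(1, "one");  // constructs "one" in place
        m.try_emplace(1, "uno");    // key already present: arguments are discarded
        m[2] = "two";               // operator[] == try_emplace(k) + assignment
        return (inserted && it->second == "one" && m.size() == 2) ? 0 : 1;
    }
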
+ iterator insert(const value_type &x) { return this->tree_.insert_multi(x); } + iterator insert(value_type &&x) { + return this->tree_.insert_multi(std::move(x)); + } + iterator insert(const_iterator position, const value_type &x) { + return this->tree_.insert_hint_multi(iterator(position), x); + } + iterator insert(const_iterator position, value_type &&x) { + return this->tree_.insert_hint_multi(iterator(position), std::move(x)); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_iterator_multi(b, e); + } + void insert(std::initializer_list init) { + this->tree_.insert_iterator_multi(init.begin(), init.end()); + } + template + iterator emplace(Args &&... args) { + return this->tree_.insert_multi(init_type(std::forward(args)...)); + } + template + iterator emplace_hint(const_iterator position, Args &&... args) { + return this->tree_.insert_hint_multi( + iterator(position), init_type(std::forward(args)...)); + } + iterator insert(node_type &&node) { + if (!node) return this->end(); + iterator res = + this->tree_.insert_multi(params_type::key(CommonAccess::GetSlot(node)), + CommonAccess::GetSlot(node)); + CommonAccess::Destroy(&node); + return res; + } + iterator insert(const_iterator hint, node_type &&node) { + if (!node) return this->end(); + iterator res = this->tree_.insert_hint_multi( + iterator(hint), + std::move(params_type::element(CommonAccess::GetSlot(node)))); + CommonAccess::Destroy(&node); + return res; + } + + // Deletion routines. + template + size_type erase(const key_arg &key) { + return this->tree_.erase_multi(key); + } + using super_type::erase; + + // Node extraction routines. + template + node_type extract(const key_arg &key) { + auto it = this->find(key); + return it == this->end() ? node_type() : extract(it); + } + using super_type::extract; + + // Merge routines. + // Moves all elements from `src` into `this`. + template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &src) { // NOLINT + insert(std::make_move_iterator(src.begin()), + std::make_move_iterator(src.end())); + src.clear(); + } + + template < + typename T, + typename phmap::enable_if_t< + phmap::conjunction< + std::is_same, + std::is_same, + std::is_same>::value, + int> = 0> + void merge(btree_container &&src) { + merge(src); + } + }; + + // A base class for btree_multimap. + template + class btree_multimap_container : public btree_multiset_container { + using super_type = btree_multiset_container; + using params_type = typename Tree::params_type; + + public: + using mapped_type = typename params_type::mapped_type; + + // Inherit constructors. 
+ using super_type::super_type; + btree_multimap_container() {} + }; + +} // namespace priv + + + + // ---------------------------------------------------------------------- + // btree_set - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_set : public priv::btree_set_container< + priv::btree>> + { + using Base = typename btree_set::btree_set_container; + + public: + btree_set() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_set` containers. + // ------------------------------------------------------- + template + void swap(btree_set &x, btree_set &y) { + return x.swap(y); + } + + // Erases all elements that satisfy the predicate pred from the container. + // ---------------------------------------------------------------------- + template + void erase_if(btree_set &set, Pred pred) { + for (auto it = set.begin(); it != set.end();) { + if (pred(*it)) { + it = set.erase(it); + } else { + ++it; + } + } + } + + // ---------------------------------------------------------------------- + // btree_multiset - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_multiset : public priv::btree_multiset_container< + priv::btree>> + { + using Base = typename btree_multiset::btree_multiset_container; + + public: + btree_multiset() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_multiset` containers. + // ------------------------------------------------------------ + template + void swap(btree_multiset &x, btree_multiset &y) { + return x.swap(y); + } + + // Erases all elements that satisfy the predicate pred from the container. 
+ // ---------------------------------------------------------------------- + template + void erase_if(btree_multiset &set, Pred pred) { + for (auto it = set.begin(); it != set.end();) { + if (pred(*it)) { + it = set.erase(it); + } else { + ++it; + } + } + } + + + // ---------------------------------------------------------------------- + // btree_map - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_map : public priv::btree_map_container< + priv::btree>> + { + using Base = typename btree_map::btree_map_container; + + public: + btree_map() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::try_emplace; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::at; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::operator[]; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_map` containers. + // ------------------------------------------------------- + template + void swap(btree_map &x, btree_map &y) { + return x.swap(y); + } + + // ---------------------------------------------------------------------- + template + void erase_if(btree_map &map, Pred pred) { + for (auto it = map.begin(); it != map.end();) { + if (pred(*it)) { + it = map.erase(it); + } else { + ++it; + } + } + } + + // ---------------------------------------------------------------------- + // btree_multimap - default values in phmap_fwd_decl.h + // ---------------------------------------------------------------------- + template + class btree_multimap : public priv::btree_multimap_container< + priv::btree>> + { + using Base = typename btree_multimap::btree_multimap_container; + + public: + btree_multimap() {} + using Base::Base; + using Base::begin; + using Base::cbegin; + using Base::end; + using Base::cend; + using Base::empty; + using Base::max_size; + using Base::size; + using Base::clear; + using Base::erase; + using Base::insert; + using Base::emplace; + using Base::emplace_hint; + using Base::extract; + using Base::merge; + using Base::swap; + using Base::contains; + using Base::count; + using Base::equal_range; + using Base::find; + using Base::get_allocator; + using Base::key_comp; + using Base::value_comp; + }; + + // Swaps the contents of two `phmap::btree_multimap` containers. + // ------------------------------------------------------------ + template + void swap(btree_multimap &x, btree_multimap &y) { + return x.swap(y); + } + + // Erases all elements that satisfy the predicate pred from the container. 
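The `erase_if` helpers above (and the `btree_multimap` overload that follows) all use the same iterator-preserving idiom: `erase` returns the iterator to the next element, so the loop never advances through an invalidated iterator. A small usage sketch, assuming the usual `phmap` namespace for the free function:

    #include "parallel_hashmap/btree.h"

    int main() {
        phmap::btree_set<int> s{1, 2, 3, 4, 5, 6};
        phmap::erase_if(s, [](int v) { return v % 2 == 0; });  // drop even keys
        return s.size() == 3 ? 0 : 1;                          // {1, 3, 5} remain
    }
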
+ // ---------------------------------------------------------------------- + template + void erase_if(btree_multimap &map, Pred pred) { + for (auto it = map.begin(); it != map.end();) { + if (pred(*it)) { + it = map.erase(it); + } else { + ++it; + } + } + } + + +} // namespace btree + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + + +#endif // PHMAP_BTREE_BTREE_CONTAINER_H_ diff --git a/include/parallel_hashmap/conanfile.py b/include/parallel_hashmap/conanfile.py new file mode 100644 index 000000000..c046377d1 --- /dev/null +++ b/include/parallel_hashmap/conanfile.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from conans import ConanFile, tools +import os + +class SparseppConan(ConanFile): + name = "parallel_hashmap" + version = "1.27" + description = "A header-only, very fast and memory-friendly hash map" + + # Indicates License type of the packaged library + license = "https://github.com/greg7mdp/parallel-hashmap/blob/master/LICENSE" + + # Packages the license for the conanfile.py + exports = ["LICENSE"] + + # Custom attributes for Bincrafters recipe conventions + source_subfolder = "source_subfolder" + + def source(self): + source_url = "https://github.com/greg7mdp/parallel-hashmap" + tools.get("{0}/archive/{1}.tar.gz".format(source_url, self.version)) + extracted_dir = self.name + "-" + self.version + + # Renaming to "source_subfolder" is a convention that simplifies later steps + os.rename(extracted_dir, self.source_subfolder) + + + def package(self): + include_folder = os.path.join(self.source_subfolder, "parallel_hashmap") + self.copy(pattern="LICENSE") + self.copy(pattern="*", dst="include/parallel_hashmap", src=include_folder) + + def package_id(self): + self.info.header_only() diff --git a/include/parallel_hashmap/phmap.h b/include/parallel_hashmap/phmap.h index 7c1cd95a0..463653358 100644 --- a/include/parallel_hashmap/phmap.h +++ b/include/parallel_hashmap/phmap.h @@ -74,17 +74,17 @@ namespace phmap { -namespace container_internal { +namespace priv { // -------------------------------------------------------------------------- template class probe_seq { public: - probe_seq(size_t hash, size_t mask) { + probe_seq(size_t hashval, size_t mask) { assert(((mask + 1) & mask) == 0 && "not a mask"); mask_ = mask; - offset_ = hash & mask_; + offset_ = hashval & mask_; } size_t offset() const { return offset_; } size_t offset(size_t i) const { return (offset_ + i) & mask_; } @@ -137,18 +137,19 @@ constexpr bool IsNoThrowSwappable() { // -------------------------------------------------------------------------- template int TrailingZeros(T x) { - return sizeof(T) == 8 ? base_internal::CountTrailingZerosNonZero64( - static_cast(x)) - : base_internal::CountTrailingZerosNonZero32( - static_cast(x)); + PHMAP_IF_CONSTEXPR(sizeof(T) == 8) + return base_internal::CountTrailingZerosNonZero64(static_cast(x)); + else + return base_internal::CountTrailingZerosNonZero32(static_cast(x)); } // -------------------------------------------------------------------------- template int LeadingZeros(T x) { - return sizeof(T) == 8 - ?
base_internal::CountLeadingZeros64(static_cast(x)) - : base_internal::CountLeadingZeros32(static_cast(x)); + PHMAP_IF_CONSTEXPR(sizeof(T) == 8) + return base_internal::CountLeadingZeros64(static_cast(x)); + else + return base_internal::CountLeadingZeros32(static_cast(x)); } // -------------------------------------------------------------------------- @@ -182,10 +183,10 @@ class BitMask explicit operator bool() const { return mask_ != 0; } int operator*() const { return LowestBitSet(); } int LowestBitSet() const { - return container_internal::TrailingZeros(mask_) >> Shift; + return priv::TrailingZeros(mask_) >> Shift; } int HighestBitSet() const { - return (sizeof(T) * CHAR_BIT - container_internal::LeadingZeros(mask_) - + return (sizeof(T) * CHAR_BIT - priv::LeadingZeros(mask_) - 1) >> Shift; } @@ -194,13 +195,13 @@ class BitMask BitMask end() const { return BitMask(0); } int TrailingZeros() const { - return container_internal::TrailingZeros(mask_) >> Shift; + return priv::TrailingZeros(mask_) >> Shift; } int LeadingZeros() const { constexpr int total_significant_bits = SignificantBits << Shift; constexpr int extra_bits = sizeof(T) * 8 - total_significant_bits; - return container_internal::LeadingZeros(mask_ << extra_bits) >> Shift; + return priv::LeadingZeros(mask_ << extra_bits) >> Shift; } private: @@ -270,22 +271,22 @@ inline size_t HashSeed(const ctrl_t* ctrl) { #ifdef PHMAP_NON_DETERMINISTIC -inline size_t H1(size_t hash, const ctrl_t* ctrl) { +inline size_t H1(size_t hashval, const ctrl_t* ctrl) { // use ctrl_ pointer to add entropy to ensure // non-deterministic iteration order. - return (hash >> 7) ^ HashSeed(ctrl); + return (hashval >> 7) ^ HashSeed(ctrl); } #else -inline size_t H1(size_t hash, const ctrl_t* ) { - return (hash >> 7); +inline size_t H1(size_t hashval, const ctrl_t* ) { + return (hashval >> 7); } #endif -inline ctrl_t H2(size_t hash) { return (ctrl_t)(hash & 0x7F); } +inline ctrl_t H2(size_t hashval) { return (ctrl_t)(hashval & 0x7F); } inline bool IsEmpty(ctrl_t c) { return c == kEmpty; } inline bool IsFull(ctrl_t c) { return c >= 0; } @@ -795,7 +796,7 @@ class raw_hash_set auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); - using Layout = phmap::container_internal::Layout; + using Layout = phmap::priv::Layout; static Layout MakeLayout(size_t capacity) { assert(IsValidCapacity(capacity)); @@ -879,6 +880,25 @@ class raw_hash_set return tmp; } +#if PHMAP_BIDIRECTIONAL + // PRECONDITION: not a begin() iterator. + iterator& operator--() { + assert(ctrl_); + do { + --ctrl_; + --slot_; + } while (IsEmptyOrDeleted(*ctrl_)); + return *this; + } + + // PRECONDITION: not a begin() iterator. + iterator operator--(int) { + auto tmp = *this; + --*this; + return tmp; + } +#endif + friend bool operator==(const iterator& a, const iterator& b) { return a.ctrl_ == b.ctrl_; } @@ -903,7 +923,7 @@ class raw_hash_set } ctrl_t* ctrl_ = nullptr; - // To avoid uninitialized member warnigs, put slot_ in an anonymous union. + // To avoid uninitialized member warnings, put slot_ in an anonymous union. // The member is not initialized on singleton and end iterators. 
union { slot_type* slot_; @@ -956,44 +976,45 @@ class raw_hash_set std::is_nothrow_default_constructible::value&& std::is_nothrow_default_constructible::value) {} - explicit raw_hash_set(size_t bucket_count, const hasher& hash = hasher(), + explicit raw_hash_set(size_t bucket_cnt, const hasher& hashfn = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) - : ctrl_(EmptyGroup()), settings_(0, hash, eq, alloc) { - if (bucket_count) { - capacity_ = NormalizeCapacity(bucket_count); - reset_growth_left(); - initialize_slots(); + : ctrl_(EmptyGroup()), settings_(0, hashfn, eq, alloc) { + if (bucket_cnt) { + size_t new_capacity = NormalizeCapacity(bucket_cnt); + reset_growth_left(new_capacity); + initialize_slots(new_capacity); + capacity_ = new_capacity; } } - raw_hash_set(size_t bucket_count, const hasher& hash, + raw_hash_set(size_t bucket_cnt, const hasher& hashfn, const allocator_type& alloc) - : raw_hash_set(bucket_count, hash, key_equal(), alloc) {} + : raw_hash_set(bucket_cnt, hashfn, key_equal(), alloc) {} - raw_hash_set(size_t bucket_count, const allocator_type& alloc) - : raw_hash_set(bucket_count, hasher(), key_equal(), alloc) {} + raw_hash_set(size_t bucket_cnt, const allocator_type& alloc) + : raw_hash_set(bucket_cnt, hasher(), key_equal(), alloc) {} explicit raw_hash_set(const allocator_type& alloc) : raw_hash_set(0, hasher(), key_equal(), alloc) {} template - raw_hash_set(InputIter first, InputIter last, size_t bucket_count = 0, - const hasher& hash = hasher(), const key_equal& eq = key_equal(), + raw_hash_set(InputIter first, InputIter last, size_t bucket_cnt = 0, + const hasher& hashfn = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) - : raw_hash_set(bucket_count, hash, eq, alloc) { + : raw_hash_set(bucket_cnt, hashfn, eq, alloc) { insert(first, last); } template - raw_hash_set(InputIter first, InputIter last, size_t bucket_count, - const hasher& hash, const allocator_type& alloc) - : raw_hash_set(first, last, bucket_count, hash, key_equal(), alloc) {} + raw_hash_set(InputIter first, InputIter last, size_t bucket_cnt, + const hasher& hashfn, const allocator_type& alloc) + : raw_hash_set(first, last, bucket_cnt, hashfn, key_equal(), alloc) {} template - raw_hash_set(InputIter first, InputIter last, size_t bucket_count, + raw_hash_set(InputIter first, InputIter last, size_t bucket_cnt, const allocator_type& alloc) - : raw_hash_set(first, last, bucket_count, hasher(), key_equal(), alloc) {} + : raw_hash_set(first, last, bucket_cnt, hasher(), key_equal(), alloc) {} template raw_hash_set(InputIter first, InputIter last, const allocator_type& alloc) @@ -1021,33 +1042,33 @@ class raw_hash_set // // RequiresNotInit is a workaround for gcc prior to 7.1. 
template = 0, RequiresInsertable = 0> - raw_hash_set(std::initializer_list init, size_t bucket_count = 0, - const hasher& hash = hasher(), const key_equal& eq = key_equal(), + raw_hash_set(std::initializer_list init, size_t bucket_cnt = 0, + const hasher& hashfn = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) - : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {} + : raw_hash_set(init.begin(), init.end(), bucket_cnt, hashfn, eq, alloc) {} - raw_hash_set(std::initializer_list init, size_t bucket_count = 0, - const hasher& hash = hasher(), const key_equal& eq = key_equal(), + raw_hash_set(std::initializer_list init, size_t bucket_cnt = 0, + const hasher& hashfn = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) - : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {} + : raw_hash_set(init.begin(), init.end(), bucket_cnt, hashfn, eq, alloc) {} template = 0, RequiresInsertable = 0> - raw_hash_set(std::initializer_list init, size_t bucket_count, - const hasher& hash, const allocator_type& alloc) - : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {} + raw_hash_set(std::initializer_list init, size_t bucket_cnt, + const hasher& hashfn, const allocator_type& alloc) + : raw_hash_set(init, bucket_cnt, hashfn, key_equal(), alloc) {} - raw_hash_set(std::initializer_list init, size_t bucket_count, - const hasher& hash, const allocator_type& alloc) - : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {} + raw_hash_set(std::initializer_list init, size_t bucket_cnt, + const hasher& hashfn, const allocator_type& alloc) + : raw_hash_set(init, bucket_cnt, hashfn, key_equal(), alloc) {} template = 0, RequiresInsertable = 0> - raw_hash_set(std::initializer_list init, size_t bucket_count, + raw_hash_set(std::initializer_list init, size_t bucket_cnt, const allocator_type& alloc) - : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} + : raw_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} - raw_hash_set(std::initializer_list init, size_t bucket_count, + raw_hash_set(std::initializer_list init, size_t bucket_cnt, const allocator_type& alloc) - : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} + : raw_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} template = 0, RequiresInsertable = 0> raw_hash_set(std::initializer_list init, const allocator_type& alloc) @@ -1067,11 +1088,11 @@ class raw_hash_set // Because the table is guaranteed to be empty, we can do something faster // than a full `insert`. 
for (const auto& v : that) { - const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v); - auto target = find_first_non_full(hash); - set_ctrl(target.offset, H2(hash)); + const size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, v); + auto target = find_first_non_full(hashval); + set_ctrl(target.offset, H2(hashval)); emplace_at(target.offset, v); - infoz_.RecordInsert(hash, target.probe_length); + infoz_.RecordInsert(hashval, target.probe_length); } size_ = that.size(); growth_left() -= that.size(); @@ -1142,7 +1163,14 @@ class raw_hash_set it.skip_empty_or_deleted(); return it; } - iterator end() { return {ctrl_ + capacity_}; } + iterator end() + { +#if PHMAP_BIDIRECTIONAL + return iterator_at(capacity_); +#else + return {ctrl_ + capacity_}; +#endif + } const_iterator begin() const { return const_cast(this)->begin(); @@ -1175,8 +1203,8 @@ class raw_hash_set } } size_ = 0; - reset_ctrl(); - reset_growth_left(); + reset_ctrl(capacity_); + reset_growth_left(capacity_); } assert(empty()); infoz_.RecordStorageChanged(0, capacity_); @@ -1208,9 +1236,8 @@ class raw_hash_set // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace // RequiresInsertable with RequiresInsertable. // We are hitting this bug: https://godbolt.org/g/1Vht4f. - template < - class T, RequiresInsertable = 0, - typename std::enable_if::value, int>::type = 0> + template = 0, + typename std::enable_if::value, int>::type = 0> std::pair insert(const T& value) { return emplace(value); } @@ -1234,9 +1261,8 @@ class raw_hash_set // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace // RequiresInsertable with RequiresInsertable. // We are hitting this bug: https://godbolt.org/g/1Vht4f. - template < - class T, RequiresInsertable = 0, - typename std::enable_if::value, int>::type = 0> + template = 0, + typename std::enable_if::value, int>::type = 0> iterator insert(const_iterator, const T& value) { return insert(value).first; } @@ -1245,9 +1271,36 @@ class raw_hash_set return insert(std::move(value)).first; } - template + template + using IsRandomAccess = std::is_same::iterator_category, + std::random_access_iterator_tag>; + + + template + struct has_difference_operator + { + private: + using yes = std::true_type; + using no = std::false_type; + + template static auto test(int) -> decltype(std::declval() - std::declval() == 1, yes()); + template static no test(...); + + public: + static constexpr bool value = std::is_same(0)), yes>::value; + }; + + template ::value, int> = 0> void insert(InputIt first, InputIt last) { - for (; first != last; ++first) insert(*first); + this->reserve(this->size() + (last - first)); + for (; first != last; ++first) + emplace(*first); + } + + template ::value, int> = 0> + void insert(InputIt first, InputIt last) { + for (; first != last; ++first) + emplace(*first); } template = 0, RequiresInsertable = 0> @@ -1273,11 +1326,11 @@ class raw_hash_set } } - insert_return_type insert(node_type&& node, size_t hash) { + insert_return_type insert(node_type&& node, size_t hashval) { if (!node) return {end(), false, node_type()}; const auto& elem = PolicyTraits::element(CommonAccess::GetSlot(node)); auto res = PolicyTraits::apply( - InsertSlotWithHash{*this, std::move(*CommonAccess::GetSlot(node)), hash}, + InsertSlotWithHash{*this, std::move(*CommonAccess::GetSlot(node)), hashval}, elem); if (res.second) { CommonAccess::Reset(&node); @@ -1307,6 +1360,11 @@ class raw_hash_set std::forward(args)...); } + template ::value, int>::type = 0> + std::pair 
emplace_with_hash(size_t hashval, Args&&... args) { + return PolicyTraits::apply(EmplaceDecomposableHashval{*this, hashval}, std::forward(args)...); + } + // This overload kicks in if we cannot deduce the key from args. It constructs // value_type unconditionally and then either moves it into the table or // destroys. @@ -1322,11 +1380,26 @@ class raw_hash_set return PolicyTraits::apply(InsertSlot{*this, std::move(*slot)}, elem); } + template ::value, int>::type = 0> + std::pair emplace_with_hash(size_t hashval, Args&&... args) { + typename std::aligned_storage::type raw; + slot_type* slot = reinterpret_cast(&raw); + + PolicyTraits::construct(&alloc_ref(), slot, std::forward(args)...); + const auto& elem = PolicyTraits::element(slot); + return PolicyTraits::apply(InsertSlotWithHash{*this, std::move(*slot), hashval}, elem); + } + template iterator emplace_hint(const_iterator, Args&&... args) { return emplace(std::forward(args)...).first; } + template + iterator emplace_hint_with_hash(size_t hashval, const_iterator, Args&&... args) { + return emplace_with_hash(hashval, std::forward(args)...).first; + } + // Extension API: support for lazy emplace. // // Looks up key in the table. If found, returns the iterator to the element. @@ -1372,24 +1445,36 @@ class raw_hash_set iterator lazy_emplace(const key_arg& key, F&& f) { auto res = find_or_prepare_insert(key); if (res.second) { - slot_type* slot = slots_ + res.first; - std::forward(f)(constructor(&alloc_ref(), &slot)); - assert(!slot); + lazy_emplace_at(res.first, std::forward(f)); } return iterator_at(res.first); } template - iterator lazy_emplace_with_hash(const key_arg& key, size_t &hash, F&& f) { - auto res = find_or_prepare_insert(key, hash); + iterator lazy_emplace_with_hash(const key_arg& key, size_t &hashval, F&& f) { + auto res = find_or_prepare_insert(key, hashval); if (res.second) { - slot_type* slot = slots_ + res.first; - std::forward(f)(constructor(&alloc_ref(), &slot)); - assert(!slot); + lazy_emplace_at(res.first, std::forward(f)); } return iterator_at(res.first); } + template + void lazy_emplace_at(size_t& idx, F&& f) { + slot_type* slot = slots_ + idx; + std::forward(f)(constructor(&alloc_ref(), &slot)); + assert(!slot); + } + + template + void emplace_single_with_hash(const key_arg& key, size_t &hashval, F&& f) { + auto res = find_or_prepare_insert(key, hashval); + if (res.second) + lazy_emplace_at(res.first, std::forward(f)); + else + _erase(iterator_at(res.first)); + } + // Extension API: support for heterogeneous keys. // @@ -1432,7 +1517,12 @@ class raw_hash_set // This overload is necessary because otherwise erase(const K&) would be // a better match if non-const iterator is passed as an argument. - iterator erase(iterator it) { _erase(it++); return it; } + iterator erase(iterator it) { + auto res = it; + ++res; + _erase(it); + return res; + } iterator erase(const_iterator first, const_iterator last) { while (first != last) { @@ -1496,12 +1586,12 @@ class raw_hash_set } } -#ifndef PHMAP_NON_DETERMINISTIC +#if !defined(PHMAP_NON_DETERMINISTIC) template - bool dump(OutputArchive&) const; + bool phmap_dump(OutputArchive&) const; template - bool load(InputArchive&); + bool phmap_load(InputArchive&); #endif void rehash(size_t n) { @@ -1541,10 +1631,14 @@ class raw_hash_set // // NOTE: This is a very low level operation and should not be used without // specific benchmarks indicating its importance. 
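Stepping back from the hunks for a moment: the `*_with_hash` entry points added above (`emplace_with_hash`, `emplace_hint_with_hash`, `lazy_emplace_with_hash`, `emplace_single_with_hash`) plus the new public `hash()` member let a caller compute a key's hash once and reuse it across several operations, including the prefetch rewired just below. A hedged sketch of the intended pattern (key name and values are illustrative):

    #include <cstddef>
    #include <string>
    #include "parallel_hashmap/phmap.h"

    int main() {
        phmap::flat_hash_map<std::string, int> m;
        std::string key = "counter";          // illustrative key
        std::size_t hv = m.hash(key);         // hash the key exactly once
        m.prefetch_hash(hv);                  // warm the target cache lines
        m.emplace_with_hash(hv, key, 0);      // reuse hv for the insert...
        auto it = m.find(key, hv);            // ...and again for the lookup
        return (it != m.end() && it->second == 0) ? 0 : 1;
    }
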
- void prefetch_hash(size_t hash) const { - (void)hash; -#if defined(__GNUC__) - auto seq = probe(hash); + void prefetch_hash(size_t hashval) const { + (void)hashval; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + auto seq = probe(hashval); + _mm_prefetch((const char *)(ctrl_ + seq.offset()), _MM_HINT_NTA); + _mm_prefetch((const char *)(slots_ + seq.offset()), _MM_HINT_NTA); +#elif defined(__GNUC__) + auto seq = probe(hashval); __builtin_prefetch(static_cast(ctrl_ + seq.offset())); __builtin_prefetch(static_cast(slots_ + seq.offset())); #endif // __GNUC__ @@ -1552,7 +1646,7 @@ class raw_hash_set template void prefetch(const key_arg& key) const { - prefetch_hash(HashElement{hash_ref()}(key)); + prefetch_hash(this->hash(key)); } // The API of find() has two extensions. @@ -1563,33 +1657,35 @@ class raw_hash_set // 2. The type of the key argument doesn't have to be key_type. This is so // called heterogeneous key support. template - iterator find(const key_arg& key, size_t hash) { - auto seq = probe(hash); - while (true) { - Group g{ctrl_ + seq.offset()}; - for (int i : g.Match((h2_t)H2(hash))) { - if (PHMAP_PREDICT_TRUE(PolicyTraits::apply( - EqualElement{key, eq_ref()}, - PolicyTraits::element(slots_ + seq.offset((size_t)i))))) - return iterator_at(seq.offset((size_t)i)); - } - if (PHMAP_PREDICT_TRUE(g.MatchEmpty())) - return end(); - seq.next(); - } + iterator find(const key_arg& key, size_t hashval) { + size_t offset; + if (find_impl(key, hashval, offset)) + return iterator_at(offset); + else + return end(); } + + template + pointer find_ptr(const key_arg& key, size_t hashval) { + size_t offset; + if (find_impl(key, hashval, offset)) + return &PolicyTraits::element(slots_ + offset); + else + return nullptr; + } + template iterator find(const key_arg& key) { - return find(key, HashElement{hash_ref()}(key)); + return find(key, this->hash(key)); } template - const_iterator find(const key_arg& key, size_t hash) const { - return const_cast(this)->find(key, hash); + const_iterator find(const key_arg& key, size_t hashval) const { + return const_cast(this)->find(key, hashval); } template const_iterator find(const key_arg& key) const { - return find(key, HashElement{hash_ref()}(key)); + return find(key, this->hash(key)); } template @@ -1597,6 +1693,11 @@ class raw_hash_set return find(key) != end(); } + template + bool contains(const key_arg& key, size_t hashval) const { + return find(key, hashval) != end(); + } + template std::pair equal_range(const key_arg& key) { auto it = find(key); @@ -1620,7 +1721,7 @@ class raw_hash_set // Does nothing. 
} - hasher hash_function() const { return hash_ref(); } + hasher hash_function() const { return hash_ref(); } // warning: doesn't match internal hash - use hash() member function key_equal key_eq() const { return eq_ref(); } allocator_type get_allocator() const { return alloc_ref(); } @@ -1644,9 +1745,32 @@ class raw_hash_set a.swap(b); } + template + size_t hash(const K& key) const { + return HashElement{hash_ref()}(key); + } + private: template - friend struct phmap::container_internal::hashtable_debug_internal::HashtableDebugAccess; + friend struct phmap::priv::hashtable_debug_internal::HashtableDebugAccess; + + template + bool find_impl(const key_arg& key, size_t hashval, size_t& offset) { + auto seq = probe(hashval); + while (true) { + Group g{ ctrl_ + seq.offset() }; + for (int i : g.Match((h2_t)H2(hashval))) { + offset = seq.offset((size_t)i); + if (PHMAP_PREDICT_TRUE(PolicyTraits::apply( + EqualElement{key, eq_ref()}, + PolicyTraits::element(slots_ + offset)))) + return true; + } + if (PHMAP_PREDICT_TRUE(g.MatchEmpty())) + return false; + seq.next(); + } + } struct FindElement { @@ -1678,10 +1802,10 @@ class raw_hash_set }; template - std::pair emplace_decomposable(const K& key, size_t hash, + std::pair emplace_decomposable(const K& key, size_t hashval, Args&&... args) { - auto res = find_or_prepare_insert(key, hash); + auto res = find_or_prepare_insert(key, hashval); if (res.second) { emplace_at(res.first, std::forward(args)...); } @@ -1692,12 +1816,20 @@ class raw_hash_set { template std::pair operator()(const K& key, Args&&... args) const { - return s.emplace_decomposable(key, typename raw_hash_set::HashElement{s.hash_ref()}(key), - std::forward(args)...); + return s.emplace_decomposable(key, s.hash(key), std::forward(args)...); } raw_hash_set& s; }; + struct EmplaceDecomposableHashval { + template + std::pair operator()(const K& key, Args&&... args) const { + return s.emplace_decomposable(key, hashval, std::forward(args)...); + } + raw_hash_set& s; + size_t hashval; + }; + template struct InsertSlot { @@ -1721,7 +1853,7 @@ class raw_hash_set { template std::pair operator()(const K& key, Args&&...) && { - auto res = s.find_or_prepare_insert(key, hash); + auto res = s.find_or_prepare_insert(key, hashval); if (res.second) { PolicyTraits::transfer(&s.alloc_ref(), s.slots_ + res.first, &slot); } else if (do_destroy) { @@ -1732,7 +1864,7 @@ class raw_hash_set raw_hash_set& s; // Constructed slot. Either moved into place or destroyed. 
slot_type&& slot; - size_t &hash; + size_t &hashval; }; // "erases" the object from the container, except that it doesn't actually @@ -1760,21 +1892,21 @@ class raw_hash_set infoz_.RecordErase(); } - void initialize_slots() { - assert(capacity_); + void initialize_slots(size_t new_capacity) { + assert(new_capacity); if (std::is_same>::value && slots_ == nullptr) { infoz_ = Sample(); } - auto layout = MakeLayout(capacity_); + auto layout = MakeLayout(new_capacity); char* mem = static_cast( Allocate(&alloc_ref(), layout.AllocSize())); ctrl_ = reinterpret_cast(layout.template Pointer<0>(mem)); slots_ = layout.template Pointer<1>(mem); - reset_ctrl(); - reset_growth_left(); - infoz_.RecordStorageChanged(size_, capacity_); + reset_ctrl(new_capacity); + reset_growth_left(new_capacity); + infoz_.RecordStorageChanged(size_, new_capacity); } void destroy_slots() { @@ -1800,16 +1932,16 @@ class raw_hash_set auto* old_ctrl = ctrl_; auto* old_slots = slots_; const size_t old_capacity = capacity_; + initialize_slots(new_capacity); capacity_ = new_capacity; - initialize_slots(); for (size_t i = 0; i != old_capacity; ++i) { if (IsFull(old_ctrl[i])) { - size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, - PolicyTraits::element(old_slots + i)); - auto target = find_first_non_full(hash); + size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, + PolicyTraits::element(old_slots + i)); + auto target = find_first_non_full(hashval); size_t new_i = target.offset; - set_ctrl(new_i, H2(hash)); + set_ctrl(new_i, H2(hashval)); PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, old_slots + i); } } @@ -1847,33 +1979,33 @@ class raw_hash_set slot_type* slot = reinterpret_cast(&raw); for (size_t i = 0; i != capacity_; ++i) { if (!IsDeleted(ctrl_[i])) continue; - size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, - PolicyTraits::element(slots_ + i)); - auto target = find_first_non_full(hash); + size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, + PolicyTraits::element(slots_ + i)); + auto target = find_first_non_full(hashval); size_t new_i = target.offset; - // Verify if the old and new i fall within the same group wrt the hash. + // Verify if the old and new i fall within the same group wrt the hashval. // If they do, we don't need to move the object as it falls already in the // best probe we can. const auto probe_index = [&](size_t pos) { - return ((pos - probe(hash).offset()) & capacity_) / Group::kWidth; + return ((pos - probe(hashval).offset()) & capacity_) / Group::kWidth; }; // Element doesn't move. if (PHMAP_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) { - set_ctrl(i, H2(hash)); + set_ctrl(i, H2(hashval)); continue; } if (IsEmpty(ctrl_[new_i])) { // Transfer element to the empty spot. // set_ctrl poisons/unpoisons the slots so we have to call it at the // right time. - set_ctrl(new_i, H2(hash)); + set_ctrl(new_i, H2(hashval)); PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slots_ + i); set_ctrl(i, kEmpty); } else { assert(IsDeleted(ctrl_[new_i])); - set_ctrl(new_i, H2(hash)); + set_ctrl(new_i, H2(hashval)); // Until we are done rehashing, DELETED marks previously FULL slots. // Swap i and new_i elements. 
PolicyTraits::transfer(&alloc_ref(), slot, slots_ + i); @@ -1882,7 +2014,7 @@ class raw_hash_set --i; // repeat } } - reset_growth_left(); + reset_growth_left(capacity_); } void rehash_and_grow_if_necessary() { @@ -1897,11 +2029,11 @@ class raw_hash_set } } - bool has_element(const value_type& elem, size_t hash) const { - auto seq = probe(hash); + bool has_element(const value_type& elem, size_t hashval) const { + auto seq = probe(hashval); while (true) { Group g{ctrl_ + seq.offset()}; - for (int i : g.Match((h2_t)H2(hash))) { + for (int i : g.Match((h2_t)H2(hashval))) { if (PHMAP_PREDICT_TRUE(PolicyTraits::element(slots_ + seq.offset((size_t)i)) == elem)) return true; @@ -1914,8 +2046,8 @@ class raw_hash_set } bool has_element(const value_type& elem) const { - size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, elem); - return has_element(elem, hash); + size_t hashval = PolicyTraits::apply(HashElement{hash_ref()}, elem); + return has_element(elem, hashval); } // Probes the raw_hash_set with the probe sequence for hash and returns the @@ -1932,8 +2064,8 @@ class raw_hash_set size_t offset; size_t probe_length; }; - FindInfo find_first_non_full(size_t hash) { - auto seq = probe(hash); + FindInfo find_first_non_full(size_t hashval) { + auto seq = probe(hashval); while (true) { Group g{ctrl_ + seq.offset()}; auto mask = g.MatchEmptyOrDeleted(); @@ -1959,11 +2091,11 @@ class raw_hash_set protected: template - std::pair find_or_prepare_insert(const K& key, size_t hash) { - auto seq = probe(hash); + std::pair find_or_prepare_insert(const K& key, size_t hashval) { + auto seq = probe(hashval); while (true) { Group g{ctrl_ + seq.offset()}; - for (int i : g.Match((h2_t)H2(hash))) { + for (int i : g.Match((h2_t)H2(hashval))) { if (PHMAP_PREDICT_TRUE(PolicyTraits::apply( EqualElement{key, eq_ref()}, PolicyTraits::element(slots_ + seq.offset((size_t)i))))) @@ -1972,25 +2104,25 @@ class raw_hash_set if (PHMAP_PREDICT_TRUE(g.MatchEmpty())) break; seq.next(); } - return {prepare_insert(hash), true}; + return {prepare_insert(hashval), true}; } template std::pair find_or_prepare_insert(const K& key) { - return find_or_prepare_insert(key, HashElement{hash_ref()}(key)); + return find_or_prepare_insert(key, this->hash(key)); } - size_t prepare_insert(size_t hash) PHMAP_ATTRIBUTE_NOINLINE { - auto target = find_first_non_full(hash); + size_t prepare_insert(size_t hashval) PHMAP_ATTRIBUTE_NOINLINE { + auto target = find_first_non_full(hashval); if (PHMAP_PREDICT_FALSE(growth_left() == 0 && !IsDeleted(ctrl_[target.offset]))) { rehash_and_grow_if_necessary(); - target = find_first_non_full(hash); + target = find_first_non_full(hashval); } ++size_; growth_left() -= IsEmpty(ctrl_[target.offset]); - set_ctrl(target.offset, H2(hash)); - infoz_.RecordInsert(hash, target.probe_length); + set_ctrl(target.offset, H2(hashval)); + infoz_.RecordInsert(hashval, target.probe_length); return target.offset; } @@ -2018,19 +2150,19 @@ class raw_hash_set private: friend struct RawHashSetTestOnlyAccess; - probe_seq probe(size_t hash) const { - return probe_seq(H1(hash, ctrl_), capacity_); + probe_seq probe(size_t hashval) const { + return probe_seq(H1(hashval, ctrl_), capacity_); } // Reset all ctrl bytes back to kEmpty, except the sentinel. 
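For orientation in the control-byte bookkeeping above and below: each hash value is split into H1 (the probe position) and H2 (a 7-bit tag stored in the `ctrl_` array), as defined earlier in this header for the deterministic build. A standalone sketch of that split, independent of phmap internals:

    #include <cstddef>
    #include <cstdio>

    // Mirrors the deterministic H1/H2 split defined earlier in this header:
    // H1 picks the starting probe position, H2 is the 7-bit tag kept in the
    // ctrl byte and compared a whole Group (16 bytes under SSE2) at a time.
    static std::size_t H1(std::size_t hashval) { return hashval >> 7; }
    static unsigned char H2(std::size_t hashval) { return (unsigned char)(hashval & 0x7F); }

    int main() {
        std::size_t hashval = 0x9E3779B97F4A7C15ull;  // arbitrary example hash
        std::size_t mask    = 15;                     // capacity 16 -> mask 15
        std::size_t offset  = H1(hashval) & mask;     // first slot probed
        std::printf("offset=%zu h2=%u\n", offset, (unsigned)H2(hashval));
        return 0;
    }
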
- void reset_ctrl() { - std::memset(ctrl_, kEmpty, capacity_ + Group::kWidth); - ctrl_[capacity_] = kSentinel; - SanitizerPoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_); + void reset_ctrl(size_t capacity) { + std::memset(ctrl_, kEmpty, capacity + Group::kWidth); + ctrl_[capacity] = kSentinel; + SanitizerPoisonMemoryRegion(slots_, sizeof(slot_type) * capacity); } - void reset_growth_left() { - growth_left() = CapacityToGrowth(capacity()) - size_; + void reset_growth_left(size_t capacity) { + growth_left() = CapacityToGrowth(capacity) - size_; } // Sets the control byte, and if `i < Group::kWidth`, set the cloned byte at @@ -2094,7 +2226,7 @@ class raw_hash_set size_t size_ = 0; // number of full slots size_t capacity_ = 0; // total number of slots HashtablezInfoHandle infoz_; - phmap::container_internal::CompressedTuple settings_{0, hasher{}, key_equal{}, allocator_type{}}; }; @@ -2296,6 +2428,7 @@ class parallel_hash_set using EmbeddedSet = RefSet; using EmbeddedIterator= typename EmbeddedSet::iterator; using EmbeddedConstIterator= typename EmbeddedSet::const_iterator; + using constructor = typename EmbeddedSet::constructor; using init_type = typename PolicyTraits::init_type; using key_type = typename PolicyTraits::key_type; using slot_type = typename PolicyTraits::slot_type; @@ -2501,42 +2634,42 @@ class parallel_hash_set std::is_nothrow_default_constructible::value&& std::is_nothrow_default_constructible::value) {} - explicit parallel_hash_set(size_t bucket_count, + explicit parallel_hash_set(size_t bucket_cnt, const hasher& hash_param = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) { for (auto& inner : sets_) - inner.set_ = EmbeddedSet(bucket_count / N, hash_param, eq, alloc); + inner.set_ = EmbeddedSet(bucket_cnt / N, hash_param, eq, alloc); } - parallel_hash_set(size_t bucket_count, + parallel_hash_set(size_t bucket_cnt, const hasher& hash_param, const allocator_type& alloc) - : parallel_hash_set(bucket_count, hash_param, key_equal(), alloc) {} + : parallel_hash_set(bucket_cnt, hash_param, key_equal(), alloc) {} - parallel_hash_set(size_t bucket_count, const allocator_type& alloc) - : parallel_hash_set(bucket_count, hasher(), key_equal(), alloc) {} + parallel_hash_set(size_t bucket_cnt, const allocator_type& alloc) + : parallel_hash_set(bucket_cnt, hasher(), key_equal(), alloc) {} explicit parallel_hash_set(const allocator_type& alloc) : parallel_hash_set(0, hasher(), key_equal(), alloc) {} template - parallel_hash_set(InputIter first, InputIter last, size_t bucket_count = 0, + parallel_hash_set(InputIter first, InputIter last, size_t bucket_cnt = 0, const hasher& hash_param = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) - : parallel_hash_set(bucket_count, hash_param, eq, alloc) { + : parallel_hash_set(bucket_cnt, hash_param, eq, alloc) { insert(first, last); } template - parallel_hash_set(InputIter first, InputIter last, size_t bucket_count, + parallel_hash_set(InputIter first, InputIter last, size_t bucket_cnt, const hasher& hash_param, const allocator_type& alloc) - : parallel_hash_set(first, last, bucket_count, hash_param, key_equal(), alloc) {} + : parallel_hash_set(first, last, bucket_cnt, hash_param, key_equal(), alloc) {} template - parallel_hash_set(InputIter first, InputIter last, size_t bucket_count, + parallel_hash_set(InputIter first, InputIter last, size_t bucket_cnt, const allocator_type& alloc) - : parallel_hash_set(first, last, bucket_count, hasher(), key_equal(), 
alloc) {} + : parallel_hash_set(first, last, bucket_cnt, hasher(), key_equal(), alloc) {} template parallel_hash_set(InputIter first, InputIter last, const allocator_type& alloc) @@ -2565,33 +2698,33 @@ class parallel_hash_set // RequiresNotInit is a workaround for gcc prior to 7.1. // -------------------------------------------------------------------- template = 0, RequiresInsertable = 0> - parallel_hash_set(std::initializer_list init, size_t bucket_count = 0, + parallel_hash_set(std::initializer_list init, size_t bucket_cnt = 0, const hasher& hash_param = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) - : parallel_hash_set(init.begin(), init.end(), bucket_count, hash_param, eq, alloc) {} + : parallel_hash_set(init.begin(), init.end(), bucket_cnt, hash_param, eq, alloc) {} - parallel_hash_set(std::initializer_list init, size_t bucket_count = 0, + parallel_hash_set(std::initializer_list init, size_t bucket_cnt = 0, const hasher& hash_param = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) - : parallel_hash_set(init.begin(), init.end(), bucket_count, hash_param, eq, alloc) {} + : parallel_hash_set(init.begin(), init.end(), bucket_cnt, hash_param, eq, alloc) {} template = 0, RequiresInsertable = 0> - parallel_hash_set(std::initializer_list init, size_t bucket_count, + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, const hasher& hash_param, const allocator_type& alloc) - : parallel_hash_set(init, bucket_count, hash_param, key_equal(), alloc) {} + : parallel_hash_set(init, bucket_cnt, hash_param, key_equal(), alloc) {} - parallel_hash_set(std::initializer_list init, size_t bucket_count, + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, const hasher& hash_param, const allocator_type& alloc) - : parallel_hash_set(init, bucket_count, hash_param, key_equal(), alloc) {} + : parallel_hash_set(init, bucket_cnt, hash_param, key_equal(), alloc) {} template = 0, RequiresInsertable = 0> - parallel_hash_set(std::initializer_list init, size_t bucket_count, + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, const allocator_type& alloc) - : parallel_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} + : parallel_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} - parallel_hash_set(std::initializer_list init, size_t bucket_count, + parallel_hash_set(std::initializer_list init, size_t bucket_cnt, const allocator_type& alloc) - : parallel_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} + : parallel_hash_set(init, bucket_cnt, hasher(), key_equal(), alloc) {} template = 0, RequiresInsertable = 0> parallel_hash_set(std::initializer_list init, const allocator_type& alloc) @@ -2673,7 +2806,18 @@ class parallel_hash_set PHMAP_ATTRIBUTE_REINITIALIZES void clear() { for (auto& inner : sets_) + { + typename Lockable::UniqueLock m(inner); inner.set_.clear(); + } + } + + // extension - clears only the specified submap + // --------------------------------------------- + void clear(std::size_t submap_index) { + Inner& inner = sets_[submap_index]; + typename Lockable::UniqueLock m(inner); + inner.set_.clear(); + } // This overload kicks in when the argument is an rvalue of insertable and @@ -2761,7 +2905,7 @@ class parallel_hash_set if (!node) return {end(), false, node_type()}; auto& key = node.key(); - size_t hashval = HashElement{hash_ref()}(key); + size_t hashval = this->hash(key); Inner& inner = sets_[subidx(hashval)]; auto& set = inner.set_; @@ -2784,10
+2928,88 @@ class parallel_hash_set } }; + // -------------------------------------------------------------------- + // phmap extension: emplace_with_hash + // ---------------------------------- + // same as emplace, but hashval is provided + // -------------------------------------------------------------------- + template + std::pair emplace_decomposable_with_hash(const K& key, size_t hashval, Args&&... args) + { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + return make_rv(&inner, set.emplace_decomposable(key, hashval, std::forward(args)...)); + } + + struct EmplaceDecomposableHashval + { + template + std::pair operator()(const K& key, Args&&... args) const { + return s.emplace_decomposable_with_hash(key, hashval, std::forward(args)...); + } + parallel_hash_set& s; + size_t hashval; + }; + + // This overload kicks in if we can deduce the key from args. This enables us + // to avoid constructing value_type if an entry with the same key already + // exists. + // + // For example: + // + // flat_hash_map m = {{"abc", "def"}}; + // // Creates no std::string copies and makes no heap allocations. + // m.emplace("abc", "xyz"); + // -------------------------------------------------------------------- + template ::value, int>::type = 0> + std::pair emplace_with_hash(size_t hashval, Args&&... args) { + return PolicyTraits::apply(EmplaceDecomposableHashval{*this, hashval}, + std::forward(args)...); + } + + // This overload kicks in if we cannot deduce the key from args. It constructs + // value_type unconditionally and then either moves it into the table or + // destroys. + // -------------------------------------------------------------------- + template ::value, int>::type = 0> + std::pair emplace_with_hash(size_t hashval, Args&&... args) { + typename std::aligned_storage::type raw; + slot_type* slot = reinterpret_cast(&raw); + + PolicyTraits::construct(&alloc_ref(), slot, std::forward(args)...); + const auto& elem = PolicyTraits::element(slot); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + typename EmbeddedSet::template InsertSlotWithHash f { + inner, std::move(*slot), hashval}; + return make_rv(PolicyTraits::apply(f, elem)); + } + + template + iterator emplace_hint_with_hash(size_t hashval, const_iterator, Args&&... args) { + return emplace_with_hash(hashval, std::forward(args)...).first; + } + + template + iterator lazy_emplace_with_hash(size_t hashval, const key_arg& key, F&& f) { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + return make_iterator(&inner, set.lazy_emplace_with_hash(key, hashval, std::forward(f))); + } + + // -------------------------------------------------------------------- + // end of phmap extension + // -------------------------------------------------------------------- + template std::pair emplace_decomposable(const K& key, Args&&... args) { - size_t hashval = HashElement{hash_ref()}(key); + size_t hashval = this->hash(key); Inner& inner = sets_[subidx(hashval)]; auto& set = inner.set_; typename Lockable::UniqueLock m(inner); @@ -2827,13 +3049,12 @@ class parallel_hash_set template ::value, int>::type = 0> std::pair emplace(Args&&...
args) { - typename std::aligned_storage::type - raw; + typename std::aligned_storage::type raw; slot_type* slot = reinterpret_cast(&raw); + size_t hashval = this->hash(PolicyTraits::key(slot)); PolicyTraits::construct(&alloc_ref(), slot, std::forward(args)...); const auto& elem = PolicyTraits::element(slot); - size_t hashval = HashElement{hash_ref()}(PolicyTraits::key(slot)); Inner& inner = sets_[subidx(hashval)]; auto& set = inner.set_; typename Lockable::UniqueLock m(inner); @@ -2862,12 +3083,44 @@ class parallel_hash_set template iterator lazy_emplace(const key_arg& key, F&& f) { - auto hashval = HashElement{hash_ref()}(key); + auto hashval = this->hash(key); Inner& inner = sets_[subidx(hashval)]; auto& set = inner.set_; typename Lockable::UniqueLock m(inner); return make_iterator(&inner, set.lazy_emplace_with_hash(key, hashval, std::forward(f))); } + + template + void emplace_single(const key_arg& key, F&& f) { + auto hashval = this->hash(key); + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + typename Lockable::UniqueLock m(inner); + set.emplace_single_with_hash(key, hashval, std::forward(f)); + } + + // Extension API: support iterating over all values + // + // flat_hash_set s; + // s.insert(...); + // s.for_each([](auto const & key) { + // // Safely iterates over all the keys + // }); + template + void for_each(F&& fCallback) const { + for (auto const& inner : sets_) { + typename Lockable::SharedLock m(const_cast(inner)); + std::for_each(inner.set_.begin(), inner.set_.end(), fCallback); + } + } + + // this version allows to modify the values + void for_each_m(std::function && fCallback) { + for (auto& inner : sets_) { + typename Lockable::UniqueLock m(const_cast(inner)); + std::for_each(inner.set_.begin(), inner.set_.end(), fCallback); + } + } // Extension API: support for heterogeneous keys. // @@ -2881,7 +3134,7 @@ class parallel_hash_set // -------------------------------------------------------------------- template size_type erase(const key_arg& key) { - auto hashval = HashElement{hash_ref()}(key); + auto hashval = this->hash(key); Inner& inner = sets_[subidx(hashval)]; auto& set = inner.set_; typename Lockable::UpgradeLock m(inner); @@ -3010,16 +3263,16 @@ class parallel_hash_set // NOTE: This is a very low level operation and should not be used without // specific benchmarks indicating its importance. // -------------------------------------------------------------------- - template - void prefetch(const key_arg& key) const { - (void)key; -#if 0 && defined(__GNUC__) - size_t hashval = HashElement{hash_ref()}(key); + void prefetch_hash(size_t hashval) const { const Inner& inner = sets_[subidx(hashval)]; const auto& set = inner.set_; - typename Lockable::UniqueLock m(inner); + typename Lockable::SharedLock m(const_cast(inner)); set.prefetch_hash(hashval); -#endif // __GNUC__ + } + + template + void prefetch(const key_arg& key) const { + prefetch_hash(this->hash(key)); } // The API of find() has two extensions. 
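Taken together, the `*_with_hash` extensions, the now-public `hash()` member, and `for_each` added in the hunks above let a caller compute a key's hash once and reuse it across insertion and lookup. Below is a minimal sketch of the intended usage; the string-to-int map, the key, and the values are illustrative assumptions, not part of this patch:

    #include <cstdio>
    #include <string>
    #include "parallel_hashmap/phmap.h"

    int main() {
        phmap::parallel_flat_hash_map<std::string, int> m;
        std::string key = "transcript_42";

        // hash the key once via the (now public) hash() member ...
        size_t hashval = m.hash(key);
        // ... and reuse it for insertion and lookup, skipping re-hashing
        m.emplace_with_hash(hashval, key, 1);
        if (m.contains(key, hashval))
            std::printf("%s present\n", key.c_str());

        // visit every element; each submap is traversed under its shared lock
        m.for_each([](const auto& kv) {
            std::printf("%s -> %d\n", kv.first.c_str(), kv.second);
        });
        return 0;
    }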
@@ -3038,7 +3291,7 @@ class parallel_hash_set template iterator find(const key_arg& key) { - return find(key, HashElement{hash_ref()}(key)); + return find(key, this->hash(key)); } template @@ -3048,7 +3301,7 @@ class parallel_hash_set template const_iterator find(const key_arg& key) const { - return find(key, HashElement{hash_ref()}(key)); + return find(key, this->hash(key)); } template @@ -3056,6 +3309,11 @@ class parallel_hash_set return find(key) != end(); } + template + bool contains(const key_arg& key, size_t hashval) const { + return find(key, hashval) != end(); + } + template std::pair equal_range(const key_arg& key) { auto it = find(key); @@ -3082,8 +3340,8 @@ class parallel_hash_set } float load_factor() const { - size_t capacity = bucket_count(); - return capacity ? static_cast(static_cast(size()) / capacity) : 0; + size_t _capacity = bucket_count(); + return _capacity ? static_cast(static_cast(size()) / _capacity) : 0; } float max_load_factor() const { return 1.0f; } @@ -3091,7 +3349,7 @@ class parallel_hash_set // Does nothing. } - hasher hash_function() const { return hash_ref(); } + hasher hash_function() const { return hash_ref(); } // warning: doesn't match internal hash - use hash() member function key_equal key_eq() const { return eq_ref(); } allocator_type get_allocator() const { return alloc_ref(); } @@ -3108,17 +3366,22 @@ class parallel_hash_set a.swap(b); } -#ifndef PHMAP_NON_DETERMINISTIC + template + size_t hash(const K& key) const { + return HashElement{hash_ref()}(key); + } + +#if !defined(PHMAP_NON_DETERMINISTIC) template - bool dump(OutputArchive& ar) const; + bool phmap_dump(OutputArchive& ar) const; template - bool load(InputArchive& ar); + bool phmap_load(InputArchive& ar); #endif private: template - friend struct phmap::container_internal::hashtable_debug_internal::HashtableDebugAccess; + friend struct phmap::priv::hashtable_debug_internal::HashtableDebugAccess; struct FindElement { @@ -3191,19 +3454,26 @@ class parallel_hash_set } protected: - template - iterator find(const key_arg& key, size_t hashval, typename Lockable::SharedLock &mutexlock) { + template + pointer find_ptr(const key_arg& key, size_t hashval, L& mutexlock) + { Inner& inner = sets_[subidx(hashval)]; - auto& set = inner.set_; - mutexlock = std::move(typename Lockable::SharedLock(inner)); - auto it = set.find(key, hashval); - return make_iterator(&inner, it); + auto& set = inner.set_; + mutexlock = std::move(L(inner)); + return set.find_ptr(key, hashval); + } + + template + iterator find(const key_arg& key, size_t hashval, L& mutexlock) { + Inner& inner = sets_[subidx(hashval)]; + auto& set = inner.set_; + mutexlock = std::move(L(inner)); + return make_iterator(&inner, set.find(key, hashval)); } template std::tuple - find_or_prepare_insert(const K& key, typename Lockable::UniqueLock &mutexlock) { - auto hashval = HashElement{hash_ref()}(key); + find_or_prepare_insert_with_hash(size_t hashval, const K& key, typename Lockable::UniqueLock &mutexlock) { Inner& inner = sets_[subidx(hashval)]; auto& set = inner.set_; mutexlock = std::move(typename Lockable::UniqueLock(inner)); @@ -3211,6 +3481,12 @@ class parallel_hash_set return std::make_tuple(&inner, p.first, p.second); } + template + std::tuple + find_or_prepare_insert(const K& key, typename Lockable::UniqueLock &mutexlock) { + return find_or_prepare_insert_with_hash(this->hash(key), key, mutexlock); + } + iterator iterator_at(Inner *inner, const EmbeddedIterator& it) { return {inner, &sets_[0] + num_tables, it}; @@ -3221,12 +3497,7 @@ class 
parallel_hash_set } static size_t subidx(size_t hashval) { - return (hashval ^ (hashval >> N)) & mask; - } - - template - size_t hash(const K& key) const { - return HashElement{hash_ref()}(key); + return ((hashval >> 8) ^ (hashval >> 16) ^ (hashval >> 24)) & mask; } static size_t subcnt() { @@ -3252,6 +3523,7 @@ class parallel_hash_set return sets_[0].set_.alloc_ref(); } +protected: // protected in case users want to derive from this std::array sets_; }; @@ -3393,19 +3665,108 @@ class parallel_hash_map : public parallel_hash_set::value, int>::type = 0, + K* = nullptr> + std::pair try_emplace_with_hash(size_t hashval, key_arg&& k, Args&&... args) { + return try_emplace_impl_with_hash(hashval, std::forward(k), std::forward(args)...); + } + + template ::value, int>::type = 0> + std::pair try_emplace_with_hash(size_t hashval, const key_arg& k, Args&&... args) { + return try_emplace_impl_with_hash(hashval, k, std::forward(args)...); + } + + template + iterator try_emplace_with_hash(size_t hashval, const_iterator, key_arg&& k, Args&&... args) { + return try_emplace_with_hash(hashval, std::forward(k), std::forward(args)...).first; + } + + template + iterator try_emplace_with_hash(size_t hashval, const_iterator, const key_arg& k, Args&&... args) { + return try_emplace_with_hash(hashval, k, std::forward(args)...).first; + } + + // if map contains key, lambda is called with the mapped value (under read lock protection), + // and if_contains returns true. This is a const API and lambda should not modify the value + // ----------------------------------------------------------------------------------------- template bool if_contains(const key_arg& key, F&& f) const { -#if __cplusplus >= 201703L - static_assert(std::is_invocable::value); -#endif - typename Lockable::SharedLock m; - auto it = const_cast(this)->find(key, this->hash(key), m); - if (it == this->end()) - return false; - std::forward(f)(Policy::value(&*it)); - return true; + return const_cast(this)->template + modify_if_impl(key, std::forward(f)); + } + + // if map contains key, lambda is called with the mapped value without read lock protection, + // and if_contains_unsafe returns true. This is a const API and lambda should not modify the value + // This should be used only if we know that no other thread may be mutating the map at the time. + // ----------------------------------------------------------------------------------------- + template + bool if_contains_unsafe(const key_arg& key, F&& f) const { + return const_cast(this)->template + modify_if_impl::DoNothing>(key, std::forward(f)); + } + + // if map contains key, lambda is called with the mapped value (under write lock protection), + // and modify_if returns true. This is a non-const API and lambda is allowed to modify the mapped value + // ---------------------------------------------------------------------------------------------------- + template + bool modify_if(const key_arg& key, F&& f) { + return modify_if_impl(key, std::forward(f)); + } + + // if map contains key, lambda is called with the mapped value (under write lock protection). + // If the lambda returns true, the key is subsequently erased from the map (the write lock + // is only released after erase). + // returns true if key was erased, false otherwise.
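The lambda-based accessors described above (and implemented just below) are the lock-safe way to read or update a mapped value in place. A hedged sketch of how calling code might drive them, assuming a counts map with std::mutex submap locks; the key "gene_a" and the threshold are made up for illustration:

    #include <mutex>
    #include <string>
    #include "parallel_hashmap/phmap.h"

    using CountMap = phmap::parallel_flat_hash_map<
        std::string, int,
        phmap::priv::hash_default_hash<std::string>,
        phmap::priv::hash_default_eq<std::string>,
        std::allocator<std::pair<const std::string, int>>,
        4, std::mutex>;  // a real mutex enables the internal per-submap locks

    int main() {
        CountMap counts;

        // insert "gene_a" -> 1, or bump it if already present (write lock held)
        counts.try_emplace_l("gene_a",
                             [](int& v) { ++v; },  // runs only when the key exists
                             1);                   // construction arg otherwise

        // read the mapped value under the submap's read lock
        int seen = 0;
        counts.if_contains("gene_a", [&](const int& v) { seen = v; });

        // erase only when the predicate holds; the write lock spans the erase
        counts.erase_if("gene_a", [](int& v) { return v > 10; });
        return seen > 0 ? 0 : 1;
    }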
+ // ---------------------------------------------------------------------------------------------------- + template + bool erase_if(const key_arg& key, F&& f) { + return erase_if_impl(key, std::forward(f)); + } + + // if map does not contain key, it is inserted and the mapped value is value-constructed + // with the provided arguments (if any), as with try_emplace. + // if map already contains key, then the lambda is called with the mapped value (under + // write lock protection) and can update the mapped value. + // returns true if key was not already present, false otherwise. + // --------------------------------------------------------------------------------------- + template + bool try_emplace_l(K&& k, F&& f, Args&&... args) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert(k, m); + typename Base::Inner *inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.emplace_at(std::get<1>(res), std::piecewise_construct, + std::forward_as_tuple(std::forward(k)), + std::forward_as_tuple(std::forward(args)...)); + else { + auto it = this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))); + std::forward(f)(Policy::value(&*it)); + } + return std::get<2>(res); + } + + template + bool lazy_emplace_l(const key_arg& key, FExists&& fExists, FEmplace&& fEmplace) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert(key, m); + typename Base::Inner* inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.lazy_emplace_at(std::get<1>(res), std::forward(fEmplace)); + else { + auto it = this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))); + std::forward(fExists)(Policy::value(&*it)); + } + return std::get<2>(res); } + // ----------- end of phmap extensions -------------------------- + template MappedReference

operator[](key_arg&& key) { return Policy::value(&*try_emplace(std::forward(key)).first); @@ -3417,6 +3778,36 @@ class parallel_hash_map : public parallel_hash_set + bool modify_if_impl(const key_arg& key, F&& f) { +#if __cplusplus >= 201703L + static_assert(std::is_invocable::value); +#endif + L m; + auto ptr = this->template find_ptr(key, this->hash(key), m); + if (ptr == nullptr) + return false; + std::forward(f)(Policy::value(ptr)); + return true; + } + + template + bool erase_if_impl(const key_arg& key, F&& f) { +#if __cplusplus >= 201703L + static_assert(std::is_invocable::value); +#endif + L m; + auto it = this->template find(key, this->hash(key), m); + if (it == this->end()) return false; + if (std::forward(f)(Policy::value(&*it))) + { + this->erase(it); + return true; + } + return false; + } + + template std::pair insert_or_assign_impl(K&& k, V&& v) { typename Lockable::UniqueLock m; @@ -3442,6 +3833,21 @@ class parallel_hash_map : public parallel_hash_setiterator_at(inner, inner->set_.iterator_at(std::get<1>(res))), std::get<2>(res)}; } + + template + std::pair try_emplace_impl_with_hash(size_t hashval, K&& k, Args&&... args) { + typename Lockable::UniqueLock m; + auto res = this->find_or_prepare_insert_with_hash(hashval, k, m); + typename Base::Inner *inner = std::get<0>(res); + if (std::get<2>(res)) + inner->set_.emplace_at(std::get<1>(res), std::piecewise_construct, + std::forward_as_tuple(std::forward(k)), + std::forward_as_tuple(std::forward(args)...)); + return {this->iterator_at(inner, inner->set_.iterator_at(std::get<1>(res))), + std::get<2>(res)}; + } + + }; @@ -3559,7 +3965,7 @@ struct FlatHashSetPolicy template static void construct(Allocator* alloc, slot_type* slot, Args&&... args) { phmap::allocator_traits::construct(*alloc, slot, - std::forward(args)...); + std::forward(args)...); } template @@ -3577,10 +3983,10 @@ struct FlatHashSetPolicy static T& element(slot_type* slot) { return *slot; } template - static decltype(phmap::container_internal::DecomposeValue( + static decltype(phmap::priv::DecomposeValue( std::declval(), std::declval()...)) apply(F&& f, Args&&... args) { - return phmap::container_internal::DecomposeValue( + return phmap::priv::DecomposeValue( std::forward(f), std::forward(args)...); } @@ -3592,7 +3998,7 @@ struct FlatHashSetPolicy template struct FlatHashMapPolicy { - using slot_policy = container_internal::map_slot_policy; + using slot_policy = priv::map_slot_policy; using slot_type = typename slot_policy::slot_type; using key_type = K; using mapped_type = V; @@ -3615,10 +4021,10 @@ struct FlatHashMapPolicy } template - static decltype(phmap::container_internal::DecomposePair( + static decltype(phmap::priv::DecomposePair( std::declval(), std::declval()...)) apply(F&& f, Args&&... args) { - return phmap::container_internal::DecomposePair(std::forward(f), + return phmap::priv::DecomposePair(std::forward(f), std::forward(args)...); } @@ -3674,7 +4080,7 @@ struct node_hash_policy { // -------------------------------------------------------------------------- template struct NodeHashSetPolicy - : phmap::container_internal::node_hash_policy> + : phmap::priv::node_hash_policy> { using key_type = T; using init_type = T; @@ -3701,10 +4107,10 @@ struct NodeHashSetPolicy } template - static decltype(phmap::container_internal::DecomposeValue( + static decltype(phmap::priv::DecomposeValue( std::declval(), std::declval()...)) apply(F&& f, Args&&... 
args) { - return phmap::container_internal::DecomposeValue( + return phmap::priv::DecomposeValue( std::forward(f), std::forward(args)...); } @@ -3715,7 +4121,7 @@ struct NodeHashSetPolicy // -------------------------------------------------------------------------- template class NodeHashMapPolicy - : public phmap::container_internal::node_hash_policy< + : public phmap::priv::node_hash_policy< std::pair&, NodeHashMapPolicy> { using value_type = std::pair; @@ -3747,10 +4153,10 @@ class NodeHashMapPolicy } template - static decltype(phmap::container_internal::DecomposePair( + static decltype(phmap::priv::DecomposePair( std::declval(), std::declval()...)) apply(F&& f, Args&&... args) { - return phmap::container_internal::DecomposePair(std::forward(f), + return phmap::priv::DecomposePair(std::forward(f), std::forward(args)...); } @@ -3872,11 +4278,11 @@ struct HashtableDebugAccess> static size_t GetNumProbes(const Set& set, const typename Set::key_type& key) { size_t num_probes = 0; - size_t hash = typename Set::HashElement{set.hash_ref()}(key); - auto seq = set.probe(hash); + size_t hashval = set.hash(key); + auto seq = set.probe(hashval); while (true) { - container_internal::Group g{set.ctrl_ + seq.offset()}; - for (int i : g.Match(container_internal::H2(hash))) { + priv::Group g{set.ctrl_ + seq.offset()}; + for (int i : g.Match(priv::H2(hashval))) { if (Traits::apply( typename Set::template EqualElement{ key, set.eq_ref()}, @@ -3901,7 +4307,7 @@ struct HashtableDebugAccess> m += per_slot * c.size(); } else { for (size_t i = 0; i != capacity; ++i) { - if (container_internal::IsFull(c.ctrl_[i])) { + if (priv::IsFull(c.ctrl_[i])) { m += Traits::space_used(c.slots_ + i); } } @@ -3923,7 +4329,7 @@ struct HashtableDebugAccess> }; } // namespace hashtable_debug_internal -} // namespace container_internal +} // namespace priv // ----------------------------------------------------------------------------- // phmap::flat_hash_set @@ -3944,8 +4350,8 @@ struct HashtableDebugAccess> // ----------------------------------------------------------------------------- template // default values in phmap_fwd_decl.h class flat_hash_set - : public phmap::container_internal::raw_hash_set< - phmap::container_internal::FlatHashSetPolicy, Hash, Eq, Alloc> + : public phmap::priv::raw_hash_set< + phmap::priv::FlatHashSetPolicy, Hash, Eq, Alloc> { using Base = typename flat_hash_set::raw_hash_set; @@ -3983,6 +4389,7 @@ class flat_hash_set using Base::max_load_factor; using Base::get_allocator; using Base::hash_function; + using Base::hash; using Base::key_eq; }; @@ -4005,8 +4412,8 @@ class flat_hash_set // * Returns `void` from the `_erase(iterator)` overload. 
// ----------------------------------------------------------------------------- template // default values in phmap_fwd_decl.h -class flat_hash_map : public phmap::container_internal::raw_hash_map< - phmap::container_internal::FlatHashMapPolicy, +class flat_hash_map : public phmap::priv::raw_hash_map< + phmap::priv::FlatHashMapPolicy, Hash, Eq, Alloc> { using Base = typename flat_hash_map::raw_hash_map; @@ -4048,6 +4455,7 @@ class flat_hash_map : public phmap::container_internal::raw_hash_map< using Base::max_load_factor; using Base::get_allocator; using Base::hash_function; + using Base::hash; using Base::key_eq; }; @@ -4068,8 +4476,8 @@ class flat_hash_map : public phmap::container_internal::raw_hash_map< // ----------------------------------------------------------------------------- template // default values in phmap_fwd_decl.h class node_hash_set - : public phmap::container_internal::raw_hash_set< - phmap::container_internal::NodeHashSetPolicy, Hash, Eq, Alloc> + : public phmap::priv::raw_hash_set< + phmap::priv::NodeHashSetPolicy, Hash, Eq, Alloc> { using Base = typename node_hash_set::raw_hash_set; @@ -4093,6 +4501,8 @@ class node_hash_set using Base::insert; using Base::emplace; using Base::emplace_hint; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; using Base::extract; using Base::merge; using Base::swap; @@ -4107,6 +4517,7 @@ class node_hash_set using Base::max_load_factor; using Base::get_allocator; using Base::hash_function; + using Base::hash; using Base::key_eq; typename Base::hasher hash_funct() { return this->hash_function(); } void resize(typename Base::size_type hint) { this->rehash(hint); } @@ -4130,8 +4541,8 @@ class node_hash_set // ----------------------------------------------------------------------------- template // default values in phmap_fwd_decl.h class node_hash_map - : public phmap::container_internal::raw_hash_map< - phmap::container_internal::NodeHashMapPolicy, Hash, Eq, + : public phmap::priv::raw_hash_map< + phmap::priv::NodeHashMapPolicy, Hash, Eq, Alloc> { using Base = typename node_hash_map::raw_hash_map; @@ -4174,6 +4585,7 @@ class node_hash_map using Base::max_load_factor; using Base::get_allocator; using Base::hash_function; + using Base::hash; using Base::key_eq; typename Base::hasher hash_funct() { return this->hash_function(); } void resize(typename Base::size_type hint) { this->rehash(hint); } @@ -4184,9 +4596,9 @@ class node_hash_map // ----------------------------------------------------------------------------- template // default values in phmap_fwd_decl.h class parallel_flat_hash_set - : public phmap::container_internal::parallel_hash_set< - N, phmap::container_internal::raw_hash_set, Mtx_, - phmap::container_internal::FlatHashSetPolicy, + : public phmap::priv::parallel_hash_set< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::FlatHashSetPolicy, Hash, Eq, Alloc> { using Base = typename parallel_flat_hash_set::parallel_hash_set; @@ -4214,6 +4626,8 @@ class parallel_flat_hash_set using Base::insert; using Base::emplace; using Base::emplace_hint; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; using Base::extract; using Base::merge; using Base::swap; @@ -4235,9 +4649,9 @@ class parallel_flat_hash_set // phmap::parallel_flat_hash_map - default values in phmap_fwd_decl.h // ----------------------------------------------------------------------------- template -class parallel_flat_hash_map : public phmap::container_internal::parallel_hash_map< - N, phmap::container_internal::raw_hash_set, 
Mtx_, - phmap::container_internal::FlatHashMapPolicy, +class parallel_flat_hash_map : public phmap::priv::parallel_hash_map< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::FlatHashMapPolicy, Hash, Eq, Alloc> { using Base = typename parallel_flat_hash_map::parallel_hash_map; @@ -4267,6 +4681,9 @@ class parallel_flat_hash_map : public phmap::container_internal::parallel_hash_m using Base::emplace; using Base::emplace_hint; using Base::try_emplace; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; + using Base::try_emplace_with_hash; using Base::extract; using Base::merge; using Base::swap; @@ -4291,9 +4708,9 @@ class parallel_flat_hash_map : public phmap::container_internal::parallel_hash_m // ----------------------------------------------------------------------------- template class parallel_node_hash_set - : public phmap::container_internal::parallel_hash_set< - N, phmap::container_internal::raw_hash_set, Mtx_, - phmap::container_internal::NodeHashSetPolicy, Hash, Eq, Alloc> + : public phmap::priv::parallel_hash_set< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::NodeHashSetPolicy, Hash, Eq, Alloc> { using Base = typename parallel_node_hash_set::parallel_hash_set; @@ -4320,6 +4737,8 @@ class parallel_node_hash_set using Base::insert; using Base::emplace; using Base::emplace_hint; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; using Base::extract; using Base::merge; using Base::swap; @@ -4344,9 +4763,9 @@ class parallel_node_hash_set // ----------------------------------------------------------------------------- template class parallel_node_hash_map - : public phmap::container_internal::parallel_hash_map< - N, phmap::container_internal::raw_hash_set, Mtx_, - phmap::container_internal::NodeHashMapPolicy, Hash, Eq, + : public phmap::priv::parallel_hash_map< + N, phmap::priv::raw_hash_set, Mtx_, + phmap::priv::NodeHashMapPolicy, Hash, Eq, Alloc> { using Base = typename parallel_node_hash_map::parallel_hash_map; @@ -4376,6 +4795,9 @@ class parallel_node_hash_map using Base::emplace; using Base::emplace_hint; using Base::try_emplace; + using Base::emplace_with_hash; + using Base::emplace_hint_with_hash; + using Base::try_emplace_with_hash; using Base::extract; using Base::merge; using Base::swap; diff --git a/include/parallel_hashmap/phmap_base.h b/include/parallel_hashmap/phmap_base.h index 27976826c..6b9ea9ee5 100644 --- a/include/parallel_hashmap/phmap_base.h +++ b/include/parallel_hashmap/phmap_base.h @@ -329,10 +329,10 @@ template using underlying_type_t = typename std::underlying_type::type; template< class F, class... ArgTypes> -#if __cplusplus >= 201703L -using invoke_result_t = typename std::invoke_result_t; +#if PHMAP_HAVE_CC17 + using invoke_result_t = typename std::invoke_result_t; #else -using invoke_result_t = typename std::result_of::type; + using invoke_result_t = typename std::result_of::type; #endif namespace type_traits_internal { @@ -417,7 +417,7 @@ inline void AssertHashEnabled // hash_policy_traits // ----------------------------------------------------------------------------- namespace phmap { -namespace container_internal { +namespace priv { // Defines how slots are initialized/destroyed/moved. template @@ -581,7 +581,7 @@ struct hash_policy_traits } }; -} // namespace container_internal +} // namespace priv } // namespace phmap // ----------------------------------------------------------------------------- @@ -1303,7 +1303,7 @@ constexpr bool HasRebindAlloc(...) 
{ } template -constexpr bool HasRebindAlloc(typename T::template rebind::other*) { +constexpr bool HasRebindAlloc(typename std::allocator_traits::template rebind_alloc*) { return true; } @@ -1527,7 +1527,7 @@ struct allocator_traits template static auto construct_impl(int, A& a, // NOLINT(runtime/references) Args&&... args) - -> decltype(a.construct(std::forward(args)...)) { + -> decltype(std::allocator_traits::construct(a, std::forward(args)...)) { std::allocator_traits::construct(a, std::forward(args)...); } @@ -1538,7 +1538,7 @@ struct allocator_traits template static auto destroy_impl(int, A& a, // NOLINT(runtime/references) - T* p) -> decltype(a.destroy(p)) { + T* p) -> decltype(std::allocator_traits::destroy(a, p)) { std::allocator_traits::destroy(a, p); } template @@ -2710,7 +2710,7 @@ struct hash > // common.h // ----------------------------------------------------------------------------- namespace phmap { -namespace container_internal { +namespace priv { template struct IsTransparent : std::false_type {}; @@ -2831,7 +2831,7 @@ template class node_handle : public node_handle_base { - using Base = typename node_handle::node_handle_base; + using Base = node_handle_base; public: using value_type = typename PolicyTraits::value_type; @@ -2855,7 +2855,7 @@ class node_handle> : public node_handle_base { - using Base = typename node_handle::node_handle_base; + using Base = node_handle_base; public: using key_type = typename Policy::key_type; @@ -2920,7 +2920,7 @@ struct InsertReturnType NodeType node; }; -} // namespace container_internal +} // namespace priv } // namespace phmap @@ -3142,8 +3142,8 @@ class Span static const size_type npos = ~(size_type(0)); constexpr Span() noexcept : Span(nullptr, 0) {} - constexpr Span(pointer array, size_type length) noexcept - : ptr_(array), len_(length) {} + constexpr Span(pointer array, size_type lgth) noexcept + : ptr_(array), len_(lgth) {} // Implicit conversion constructors template @@ -3690,7 +3690,7 @@ constexpr Span MakeConstSpan(const T (&array)[N]) noexcept { #endif namespace phmap { -namespace container_internal { +namespace priv { // A type wrapper that instructs `Layout` to use the specific alignment for the // array. `Layout<..., Aligned, ...>` has exactly the same API @@ -4161,7 +4161,7 @@ class Layout : public internal_layout::LayoutType : internal_layout::LayoutType(sizes...) {} }; -} // namespace container_internal +} // namespace priv } // namespace phmap // --------------------------------------------------------------------------- @@ -4177,7 +4177,7 @@ class Layout : public internal_layout::LayoutType #endif // _MSC_VER namespace phmap { -namespace container_internal { +namespace priv { template class CompressedTuple; @@ -4277,7 +4277,7 @@ struct PHMAP_INTERNAL_COMPRESSED_TUPLE_DECLSPEC // To access the members, use member .get() function. 
// // Eg: -// phmap::container_internal::CompressedTuple value(7, t1, t2, +// phmap::priv::CompressedTuple value(7, t1, t2, // t3); // assert(value.get<0>() == 7); // T1& t1 = value.get<1>(); @@ -4329,12 +4329,12 @@ class PHMAP_INTERNAL_COMPRESSED_TUPLE_DECLSPEC CompressedTuple template <> class PHMAP_INTERNAL_COMPRESSED_TUPLE_DECLSPEC CompressedTuple<> {}; -} // namespace container_internal +} // namespace priv } // namespace phmap namespace phmap { -namespace container_internal { +namespace priv { #ifdef _MSC_VER #pragma warning(push) @@ -4420,7 +4420,7 @@ inline void SanitizerUnpoisonObject(const T* object) { SanitizerUnpoisonMemoryRegion(object, sizeof(T)); } -} // namespace container_internal +} // namespace priv } // namespace phmap @@ -4534,7 +4534,7 @@ inline T& ts_unchecked_read(T& v) PHMAP_NO_THREAD_SAFETY_ANALYSIS { } // namespace thread_safety_analysis -namespace container_internal { +namespace priv { namespace memory_internal { @@ -4747,7 +4747,7 @@ struct map_slot_policy } }; -} // namespace container_internal +} // namespace priv } // phmap @@ -5137,6 +5137,8 @@ class LockableImpl: public phmap::NullMutex }; #endif +#endif // BOOST_THREAD_SHARED_MUTEX_HPP + // -------------------------------------------------------------------------- // std::shared_mutex support (read and write lock support) // -------------------------------------------------------------------------- @@ -5156,9 +5158,7 @@ class LockableImpl: public phmap::NullMutex using UniqueLocks = typename Base::WriteLocks; using UpgradeToUnique = typename Base::DoNothing; // we already have unique ownership }; -#endif - -#endif // PHMAP_HAS_BOOST_THREAD_MUTEXES +#endif // PHMAP_HAVE_SHARED_MUTEX } // phmap diff --git a/include/parallel_hashmap/phmap_bits.h b/include/parallel_hashmap/phmap_bits.h index 7933d8cb5..6b765fff5 100644 --- a/include/parallel_hashmap/phmap_bits.h +++ b/include/parallel_hashmap/phmap_bits.h @@ -287,7 +287,7 @@ PHMAP_BASE_INTERNAL_FORCEINLINE int CountLeadingZeros64(uint64_t n) { return (int)(63 - result); } return 64; -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) && !defined(__clang__) // MSVC does not have __buitin_clzll. 
Compose two calls to _BitScanReverse unsigned long result = 0; // NOLINT(runtime/int) if ((n >> 32) && _BitScanReverse(&result, (unsigned long)(n >> 32))) { @@ -297,7 +297,7 @@ PHMAP_BASE_INTERNAL_FORCEINLINE int CountLeadingZeros64(uint64_t n) { return 63 - result; } return 64; -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) // Use __builtin_clzll, which uses the following instructions: // x86: bsr // ARM64: clz @@ -324,13 +324,13 @@ PHMAP_BASE_INTERNAL_FORCEINLINE int CountLeadingZeros32Slow(uint64_t n) { } PHMAP_BASE_INTERNAL_FORCEINLINE int CountLeadingZeros32(uint32_t n) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) unsigned long result = 0; // NOLINT(runtime/int) if (_BitScanReverse(&result, n)) { return (int)(31 - result); } return 32; -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) // Use __builtin_clz, which uses the following instructions: // x86: bsr // ARM64: clz @@ -361,11 +361,11 @@ PHMAP_BASE_INTERNAL_FORCEINLINE int CountTrailingZerosNonZero64Slow(uint64_t n) } PHMAP_BASE_INTERNAL_FORCEINLINE int CountTrailingZerosNonZero64(uint64_t n) { -#if defined(_MSC_VER) && defined(_M_X64) +#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_X64) unsigned long result = 0; // NOLINT(runtime/int) _BitScanForward64(&result, n); return (int)result; -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) && !defined(__clang__) unsigned long result = 0; // NOLINT(runtime/int) if (static_cast(n) == 0) { _BitScanForward(&result, (unsigned long)(n >> 32)); @@ -373,7 +373,7 @@ PHMAP_BASE_INTERNAL_FORCEINLINE int CountTrailingZerosNonZero64(uint64_t n) { } _BitScanForward(&result, (unsigned long)n); return result; -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) static_assert(sizeof(unsigned long long) == sizeof(n), // NOLINT(runtime/int) "__builtin_ctzll does not take 64-bit arg"); return __builtin_ctzll(n); @@ -394,11 +394,11 @@ PHMAP_BASE_INTERNAL_FORCEINLINE int CountTrailingZerosNonZero32Slow(uint32_t n) } PHMAP_BASE_INTERNAL_FORCEINLINE int CountTrailingZerosNonZero32(uint32_t n) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) unsigned long result = 0; // NOLINT(runtime/int) _BitScanForward(&result, n); return (int)result; -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) static_assert(sizeof(int) == sizeof(n), "__builtin_ctz does not take 32-bit arg"); return __builtin_ctz(n); diff --git a/include/parallel_hashmap/phmap_config.h b/include/parallel_hashmap/phmap_config.h index dd5a0bf54..fa515025c 100644 --- a/include/parallel_hashmap/phmap_config.h +++ b/include/parallel_hashmap/phmap_config.h @@ -109,6 +109,7 @@ #endif + // ----------------------------------------------------------------------------- // Compiler Feature Checks // ----------------------------------------------------------------------------- @@ -119,6 +120,14 @@ #define PHMAP_HAVE_BUILTIN(x) 0 #endif +#if (defined(_MSVC_LANG) && _MSVC_LANG >= 201703) || __cplusplus >= 201703 + #define PHMAP_HAVE_CC17 1 +#else + #define PHMAP_HAVE_CC17 0 +#endif + +#define PHMAP_BRANCHLESS 1 + // ---------------------------------------------------------------- // Checks whether `std::is_trivially_destructible` is supported. 
// ---------------------------------------------------------------- @@ -304,8 +313,7 @@ // #pragma message(PHMAP_VAR_NAME_VALUE(_MSVC_LANG)) -#if defined(_MSC_VER) && _MSC_VER >= 1910 && \ - ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703) || __cplusplus >= 201703) +#if defined(_MSC_VER) && _MSC_VER >= 1910 && PHMAP_HAVE_CC17 // #define PHMAP_HAVE_STD_ANY 1 #define PHMAP_HAVE_STD_OPTIONAL 1 #define PHMAP_HAVE_STD_VARIANT 1 @@ -314,7 +322,7 @@ #endif #endif -#if (defined(_MSVC_LANG) && _MSVC_LANG >= 201703) || __cplusplus >= 201703 +#if PHMAP_HAVE_CC17 #define PHMAP_HAVE_SHARED_MUTEX 1 #endif @@ -330,6 +338,13 @@ #define PHMAP_INTERNAL_MSVC_2017_DBG_MODE #endif +// --------------------------------------------------------------------------- +// Checks whether wchar_t is treated as a native type +// (MSVC: /Zc:wchar_t- treats wchar_t as unsigned short) +// --------------------------------------------------------------------------- +#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) +#define PHMAP_HAS_NATIVE_WCHAR_T +#endif // ----------------------------------------------------------------------------- // Sanitizer Attributes @@ -610,7 +625,7 @@ #endif #ifndef PHMAP_HAVE_SSSE3 - #ifdef __SSSE3__ + #if defined(__SSSE3__) || defined(__AVX2__) #define PHMAP_HAVE_SSSE3 1 #else #define PHMAP_HAVE_SSSE3 0 @@ -633,7 +648,7 @@ // ---------------------------------------------------------------------- // constexpr if // ---------------------------------------------------------------------- -#if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703) +#if PHMAP_HAVE_CC17 #define PHMAP_IF_CONSTEXPR(expr) if constexpr ((expr)) #else #define PHMAP_IF_CONSTEXPR(expr) if ((expr)) diff --git a/include/parallel_hashmap/phmap_dump.h b/include/parallel_hashmap/phmap_dump.h new file mode 100644 index 000000000..a4b8a59ec --- /dev/null +++ b/include/parallel_hashmap/phmap_dump.h @@ -0,0 +1,260 @@ +#if !defined(phmap_dump_h_guard_) +#define phmap_dump_h_guard_ + +// --------------------------------------------------------------------------- +// Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com +// +// providing dump/load/mmap_load +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
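With this new header in place, the phmap_dump/phmap_load machinery defined below can be driven through the BinaryOutputArchive/BinaryInputArchive helpers. A minimal round-trip sketch; the file path and the trivially copyable key/value types are illustrative assumptions:

    #include <cstdint>
    #include "parallel_hashmap/phmap.h"
    #include "parallel_hashmap/phmap_dump.h"

    int main() {
        phmap::flat_hash_map<uint64_t, uint32_t> m1, m2;
        m1[17] = 99;

        {   // serialize; the file closes when the archive goes out of scope
            phmap::BinaryOutputArchive ar_out("/tmp/map.bin");
            m1.phmap_dump(ar_out);
        }
        {   // restore into a fresh map
            phmap::BinaryInputArchive ar_in("/tmp/map.bin");
            m2.phmap_load(ar_in);
        }
        return m2[17] == 99 ? 0 : 1;
    }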
+// --------------------------------------------------------------------------- + +#include +#include +#include +#include "phmap.h" +namespace phmap +{ + +namespace type_traits_internal { + +#if defined(__GLIBCXX__) && __GLIBCXX__ < 20150801 + template struct IsTriviallyCopyable : public std::integral_constant {}; +#else + template struct IsTriviallyCopyable : public std::is_trivially_copyable {}; +#endif + +template +struct IsTriviallyCopyable> { + static constexpr bool value = IsTriviallyCopyable::value && IsTriviallyCopyable::value; +}; +} + +namespace priv { + +#if !defined(PHMAP_NON_DETERMINISTIC) && !defined(PHMAP_DISABLE_DUMP) + +// ------------------------------------------------------------------------ +// dump/load for raw_hash_set +// ------------------------------------------------------------------------ +template +template +bool raw_hash_set::phmap_dump(OutputArchive& ar) const { + static_assert(type_traits_internal::IsTriviallyCopyable::value, + "value_type should be trivially copyable"); + + ar.saveBinary(&size_, sizeof(size_t)); + if (size_ == 0) + return true; + ar.saveBinary(&capacity_, sizeof(size_t)); + ar.saveBinary(ctrl_, sizeof(ctrl_t) * (capacity_ + Group::kWidth + 1)); + ar.saveBinary(slots_, sizeof(slot_type) * capacity_); + return true; +} + +template +template +bool raw_hash_set::phmap_load(InputArchive& ar) { + static_assert(type_traits_internal::IsTriviallyCopyable::value, + "value_type should be trivially copyable"); + raw_hash_set().swap(*this); // clear any existing content + ar.loadBinary(&size_, sizeof(size_t)); + if (size_ == 0) + return true; + ar.loadBinary(&capacity_, sizeof(size_t)); + + // allocate memory for ctrl_ and slots_ + initialize_slots(capacity_); + ar.loadBinary(ctrl_, sizeof(ctrl_t) * (capacity_ + Group::kWidth + 1)); + ar.loadBinary(slots_, sizeof(slot_type) * capacity_); + return true; +} + +// ------------------------------------------------------------------------ +// dump/load for parallel_hash_set +// ------------------------------------------------------------------------ +template class RefSet, + class Mtx_, + class Policy, class Hash, class Eq, class Alloc> +template +bool parallel_hash_set::phmap_dump(OutputArchive& ar) const { + static_assert(type_traits_internal::IsTriviallyCopyable::value, + "value_type should be trivially copyable"); + + size_t submap_count = subcnt(); + ar.saveBinary(&submap_count, sizeof(size_t)); + for (size_t i = 0; i < sets_.size(); ++i) { + auto& inner = sets_[i]; + typename Lockable::UniqueLock m(const_cast(inner)); + if (!inner.set_.phmap_dump(ar)) { + std::cerr << "Failed to dump submap " << i << std::endl; + return false; + } + } + return true; +} + +template class RefSet, + class Mtx_, + class Policy, class Hash, class Eq, class Alloc> +template +bool parallel_hash_set::phmap_load(InputArchive& ar) { + static_assert(type_traits_internal::IsTriviallyCopyable::value, + "value_type should be trivially copyable"); + + size_t submap_count = 0; + ar.loadBinary(&submap_count, sizeof(size_t)); + if (submap_count != subcnt()) { + std::cerr << "submap count(" << submap_count << ") != N(" << N << ")" << std::endl; + return false; + } + + for (size_t i = 0; i < submap_count; ++i) { + auto& inner = sets_[i]; + typename Lockable::UniqueLock m(const_cast(inner)); + if (!inner.set_.phmap_load(ar)) { + std::cerr << "Failed to load submap " << i << std::endl; + return false; + } + } + return true; +} + +#endif // !defined(PHMAP_NON_DETERMINISTIC) && !defined(PHMAP_DISABLE_DUMP) + +} // namespace priv + + + +// 
------------------------------------------------------------------------ +// BinaryArchive +// File is closed when archive object is destroyed +// ------------------------------------------------------------------------ + +// ------------------------------------------------------------------------ +// ------------------------------------------------------------------------ +class BinaryOutputArchive { +public: + BinaryOutputArchive(const char *file_path) { + ofs_.open(file_path, std::ofstream::out | std::ofstream::trunc | std::ofstream::binary); + } + + bool saveBinary(const void *p, size_t sz) { + ofs_.write(reinterpret_cast(p), sz); + return true; + } + +private: + std::ofstream ofs_; +}; + + +class BinaryInputArchive { +public: + BinaryInputArchive(const char * file_path) { + ifs_.open(file_path, std::ofstream::in | std::ofstream::binary); + } + + bool loadBinary(void* p, size_t sz) { + ifs_.read(reinterpret_cast(p), sz); + return true; + } + +private: + std::ifstream ifs_; +}; + +} // namespace phmap + + +#ifdef CEREAL_SIZE_TYPE + +template +using PhmapTrivCopyable = typename phmap::type_traits_internal::IsTriviallyCopyable; + +namespace cereal +{ + // Overload Cereal serialization code for phmap::flat_hash_map + // ----------------------------------------------------------- + template + void save(typename std::enable_if::value && PhmapTrivCopyable::value, typename cereal::BinaryOutputArchive>::type &ar, + phmap::flat_hash_map const &hmap) + { + hmap.phmap_dump(ar); + } + + template + void load(typename std::enable_if::value && PhmapTrivCopyable::value, typename cereal::BinaryInputArchive>::type &ar, + phmap::flat_hash_map &hmap) + { + hmap.phmap_load(ar); + } + + + // Overload Cereal serialization code for phmap::parallel_flat_hash_map + // -------------------------------------------------------------------- + template + void save(typename std::enable_if::value && PhmapTrivCopyable::value, typename cereal::BinaryOutputArchive>::type &ar, + phmap::parallel_flat_hash_map const &hmap) + { + hmap.phmap_dump(ar); + } + + template + void load(typename std::enable_if::value && PhmapTrivCopyable::value, typename cereal::BinaryInputArchive>::type &ar, + phmap::parallel_flat_hash_map &hmap) + { + hmap.phmap_load(ar); + } + + // Overload Cereal serialization code for phmap::flat_hash_set + // ----------------------------------------------------------- + template + void save(typename std::enable_if::value, typename cereal::BinaryOutputArchive>::type &ar, + phmap::flat_hash_set const &hset) + { + hset.phmap_dump(ar); + } + + template + void load(typename std::enable_if::value, typename cereal::BinaryInputArchive>::type &ar, + phmap::flat_hash_set &hset) + { + hset.phmap_load(ar); + } + + // Overload Cereal serialization code for phmap::parallel_flat_hash_set + // -------------------------------------------------------------------- + template + void save(typename std::enable_if::value, typename cereal::BinaryOutputArchive>::type &ar, + phmap::parallel_flat_hash_set const &hset) + { + hset.phmap_dump(ar); + } + + template + void load(typename std::enable_if::value, typename cereal::BinaryInputArchive>::type &ar, + phmap::parallel_flat_hash_set &hset) + { + hset.phmap_load(ar); + } +} + +#endif + + + + +#endif // phmap_dump_h_guard_ diff --git a/include/parallel_hashmap/phmap_fwd_decl.h b/include/parallel_hashmap/phmap_fwd_decl.h index f90417d53..a7719c494 100644 --- a/include/parallel_hashmap/phmap_fwd_decl.h +++ b/include/parallel_hashmap/phmap_fwd_decl.h @@ -40,7 +40,7 @@ namespace phmap { class 
NullMutex; - namespace container_internal { + namespace priv { // The hash of an object of type T is computed by using phmap::Hash. template @@ -51,10 +51,10 @@ namespace phmap { }; template - using hash_default_hash = typename container_internal::HashEq::Hash; + using hash_default_hash = typename priv::HashEq::Hash; template - using hash_default_eq = typename container_internal::HashEq::Eq; + using hash_default_eq = typename priv::HashEq::Eq; // type alias for std::allocator so we can forward declare without including other headers template @@ -64,65 +64,65 @@ namespace phmap { template using Pair = typename phmap::Pair; - } // namespace container_internal + } // namespace priv // ------------- forward declarations for hash containers ---------------------------------- template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator> // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator> // alias for std::allocator class flat_hash_set; template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator< - phmap::container_internal::Pair>> // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator< + phmap::priv::Pair>> // alias for std::allocator class flat_hash_map; template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator> // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator> // alias for std::allocator class node_hash_set; template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator< - phmap::container_internal::Pair>> // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator< + phmap::priv::Pair>> // alias for std::allocator class node_hash_map; template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator, // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator, // alias for std::allocator size_t N = 4, // 2**N submaps class Mutex = phmap::NullMutex> // use std::mutex to enable internal locks class parallel_flat_hash_set; template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator< - phmap::container_internal::Pair>, // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator< + phmap::priv::Pair>, // alias for std::allocator size_t N = 4, // 2**N submaps class Mutex = phmap::NullMutex> // use std::mutex to enable internal locks class parallel_flat_hash_map; template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator, // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator, // alias for std::allocator size_t N = 4, // 2**N submaps class Mutex = phmap::NullMutex> // use std::mutex to enable 
internal locks class parallel_node_hash_set; template , - class Eq = phmap::container_internal::hash_default_eq, - class Alloc = phmap::container_internal::Allocator< - phmap::container_internal::Pair>, // alias for std::allocator + class Hash = phmap::priv::hash_default_hash, + class Eq = phmap::priv::hash_default_eq, + class Alloc = phmap::priv::Allocator< + phmap::priv::Pair>, // alias for std::allocator size_t N = 4, // 2**N submaps class Mutex = phmap::NullMutex> // use std::mutex to enable internal locks class parallel_node_hash_map; @@ -137,11 +137,11 @@ namespace phmap { class btree_multiset; template , - typename Alloc = phmap::Allocator>> + typename Alloc = phmap::Allocator>> class btree_map; template , - typename Alloc = phmap::Allocator>> + typename Alloc = phmap::Allocator>> class btree_multimap; } // namespace phmap diff --git a/include/parallel_hashmap/phmap_utils.h b/include/parallel_hashmap/phmap_utils.h index 72d4e716f..1d0c47284 100644 --- a/include/parallel_hashmap/phmap_utils.h +++ b/include/parallel_hashmap/phmap_utils.h @@ -209,12 +209,14 @@ struct Hash : public phmap_unary_function { return static_cast(val); } }; +#ifdef PHMAP_HAS_NATIVE_WCHAR_T template <> struct Hash : public phmap_unary_function { inline size_t operator()(wchar_t val) const noexcept { return static_cast(val); } }; +#endif template <> struct Hash : public phmap_unary_function diff --git a/scripts/fetchPufferfish.sh b/scripts/fetchPufferfish.sh index d32e1313c..4bd4d27e0 100755 --- a/scripts/fetchPufferfish.sh +++ b/scripts/fetchPufferfish.sh @@ -22,8 +22,8 @@ if [ -d ${INSTALL_DIR}/src/pufferfish ] ; then rm -fr ${INSTALL_DIR}/src/pufferfish fi -SVER=salmon-v1.6.0 -#SVER=develop +#SVER=salmon-v1.6.0 +SVER=develop #SVER=sketch-mode EXPECTED_SHA256=f71b3c08f254200fcdc2eb8fe3dcca8a8e9489e79ef5952a4958d8b9979831dc diff --git a/src/FastxParser.cpp b/src/FastxParser.cpp index 51273b10d..2805b3ea4 100644 --- a/src/FastxParser.cpp +++ b/src/FastxParser.cpp @@ -14,9 +14,6 @@ #include #include -// STEP 1: declare the type of file handler and the read() function -KSEQ_INIT(gzFile, gzread) - namespace fastx_parser { template FastxParser::FastxParser(std::vector files, @@ -132,23 +129,8 @@ template FastxParser::~FastxParser() { return ret; } -inline void copyRecord(kseq_t* seq, ReadSeq* s) { - // Copy over the sequence and read name - s->seq.assign(seq->seq.s, seq->seq.l); - s->name.assign(seq->name.s, seq->name.l); -} - -inline void copyRecord(kseq_t* seq, ReadQual* s) { - // Copy over the sequence and read name - // and quality - s->seq.assign(seq->seq.s, seq->seq.l); - s->name.assign(seq->name.s, seq->name.l); - s->qual.assign(seq->qual.s, seq->qual.l); -} - - template -int parseReads( +int parse_reads( std::vector& inputStreams, std::atomic& numParsing, moodycamel::ConsumerToken* cCont, moodycamel::ProducerToken* pRead, moodycamel::ConcurrentQueue& workQueue, @@ -156,13 +138,14 @@ int parseReads( seqContainerQueue_, moodycamel::ConcurrentQueue>>& readQueue_) { + using namespace klibpp; using fastx_parser::thread_utils::MIN_BACKOFF_ITERS; auto curMaxDelay = MIN_BACKOFF_ITERS; - kseq_t* seq; T* s; + uint32_t fn{0}; while (workQueue.try_dequeue(fn)) { - auto file = inputStreams[fn]; + auto& file = inputStreams[fn]; std::unique_ptr> local; while (!seqContainerQueue_.try_dequeue(*cCont, local)) { fastx_parser::thread_utils::backoffOrYield(curMaxDelay); @@ -171,19 +154,16 @@ int parseReads( } size_t numObtained{local->size()}; // open the file and init the parser - auto fp = gzopen(file.c_str(), "r"); + gzFile 
fp = gzopen(file.c_str(), "r"); // The number of reads we have in the local vector size_t numWaiting{0}; - seq = kseq_init(fp); - int ksv = kseq_read(seq); - - while (ksv >= 0) { - s = &((*local)[numWaiting++]); - - copyRecord(seq, s); + auto seq = make_kstream(fp, gzread, mode::in); + s = &((*local)[numWaiting]); + while ( seq >> *s ) { //ksv >= 0 + numWaiting++; // If we've filled the local vector, then dump to the concurrent queue if (numWaiting == numObtained) { curMaxDelay = MIN_BACKOFF_ITERS; @@ -199,15 +179,18 @@ int parseReads( } numObtained = local->size(); } - ksv = kseq_read(seq); + s = &((*local)[numWaiting]); } - if (ksv == -3) { + // if we had an error in the stream + if (seq.err()) { --numParsing; return -3; - } else if (ksv < -1) { + } else if (seq.tqs()) { + // if we had a quality string of the wrong length + // tqs == truncated quality string --numParsing; - return ksv; + return -2; } // If we hit the end of the file and have any reads in our local buffer @@ -226,7 +209,6 @@ int parseReads( } } // destroy the parser and close the file - kseq_destroy(seq); gzclose(fp); } @@ -235,7 +217,7 @@ int parseReads( } template -int parseReadPair( +int parse_read_pairs( std::vector& inputStreams, std::vector& inputStreams2, std::atomic& numParsing, moodycamel::ConsumerToken* cCont, moodycamel::ProducerToken* pRead, @@ -244,10 +226,9 @@ int parseReadPair( seqContainerQueue_, moodycamel::ConcurrentQueue>>& readQueue_) { + using namespace klibpp; using fastx_parser::thread_utils::MIN_BACKOFF_ITERS; size_t curMaxDelay = MIN_BACKOFF_ITERS; - kseq_t* seq; - kseq_t* seq2; T* s; uint32_t fn{0}; @@ -263,24 +244,20 @@ int parseReadPair( // std::cerr << "couldn't dequeue read chunk\n"; } size_t numObtained{local->size()}; + // open the file and init the parser - auto fp = gzopen(file.c_str(), "r"); - auto fp2 = gzopen(file2.c_str(), "r"); + gzFile fp = gzopen(file.c_str(), "r"); + gzFile fp2 = gzopen(file2.c_str(), "r"); // The number of reads we have in the local vector size_t numWaiting{0}; - seq = kseq_init(fp); - seq2 = kseq_init(fp2); - - int ksv = kseq_read(seq); - int ksv2 = kseq_read(seq2); - while (ksv >= 0 and ksv2 >= 0) { - - s = &((*local)[numWaiting++]); - copyRecord(seq, &s->first); - copyRecord(seq2, &s->second); + auto seq = make_kstream(fp, gzread, mode::in); + auto seq2 = make_kstream(fp2, gzread, mode::in); + s = &((*local)[numWaiting]); + while ( (seq >> s->first) and (seq2 >> s->second) ) {//ksv >= 0 and ksv2 >= 0) { + numWaiting++; // If we've filled the local vector, then dump to the concurrent queue if (numWaiting == numObtained) { curMaxDelay = MIN_BACKOFF_ITERS; @@ -296,16 +273,18 @@ int parseReadPair( } numObtained = local->size(); } - ksv = kseq_read(seq); - ksv2 = kseq_read(seq2); + s = &((*local)[numWaiting]); } - if (ksv == -3 or ksv2 == -3) { + // if we had an error in the stream + if (seq.err() or seq2.err()) { --numParsing; return -3; - } else if (ksv < -1 or ksv2 < -1) { + } else if (seq.tqs() or seq2.tqs()) { + // if we had a quality string of the wrong length + // tqs == truncated quality string --numParsing; - return std::min(ksv, ksv2); + return -2; } // If we hit the end of the file and have any reads in our local buffer @@ -324,9 +303,7 @@ int parseReadPair( } } // destroy the parser and close the file - kseq_destroy(seq); gzclose(fp); - kseq_destroy(seq2); gzclose(fp2); } @@ -342,7 +319,7 @@ template <> bool FastxParser::start() { for (size_t i = 0; i < numParsers_; ++i) { ++numParsing_; parsingThreads_.emplace_back(new std::thread([this, i]() { - 
diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp
index 35816c503..45eb998cd 100644
--- a/src/SalmonAlevin.cpp
+++ b/src/SalmonAlevin.cpp
@@ -1989,10 +1989,12 @@ void sc_align_read_library(ReadExperimentT& readExp,

       size_t numFiles = rl.mates1().size() + rl.mates2().size();
       uint32_t numParsingThreads{1};
-      // HACK!
-      // if we have more than 1 set of input files and the thread count is
-      // greater than 8, then dedicate a second thread to parsing.
-      if (rl.mates1().size() > 1 and numThreads > 8) { numParsingThreads = 2; numThreads -= 1;}
+      // Currently just a heuristic; a better way to do this would be
+      // to have a joint thread-pool where threads could move between
+      // being parsers and consumers.
+      bool _did_modify = salmon::utils::configure_parsing(rl.mates1().size(), numThreads, numParsingThreads);
+      //if (rl.mates1().size() > 1 and numThreads > 8) { numParsingThreads = 2; numThreads -= 1;}
+
       pairedParserPtr.reset(new paired_parser(rl.mates1(), rl.mates2(), numThreads, numParsingThreads, miniBatchSize));
       pairedParserPtr->start();
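The _did_modify flag above comes from a new helper whose definition follows in the SalmonUtils.cpp hunk below. A self-contained sketch of the heuristic it encodes: the mirrored function body follows that hunk, and the worked numbers in main are purely illustrative:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Mirror of configure_parsing from the hunk below: with more than one
    // input file and at least 8 worker threads, steal one worker to act as
    // a second parsing thread.
    bool configure_parsing(size_t nfiles, size_t& worker_threads, uint32_t& parse_threads) {
      bool modified = false;
      if (nfiles > 1 and worker_threads >= 8) {
        parse_threads = 2;
        worker_threads -= 1;
        modified = true;
      }
      return modified;
    }

    int main() {
      size_t workers = 16;  // e.g. a salmon run with 16 threads
      uint32_t parsers = 1; // default: a single parsing thread
      bool changed = configure_parsing(/*nfiles=*/2, workers, parsers);
      // expected output: changed=1 workers=15 parsers=2
      std::cout << "changed=" << changed << " workers=" << workers
                << " parsers=" << parsers << '\n';
      return 0;
    }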
diff --git a/src/SalmonUtils.cpp b/src/SalmonUtils.cpp
index 355afb5c8..cfd1d7e29 100644
--- a/src/SalmonUtils.cpp
+++ b/src/SalmonUtils.cpp
@@ -1373,6 +1373,21 @@ std::string getCurrentTimeAsString() {
   return str;
 }

+// encodes the heuristic for guessing how threads should
+// be allocated based on the available reads;
+// returns true if the input was modified and false otherwise.
+bool configure_parsing(size_t nfiles, // input param
+                       size_t& worker_threads, // input/output param
+                       uint32_t& parse_threads // input/output param
+) {
+  bool modified = false;
+  if (nfiles > 1 and worker_threads >= 8) { parse_threads = 2; worker_threads -= 1; modified = true; }
+  // if we ever want to dedicate still more threads to parsing:
+  // if (nfiles > 2 and worker_threads >= 16) { parse_threads = 3; worker_threads -= 1; modified = true; }
+  return modified;
+}
+
+
 bool validateOptionsAlignment_(
     SalmonOpts& sopt,
     boost::program_options::variables_map& vm

From 7674ea3e04a17bf20f774f0a846ad3ecd69e5f26 Mon Sep 17 00:00:00 2001
From: Rob Patro
Date: Fri, 21 Jan 2022 22:32:59 -0500
Subject: [PATCH 10/13] use local protocol, convert from chobo to itlib

---
 include/SalmonMappingUtils.hpp  | 4 ++--
 include/SingleCellProtocols.hpp | 6 +++---
 scripts/fetchPufferfish.sh      | 4 ++--
 src/SalmonAlevin.cpp            | 9 ++++-----
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/include/SalmonMappingUtils.hpp b/include/SalmonMappingUtils.hpp
index 8f4c8c261..7e143dfdc 100644
--- a/include/SalmonMappingUtils.hpp
+++ b/include/SalmonMappingUtils.hpp
@@ -56,7 +56,7 @@
 #include "pufferfish/ksw2pp/KSW2Aligner.hpp"
 #include "pufferfish/metro/metrohash64.h"
 #include "pufferfish/SelectiveAlignmentUtils.hpp"
-#include "pufferfish/chobo/small_vector.hpp"
+#include "pufferfish/itlib/small_vector.hpp"
 #include "parallel_hashmap/phmap.h"

 namespace salmon {
@@ -122,7 +122,7 @@
       int32_t secondBestScore;
       int32_t bestDecoyScore;
       double decoyThresh;
-      chobo::small_vector<std::pair<int32_t, int32_t>> best_decoy_hits;
+      itlib::small_vector<std::pair<int32_t, int32_t>> best_decoy_hits;
       bool collect_decoy_info_;
       std::vector<int32_t> scores_;
       phmap::flat_hash_map<uint32_t, std::pair<int32_t, int32_t>> bestScorePerTranscript_;

diff --git a/include/SingleCellProtocols.hpp b/include/SingleCellProtocols.hpp
index bd93cc256..1db3592dc 100644
--- a/include/SingleCellProtocols.hpp
+++ b/include/SingleCellProtocols.hpp
@@ -6,7 +6,7 @@

 #include "AlevinOpts.hpp"
 #include "AlevinTypes.hpp"
-#include "pufferfish/chobo/static_vector.hpp"
+#include "pufferfish/itlib/static_vector.hpp"

 namespace alevin{
   namespace protocols {

@@ -15,8 +15,8 @@
   struct TagGeometry {
     // uint32_t read_num{0};
     // tuples are read_num, start_pos, length
-    chobo::static_vector<std::pair<uint32_t, uint32_t>, num_tag_pieces> substr_locs1{};
-    chobo::static_vector<std::pair<uint32_t, uint32_t>, num_tag_pieces> substr_locs2{};
+    itlib::static_vector<std::pair<uint32_t, uint32_t>, num_tag_pieces> substr_locs1{};
+    itlib::static_vector<std::pair<uint32_t, uint32_t>, num_tag_pieces> substr_locs2{};
     // the total length of the tag on read 1
     size_t length1{0};
     // the total length of the tag on read 2

diff --git a/scripts/fetchPufferfish.sh b/scripts/fetchPufferfish.sh
index 4bd4d27e0..80e4b42ae 100755
--- a/scripts/fetchPufferfish.sh
+++ b/scripts/fetchPufferfish.sh
@@ -89,7 +89,7 @@ cp -r ${EXTERNAL_DIR}/pufferfish/include/libdivide ${INSTALL_DIR}/include/pufferfish
 cp -r ${EXTERNAL_DIR}/pufferfish/include/ksw2pp
${INSTALL_DIR}/include/pufferfish cp -r ${EXTERNAL_DIR}/pufferfish/include/compact_vector ${INSTALL_DIR}/include/pufferfish cp -r ${EXTERNAL_DIR}/pufferfish/include/metro ${INSTALL_DIR}/include/pufferfish -cp -r ${EXTERNAL_DIR}/pufferfish/include/chobo ${INSTALL_DIR}/include/pufferfish +cp -r ${EXTERNAL_DIR}/pufferfish/include/itlib ${INSTALL_DIR}/include/pufferfish cp -r ${EXTERNAL_DIR}/pufferfish/include/sparsepp ${INSTALL_DIR}/include/pufferfish cp -r ${EXTERNAL_DIR}/pufferfish/include/simde ${INSTALL_DIR}/include/pufferfish cp -r ${EXTERNAL_DIR}/pufferfish/include/tsl ${INSTALL_DIR}/include/pufferfish @@ -116,7 +116,7 @@ cp ${EXTERNAL_DIR}/pufferfish/src/rank9b.cpp ${INSTALL_DIR}/src/pufferfish #cp -r ${EXTERNAL_DIR}/RapMap/include/*.hpp ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/sparsepp ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/digestpp ${INSTALL_DIR}/include/rapmap -#cp -r ${EXTERNAL_DIR}/RapMap/include/chobo ${INSTALL_DIR}/include/rapmap +#cp -r ${EXTERNAL_DIR}/RapMap/include/itlib ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/metro ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/ksw2pp ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/tsl ${INSTALL_DIR}/include/rapmap diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp index 45eb998cd..2dde45e84 100644 --- a/src/SalmonAlevin.cpp +++ b/src/SalmonAlevin.cpp @@ -131,7 +131,7 @@ #include "pufferfish/ksw2pp/KSW2Aligner.hpp" #include "pufferfish/metro/metrohash64.h" #include "parallel_hashmap/phmap.h" -#include "pufferfish/chobo/static_vector.hpp" +#include "pufferfish/itlib/static_vector.hpp" #include "pufferfish/SelectiveAlignmentUtils.hpp" namespace alevin{ @@ -717,7 +717,7 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re if (isUmiIdxOk) { jointHitGroup.setUMI(umiIdx.word(0)); bool rh = false; - std::string* readSubSeq = aut::getReadSequence(alevinOpts.protocol, rp.first.seq, rp.second.seq, readBuffer); + std::string* readSubSeq = aut::getReadSequence(localProtocol, rp.first.seq, rp.second.seq, readBuffer); rh = tooShortRight ? false : memCollector.get_raw_hits_sketch(*readSubSeq, @@ -1242,8 +1242,7 @@ void process_reads_sc_align(paired_parser* parser, ReadExperimentT& readExp, Rea } } else { */ - readSubSeq = aut::getReadSequence( - alevinOpts.protocol, rp.first.seq, rp.second.seq, readBuffer); + readSubSeq = aut::getReadSequence(localProtocol, rp.first.seq, rp.second.seq, readBuffer); auto rh = tooShortRight ? false : memCollector(*readSubSeq, qc, true, // isLeft @@ -1756,7 +1755,7 @@ void processReadsQuasi( } } else { */ - readSubSeq = aut::getReadSequence(alevinOpts.protocol, rp.first.seq, rp.second.seq, readBuffer); + readSubSeq = aut::getReadSequence(localProtocol, rp.first.seq, rp.second.seq, readBuffer); auto rh = tooShortRight ? 
false : memCollector(*readSubSeq, qc, true, // isLeft From b0fd829eed5963d5533efecf65026d2a23e06270 Mon Sep 17 00:00:00 2001 From: rob-p Date: Tue, 15 Feb 2022 17:23:34 -0500 Subject: [PATCH 11/13] small cmake update --- CMakeLists.txt | 6 +++--- include/SalmonConfig.hpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1be02ab69..01366bbcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -428,9 +428,9 @@ endif() ## # Set the latest version and look for what we need ## -set(Boost_ADDITIONAL_VERSIONS "1.59.0" "1.60.0" "1.61.0" "1.62.0" "1.63.0" "1.64.0" "1.65.0" "1.66.0" "1.67.0" "1.68.0" "1.69.0" "1.70.0" "1.71.0") +set(Boost_ADDITIONAL_VERSIONS "1.59.0" "1.60.0" "1.61.0" "1.62.0" "1.63.0" "1.64.0" "1.65.0" "1.66.0" "1.67.0" "1.68.0" "1.69.0" "1.70.0" "1.71.0" "1.72.0" "1.73.0" "1.74.0" "1.75.0" "1.76.0" "1.77.0" "1.78.0") if (NOT BOOST_RECONFIGURE) -find_package(Boost 1.59.0 COMPONENTS iostreams filesystem system timer chrono program_options) +find_package(Boost 1.59.0 COMPONENTS iostreams system filesystem timer chrono program_options) message("BOOST_INCLUDEDIR = ${BOOST_INCLUDEDIR}") message("BOOST_LIBRARYDIR = ${BOOST_LIBRARYDIR}") message("Boost_FOUND = ${Boost_FOUND}") @@ -459,7 +459,7 @@ if(BOOST_RECONFIGURE) set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/external/install) set(Boost_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include) set(Boost_LIBRARY_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib) - find_package(Boost 1.59.0 COMPONENTS iostreams filesystem system timer chrono program_options locale REQUIRED) + find_package(Boost 1.59.0 COMPONENTS iostreams system filesystem timer chrono program_options locale REQUIRED) set(FETCH_BOOST FALSE) endif() diff --git a/include/SalmonConfig.hpp b/include/SalmonConfig.hpp index f50fb8a5b..a782d3bf6 100644 --- a/include/SalmonConfig.hpp +++ b/include/SalmonConfig.hpp @@ -26,9 +26,9 @@ namespace salmon { constexpr char majorVersion[] = "1"; -constexpr char minorVersion[] = "6"; +constexpr char minorVersion[] = "7"; constexpr char patchVersion[] = "0"; -constexpr char version[] = "1.6.0"; +constexpr char version[] = "1.7.0"; constexpr uint32_t indexVersion = 5; constexpr char requiredQuasiIndexVersion[] = "p7"; } // namespace salmon From c4513dc78fbad42334579f734acb14f2a3263f80 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Tue, 15 Feb 2022 23:32:47 -0500 Subject: [PATCH 12/13] run version bump --- current_version.txt | 2 +- doc/source/conf.py | 4 ++-- docker/Dockerfile | 2 +- docker/build_test.sh | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/current_version.txt b/current_version.txt index be8fab66b..5b62d1631 100644 --- a/current_version.txt +++ b/current_version.txt @@ -1,3 +1,3 @@ VERSION_MAJOR 1 -VERSION_MINOR 6 +VERSION_MINOR 7 VERSION_PATCH 0 diff --git a/doc/source/conf.py b/doc/source/conf.py index 6f0a41f12..cc7d9db08 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -55,9 +55,9 @@ # built documents. # # The short X.Y version. -version = '1.6' +version = '1.7' # The full version, including alpha/beta/rc tags. -release = '1.6.0' +release = '1.7.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3715a9163..10e6a8a13 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -6,7 +6,7 @@ MAINTAINER salmon.maintainer@gmail.com

 ENV PACKAGES git gcc make g++ libboost-all-dev liblzma-dev libbz2-dev \
     ca-certificates zlib1g-dev libcurl4-openssl-dev curl unzip autoconf apt-transport-https ca-certificates gnupg software-properties-common wget

-ENV SALMON_VERSION 1.6.0
+ENV SALMON_VERSION 1.7.0

 # salmon binary will be installed in /home/salmon/bin/salmon

diff --git a/docker/build_test.sh b/docker/build_test.sh
index 09ae6af7c..f3641c328 100644
--- a/docker/build_test.sh
+++ b/docker/build_test.sh
@@ -1,3 +1,3 @@
 #! /bin/bash
-SALMON_VERSION=1.6.0
+SALMON_VERSION=1.7.0
 docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest .

From f166c1ed0b924ab1e70dadb57378bb0279030f6c Mon Sep 17 00:00:00 2001
From: Rob Patro
Date: Tue, 15 Feb 2022 23:46:34 -0500
Subject: [PATCH 13/13] rely on tagged pufferfish upstream

---
 scripts/fetchPufferfish.sh | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/scripts/fetchPufferfish.sh b/scripts/fetchPufferfish.sh
index 80e4b42ae..fd20f5879 100755
--- a/scripts/fetchPufferfish.sh
+++ b/scripts/fetchPufferfish.sh
@@ -4,6 +4,7 @@ set -eu -o pipefail

 exists() {
     command -v "$1" >/dev/null 2>&1
+
 }

 CURR_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
@@ -22,11 +23,11 @@ if [ -d ${INSTALL_DIR}/src/pufferfish ] ; then
     rm -fr ${INSTALL_DIR}/src/pufferfish
 fi

-#SVER=salmon-v1.6.0
-SVER=develop
+SVER=salmon-v1.7.0
+#SVER=develop
 #SVER=sketch-mode

-EXPECTED_SHA256=f71b3c08f254200fcdc2eb8fe3dcca8a8e9489e79ef5952a4958d8b9979831dc
+EXPECTED_SHA256=5894fabbf6829a3d4a627135edc8326e931eb07fc792bfff3a0714e8fee8bb8b

 mkdir -p ${EXTERNAL_DIR}
 curl -k -L https://github.com/COMBINE-lab/pufferfish/archive/${SVER}.zip -o ${EXTERNAL_DIR}/pufferfish.zip

 else
     unset hashcheck
 fi

+
 if [ -z "${hashcheck-}" ]; then
     echo "Couldn't find shasum command; can't verify contents of downloaded pufferfish";
 else
+
     if [[ $SVER != develop ]]; then
         echo "${EXPECTED_SHA256} ${EXTERNAL_DIR}/pufferfish.zip" | ${hashcheck} -c - || { echo "pufferfish.zip did not match expected SHA256!
Exiting."; exit 1; } else @@ -83,8 +86,10 @@ cp ${EXTERNAL_DIR}/pufferfish/include/MemChainer.hpp ${INSTALL_DIR}/include/puff cp ${EXTERNAL_DIR}/pufferfish/include/CommonTypes.hpp ${INSTALL_DIR}/include/pufferfish cp ${EXTERNAL_DIR}/pufferfish/include/SAMWriter.hpp ${INSTALL_DIR}/include/pufferfish cp ${EXTERNAL_DIR}/pufferfish/include/PufferfishConfig.hpp ${INSTALL_DIR}/include/pufferfish + cp ${EXTERNAL_DIR}/pufferfish/include/BulkChunk.hpp ${INSTALL_DIR}/include/pufferfish cp ${EXTERNAL_DIR}/pufferfish/include/BinWriter.hpp ${INSTALL_DIR}/include/pufferfish + cp -r ${EXTERNAL_DIR}/pufferfish/include/libdivide ${INSTALL_DIR}/include/pufferfish cp -r ${EXTERNAL_DIR}/pufferfish/include/ksw2pp ${INSTALL_DIR}/include/pufferfish cp -r ${EXTERNAL_DIR}/pufferfish/include/compact_vector ${INSTALL_DIR}/include/pufferfish @@ -116,6 +121,7 @@ cp ${EXTERNAL_DIR}/pufferfish/src/rank9b.cpp ${INSTALL_DIR}/src/pufferfish #cp -r ${EXTERNAL_DIR}/RapMap/include/*.hpp ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/sparsepp ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/digestpp ${INSTALL_DIR}/include/rapmap + #cp -r ${EXTERNAL_DIR}/RapMap/include/itlib ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/metro ${INSTALL_DIR}/include/rapmap #cp -r ${EXTERNAL_DIR}/RapMap/include/ksw2pp ${INSTALL_DIR}/include/rapmap @@ -127,4 +133,5 @@ cp ${EXTERNAL_DIR}/pufferfish/src/rank9b.cpp ${INSTALL_DIR}/src/pufferfish #rm ${INSTALL_DIR}/include/rapmap/FastxParser.hpp #rm ${INSTALL_DIR}/include/rapmap/concurrentqueue.h #rm ${INSTALL_DIR}/include/rapmap/FastxParserThreadUtils.hpp + #rm ${INSTALL_DIR}/src/rapmap/FastxParser.cpp