forked from hugp-ri/hicup-plus
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Documents changes to the HiCUP+ fork, increments the version number, and updates the maintainer and citation
- Loading branch information
1 parent
c1bbcc2
commit d2a1802
Showing
9 changed files
with
193 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,45 @@ | ||
RELEASE NOTES FOR HICUP-Plus v1.0.0 (15 FEBRUARY 2022) | ||
------------------------------------------------- | ||
|
||
HiCUP+ v1.0.0 is a major update incorporating the changes described below: | ||
|
||
- refactors error handling and warnings without changes to results from | ||
Bowtie or Bowtie2 aligners | ||
|
||
- initialises reverse read for pairing and migrates filtering to use | ||
matches in header strings rather than index numbers | ||
|
||
- adds calls to the HiSAT2 aligner (Kim et al., 2019) using stream | ||
inputs and parameters equivalent to the Bowtie2 parameters | ||
|
||
- adds a call to the proprietary Dragen (Illumina Inc., 2021) aligner | ||
via a system call with the error handling and log outputs adjusted | ||
for compatibility | ||
|
||
- updates to pairing and reporting scripts for compatibility with | ||
the Dragen aligner run only if this aligner is chosen | ||
|
||
- updates the documentation to describe configuration of changing | ||
aligners to HiSAT2 or Dragen | ||
|
||
- updates R graphical devices to PDF (removing need for Cairo graphics in SVG format) | ||
|
||
- updates Maintainer of this fork to S. Thomas Kelly | ||
**[email protected]** (Bioinformatics Team, | ||
H.U. Group Research Institute G.K., Tokyo, Japan; SRL Inc., Tokyo, Japan) | ||
|
||
This release is a fork of the original HiCUP pipeline. See the release notes | ||
for previous versions below and the GitHub repository for more details: | ||
https://github.com/StevenWingett/HiCUP | ||
|
||
This fork has been renamed to HiCUP-Plus (HiCUP+) to avoid it being | ||
mistaken for the original implementation. | ||
|
||
Full details of changes can be viewed on GitHub: | ||
https://github.com/hugp-ri/hicup-plus/compare/test-original...master#diff-HEAD?w=1 | ||
|
||
|
||
|
||
RELEASE NOTES FOR HICUP v0.8.2 (13 JULY 2021) | ||
------------------------------------------------- | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,19 @@ use Data::Dumper; | |
################################################################################### | ||
################################################################################### | ||
|
||
################################################################################### | ||
################################################################################### | ||
## ## | ||
## This file has been modified to support Dragen and HiSAT2 configurations ## | ||
## ## | ||
## HiCUP+ (HiCUP-Plus) ## | ||
## Maintained by S. Thomas Kelly ([email protected]) ## | ||
## ## | ||
## Changes: supports additional configuration options ## | ||
## ## | ||
################################################################################### | ||
################################################################################### | ||
|
||
########################################################## | ||
#Get user-supplied parameters | ||
#Option variables | ||
|
@@ -112,7 +125,7 @@ if ( $config{help} ) { | |
|
||
#Print version and exit | ||
if ( $config{version} ) { | ||
print "HiCUP v$hicup_module::VERSION\n"; | ||
print "HiCUP+ v$hicup_module::VERSION\n"; | ||
exit(0); | ||
} | ||
|
||
|
@@ -125,8 +138,8 @@ if( hasval($config{config}) ){ | |
die "Configuration file '$config{config}' does not exist\n" unless(-e $config{config}); | ||
} | ||
|
||
print "Starting HiCUP pipeline (v$hicup_module::VERSION)\n" unless $config{quiet}; | ||
print "PLEASE NOTE: FROM VERSION 8, HICUP REQUIRES THE R PACKAGES TIDYVERSE AND PLOTLY INSTALLED\n"; | ||
print "Starting HiCUP+ pipeline (v$hicup_module::VERSION)\n" unless $config{quiet}; | ||
print "PLEASE NOTE: HICUP+ REQUIRES THE R PACKAGES TIDYVERSE AND PLOTLY INSTALLED\n"; | ||
print "SEE DOCUMENTATION FOR MORE DETAILS\n"; | ||
|
||
my @filenames; | ||
|
@@ -183,7 +196,7 @@ foreach my $file (@hicup_final_outfiles) { | |
} | ||
|
||
unless ( check_files_exist( \@hicup_final_outfiles, 'NOT_EXISTS' ) ) { | ||
die "HiCUP will not run until these files have been removed.\n"; | ||
die "HiCUP+ will not run until these files have been removed.\n"; | ||
} | ||
|
||
######################################################################### | ||
|
@@ -338,7 +351,7 @@ if ( $config{zip} ) { | |
} | ||
|
||
#Map and pair sequences | ||
my $ligation = "\@PG\tID:HiCUP Truncater\tVN:" . $hicup_module::VERSION; #Create the hidden ligation flag to send to HiCUP mapper to print the hicup_truncation settings to the output file | ||
my $ligation = "\@PG\tID:HiCUP+ Truncater\tVN:" . $hicup_module::VERSION; #Create the hidden ligation flag to send to HiCUP mapper to print the hicup_truncation settings to the output file | ||
if ( $config{re1} ) { | ||
#We cannot have a ":" in the SAM header at this point, so remove it | ||
my $colonless_re1 = $config{re1}; | ||
|
@@ -458,10 +471,10 @@ if($sonication_protocol){ | |
} | ||
} else { | ||
print "Skipping summary report\n"; | ||
print "HiCUP now only collates results for sonication protocol datasets\n"; | ||
print "HiCUP+ now only collates results for sonication protocol datasets\n"; | ||
} | ||
|
||
print "HiCUP processing complete.\n" unless $config{quiet}; | ||
print "HiCUP+ processing complete.\n" unless $config{quiet}; | ||
|
||
exit(0); | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,20 @@ use Data::Dumper; | |
################################################################################### | ||
################################################################################### | ||
|
||
################################################################################### | ||
################################################################################### | ||
## ## | ||
## This file has been modified to support Dragen and HiSAT2 configurations ## | ||
## ## | ||
## HiCUP+ (HiCUP-Plus) ## | ||
## Maintained by S. Thomas Kelly ([email protected]) ## | ||
## ## | ||
## Changes: no major changes (whitespace edited to enable debugging) ## | ||
## ## | ||
################################################################################### | ||
################################################################################### | ||
|
||
|
||
#Option variables | ||
my %config = ( | ||
batch_size => '', | ||
|
@@ -77,7 +91,7 @@ if ( $config{help} ) { | |
|
||
#Print version and exit | ||
if ( $config{version} ) { | ||
print "HiCUP Deduplicator v$hicup_module::VERSION\n"; | ||
print "HiCUP+ Deduplicator v$hicup_module::VERSION\n"; | ||
exit(0); | ||
} | ||
|
||
|
@@ -106,11 +120,11 @@ foreach my $outfile (@hicup_deduplicator_outfiles) { | |
} | ||
|
||
unless ( check_files_exist( \@hicup_deduplicator_outfiles, 'NOT_EXISTS' ) ) { | ||
die "HiCUP Deduplicator will not run until files have been removed.\n"; | ||
die "HiCUP+ Deduplicator will not run until files have been removed.\n"; | ||
} | ||
|
||
#Begin de-duplication | ||
print "Removing duplicates with HiCUP Deduplicator v$hicup_module::VERSION\n" unless ( $config{quiet} ); | ||
print "Removing duplicates with HiCUP+ Deduplicator v$hicup_module::VERSION\n" unless ( $config{quiet} ); | ||
|
||
open( SUMMARY, '>', $config{outdir} . $summary_filename ) or die "Could not write to summary file '$config{outdir} . $summary_filename'.\n"; | ||
print SUMMARY "File\tRead_pairs_processed\tUnique_di-tags\tCis_<10kbp_of_uniques\tCis_>10kbp_of_uniques\tTrans_of_uniques\n"; #Write header line to file | ||
|
@@ -220,7 +234,7 @@ sub process_file { | |
next; | ||
} else { | ||
if ($in_header) { | ||
my $sam_header_line = "\@PG\tID:HiCUP Deduplicator\tVN:" . "$hicup_module::VERSION\n"; | ||
my $sam_header_line = "\@PG\tID:HiCUP+ Deduplicator\tVN:" . "$hicup_module::VERSION\n"; | ||
print UNIQUES $sam_header_line; | ||
$in_header = 0; | ||
} | ||
|
@@ -256,23 +270,22 @@ sub process_file { | |
$fh = $filehandler{$temp_filename}; | ||
print $fh $readF, $readR or die "Could not write to $temp_filename: $!"; | ||
} else { | ||
if(scalar keys %filehandler >= 100){ #Large number of filehandles already open! | ||
#Very, very rarely a file may require a huge number of filehandles | ||
#(e.g. if mapping against a genome with thousands of scaffold files) | ||
#Owing to Linux limits on the number of filehandles that may be opened | ||
#simultaneously, HiCUP may not process such file. To make it less likely | ||
#HiCUP will crash, this script will now open only 100 filehandles and additional | ||
#data will be sent to an extra filehandle called EXTRA. This is not a perfect fix, | ||
#rather a patch for one sample requiring HiCUP processing. | ||
$temp_filename = $tempdir . '/' . $filename_no_folder_refs . "_" . 'EXTRA' . ".temp"; | ||
if(scalar keys %filehandler >= 100){ #Large number of filehandles already open! | ||
#Very, very rarely a file may require a huge number of filehandles | ||
#(e.g. if mapping against a genome with thousands of scaffold files) | ||
#Owing to Linux limits on the number of filehandles that may be opened | ||
#simultaneously, HiCUP may not process such file. To make it less likely | ||
#HiCUP will crash, this script will now open only 100 filehandles and additional | ||
#data will be sent to an extra filehandle called EXTRA. This is not a perfect fix, | ||
#rather a patch for one sample requiring HiCUP processing. | ||
$temp_filename = $tempdir . '/' . $filename_no_folder_refs . "_" . 'EXTRA' . ".temp"; | ||
$filehandler{$temp_filename} = newopen($temp_filename) unless exists $filehandler{$temp_filename}; | ||
$fh = $filehandler{$temp_filename}; | ||
|
||
}else{ #Print to a new file | ||
$filehandler{$temp_filename} = newopen($temp_filename); | ||
$fh = $filehandler{$temp_filename}; | ||
} | ||
print $fh $readF, $readR or die "Could not write to $temp_filename: $!"; | ||
} else { #Print to a new file | ||
$filehandler{$temp_filename} = newopen($temp_filename); | ||
$fh = $filehandler{$temp_filename}; | ||
} | ||
print $fh $readF, $readR or die "Could not write to $temp_filename: $!"; | ||
} | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,19 @@ use Data::Dumper; | |
################################################################################### | ||
################################################################################### | ||
|
||
################################################################################### | ||
################################################################################### | ||
## ## | ||
## This file has been modified to support Dragen and HiSAT2 configurations ## | ||
## ## | ||
## HiCUP+ (HiCUP-Plus) ## | ||
## Maintained by S. Thomas Kelly ([email protected]) ## | ||
## ## | ||
## Changes: no major changes (whitespace edited to enable debugging) ## | ||
## ## | ||
################################################################################### | ||
################################################################################### | ||
|
||
#Get user-supplied parameters | ||
#Option variables | ||
my %config = ( | ||
|
@@ -76,7 +89,7 @@ if ( $config{help} ) { | |
|
||
#Print version and exit | ||
if ($config{version}) { | ||
print "HiCUP Digester v$hicup_module::VERSION\n"; | ||
print "HiCUP+ Digester v$hicup_module::VERSION\n"; | ||
exit(0); | ||
} | ||
|
||
|
@@ -115,7 +128,7 @@ unless ( check_parameters() ) { | |
die "Please change configuration file and/or command-line parameters and/or installation accordingly\n"; | ||
} | ||
|
||
print "HiCUP Digester (version $hicup_module::VERSION)\n"; | ||
print "HiCUP+ Digester (version $hicup_module::VERSION)\n"; | ||
print "Digesting files\n" unless $config{quiet}; | ||
|
||
# First restriction digestion - with potential additional enzymes | ||
|
@@ -496,8 +509,8 @@ hicup_digester [OPTIONS]... [FASTA FILES]... | |
FUNCTION | ||
The HiCUP pipeline removes Hi-C artefacts, requiring a reference digested | ||
genome. HiCUP Digester identifies the cut sites in FASTA files. The script | ||
prints the results to file for subsequent processing by HiCUP Filter. | ||
genome. HiCUP+ Digester identifies the cut sites in FASTA files. The script | ||
prints the results to file for subsequent processing by HiCUP+ Filter. | ||
The names of the files to be processed and the digestion parameters may be | ||
passed to the script by a configuration file or command line arguments. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,20 @@ use Data::Dumper; | |
################################################################################### | ||
################################################################################### | ||
|
||
################################################################################### | ||
################################################################################### | ||
## ## | ||
## This file has been modified to support Dragen and HiSAT2 configurations ## | ||
## ## | ||
## HiCUP+ (HiCUP-Plus) ## | ||
## Maintained by S. Thomas Kelly ([email protected]) ## | ||
## ## | ||
## Changes: no major changes (whitespace edited to enable debugging) ## | ||
## ## | ||
################################################################################### | ||
################################################################################### | ||
|
||
|
||
#Option variables | ||
my %config = ( | ||
digest => '', | ||
|
@@ -85,7 +99,7 @@ if ( $config{help} ) { | |
|
||
#Print version and exit | ||
if ( $config{version} ) { | ||
print "HiCUP Filter v$hicup_module::VERSION\n"; | ||
print "HiCUP+ Filter v$hicup_module::VERSION\n"; | ||
exit(0); | ||
} | ||
|
||
|
@@ -135,7 +149,7 @@ foreach my $outfile (@hicup_filter_outfiles) { | |
} | ||
|
||
unless ( check_files_exist( \@hicup_filter_outfiles, 'NOT_EXISTS' ) ) { | ||
die "HiCUP Filter will not run until files have been removed.\n"; | ||
die "HiCUP+ Filter will not run until files have been removed.\n"; | ||
} | ||
|
||
#Create a directory for rejected Hi-C sequences | ||
|
@@ -144,7 +158,7 @@ $rejdir = $config{outdir} . $rejdir; | |
mkdir $rejdir or die "Could not write to '$rejdir'\n"; | ||
|
||
#Processes the data in accordance with the protocol followed (i.e. double digest or sonication) | ||
print "Filtering with HiCUP Filter v$hicup_module::VERSION\n" unless hasval( $config{quiet} ); | ||
print "Filtering with HiCUP+ Filter v$hicup_module::VERSION\n" unless hasval( $config{quiet} ); | ||
|
||
my $terminate = 0; #Instruct script to die if error detected in child process | ||
my %digest_fragments; | ||
|
@@ -1036,7 +1050,7 @@ sub sonicate_hic { | |
next; | ||
} else { | ||
if ($in_header) { | ||
my $sam_header_line = "\@PG\tID:HiCUP Filter\tVN:" . "$hicup_module::VERSION\t"; | ||
my $sam_header_line = "\@PG\tID:HiCUP+ Filter\tVN:" . "$hicup_module::VERSION\t"; | ||
$sam_header_line .= "DS:\"Max insert " . $config{longest} . ' Min insert ' . $config{shortest} . ' Digest file ' . "$config{digest}\"\n"; | ||
print HIC_READS $sam_header_line; | ||
if ($insert_size_check) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,29 @@ use Data::Dumper; | |
################################################################################### | ||
################################################################################### | ||
|
||
################################################################################### | ||
################################################################################### | ||
## ## | ||
## This file has been modified to support Dragen and HiSAT2 configurations ## | ||
## ## | ||
## HiCUP+ (HiCUP-Plus) ## | ||
## Maintained by S. Thomas Kelly ([email protected]) ## | ||
## ## | ||
## Changes: additional input parameters for additional aligners ## | ||
## verbose log output for debugging ## | ||
## add aligner calls for HiSAT2 and Dragen ## | ||
## skip counting reads too short to map for Dragen ## | ||
## report results for HiSAT2 using same parameters as Bowtie2 ## | ||
## skip logs from stdout with Dragen ## | ||
## run Dragen with a system call and print logs to files ## | ||
## parse Dragen mapping statistics from log files ## | ||
## initialise values and correct whitespace to allow debugging ## | ||
## initialise reverse read and match by header strings ## | ||
## ## | ||
################################################################################### | ||
################################################################################### | ||
|
||
|
||
#Option variables | ||
my %config = ( | ||
bowtie => '', | ||
|
@@ -93,7 +116,7 @@ if ( $config{help} ) { | |
|
||
#Print version and exit | ||
if ( $config{version} ) { | ||
print "HiCUP Mapper v$hicup_module::VERSION\n"; | ||
print "HiCUP+ Mapper v$hicup_module::VERSION\n"; | ||
exit(0); | ||
} | ||
|
||
|
@@ -140,7 +163,7 @@ foreach my $file (@hicup_Mapper_Outfiles) { #Add filename e | |
} | ||
|
||
unless ( check_files_exist( \@hicup_Mapper_Outfiles, 'NOT_EXISTS' ) ) { | ||
die "HiCUP mapper will not run until files have been removed.\n"; | ||
die "HiCUP+ mapper will not run until files have been removed.\n"; | ||
} | ||
|
||
|
||
|
@@ -160,7 +183,7 @@ $summaryfileTemp = $config{outdir} . $summaryfileTemp; | |
|
||
open( SUMMARYTEMP, ">$summaryfileTemp" ) or die "Could not write to '$summaryfileTemp' : $!"; | ||
|
||
print "Mapping with HiCUP Mapper v$hicup_module::VERSION\n" unless $config{quiet}; | ||
print "Mapping with HiCUP+ Mapper v$hicup_module::VERSION\n" unless $config{quiet}; | ||
print "Using aligner '$config{aligner}'\n" unless $config{quiet}; | ||
|
||
my $terminate = 0; #Instruct script to die if error detected in child process | ||
|
@@ -197,7 +220,7 @@ close SUMMARYTEMP or die "Could not close filehandle on '$summaryfileTemp' : $!" | |
my %summary_results = extract_mapping_results("$summaryfileTemp"); #Stores the results to write to the summary file | ||
|
||
#Now pair the files | ||
print "Pairing files with HiCUP Mapper v$hicup_module::VERSION\n" unless $config{quiet}; | ||
print "Pairing files with HiCUP+ Mapper v$hicup_module::VERSION\n" unless $config{quiet}; | ||
|
||
open( SUMMARY, ">$summaryfile" ) or die "Could not write to $summaryfile : $!"; | ||
print SUMMARY "File\tTotal_reads_processed\tReads_too_short_to_map\t%Reads_too_short_to_map\tUnique_alignments\t%Unique_alignments\tMultiple_alignments\t%Multiple_alignments\tFailed_to_align\t%failed_to_align\tPaired\t%Paired\n"; | ||
|
@@ -671,7 +694,7 @@ sub pair { | |
} else { | ||
|
||
if ($in_header) { #Print the additional SAM header line | ||
my $sam_header_line = "\@PG\tID:HiCUP Mapper\tVN:" . "$hicup_module::VERSION\n"; | ||
my $sam_header_line = "\@PG\tID:HiCUP+ Mapper\tVN:" . "$hicup_module::VERSION\n"; | ||
print PAIRED $sam_header_line; | ||
|
||
if ( $config{ligation} ) { | ||
|
Oops, something went wrong.