-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.ini
executable file
·113 lines (74 loc) · 3.04 KB
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
[preprocess]
# in_tensor_dir has one active assignment below; the commented-out alternatives
# are kept for other machines. Uncomment exactly one.
# For raptor
#in_tensor_dir : ./data/train-ready/pred-full/
# For comet
#in_tensor_dir : /scratch/mtari008/37154933/pred-full-deepnovo/
# For expanse
# NOTE(review): the active path below lives under /disk/raptor-2/ although it is
# labelled "expanse" -- confirm which machine this value is meant for.
in_tensor_dir : /disk/raptor-2/mtari008/data/deepsnap/train-ready/nist_massiv_80k_no_ch_graymass-semi/
############ INPUT PARAMETERS ############
[input]
# Comments in this section are kept on their own lines: text after a value is
# only ignored when the parser enables inline comments; otherwise it becomes
# part of the value.
# File paths. Ignore the three path parameters below.
# msp_files : /oasis/projects/nsf/wmu101/mtari008/DeepSNAP/data/msp
mgf_files : sample_data/mgfs
db_peps_path : /expanse/lustre/projects/wmu101/mtari008/DeepSNAP/data/db_peps/db-peps.pkl
# The array size to store a spectrum.
spec_size : 80000
# Max charge value to be used for training.
charge : 8
# Whether to use modifications or not (both training and database search).
use_mods : False
# Max mods per peptide.
num_mods : 5
# Number of species the training dataset contains. Deprecated; has no effect.
num_species : 9
# If you get an error that the port is already in use, change this value to another number.
master_port : 12345
############ DATABASE SEARCH PARAMETERS ############
[search]
# Comments in this section are kept on their own lines: text after a value is
# only ignored when the parser enables inline comments; otherwise it becomes
# part of the value.
# This is the model that will be loaded during search.
# NOTE(review): this comment previously said the model is loaded from the models
# directory and that "22" at the end of the name is the epoch number (that's how
# the models are saved). The current value points into model_weights/ and has no
# epoch suffix -- confirm which convention applies.
model_name : model_weights/specollate_model.pt
# Directory path with the mgf files to be searched. Files must have the .mgf extension.
# NOTE(review): originally documented as an absolute path, but the value below is
# relative -- confirm what it is resolved against.
mgf_dir : sample_data/mgfs
# Path where preprocessed mgf spectra from the above directory will be placed.
prep_dir : sample_data/preprocessed
# Directory path containing the peptide file obtained from the OpenMS Digestor tool.
pep_dir : sample_data/peptides
# Directory path where percolator input files will be placed.
# Use the crux percolator tool to analyze these files.
out_pin_dir : output
# Batch sizes for the forward pass through the network.
# These sizes have been tested for 12 GBs of GPU memory.
spec_batch_size : 16384
pep_batch_size : 4096
# Batch size for database search. 1024 seems to work better.
search_spec_batch_size : 1024
# Precursor tolerance to use during database search (Da or ppm).
precursor_tolerance : 20
# Unit of precursor_tolerance: either ppm or Da.
precursor_tolerance_type : ppm
# Number of top scoring psms to keep.
keep_psms : 5
# Number of modified peptides to be generated to search against.
# Different from the num_mods in the [input] section.
num_mods : 1
# Charge filter for input spectra.
# Note that spectra with all charges will be searched against charge-independent peptide embeddings.
charge : 8
############ MACHINE LEARNING PARAMETERS ############
[ml]
# NOTE(review): the meaning and units of the keys below are not documented in
# this file; confirm against the code that reads the [ml] section before tuning.
# The model will be stored under this name in the /models directory.
model_name : 512-embed-2-lstm-SnapLoss2D-80k-nist-massive-gmc-semi-r2
batch_size : 1024
test_size : 0.2
pep_seq_len : 64
train_count : 0
snp_weight : 1
ce_weight : 0.001
mse_weight : 0.00001
dropout : 0.3
lr : 0.0001
weight_decay : 0.0001
epochs : 200
margin : 0.2
read_split_listing : False
############ DEFAULT VALUES ############
# DO NOT CHANGE
# NOTE(review): if this file is read with Python's configparser, only a section
# named DEFAULT (upper case) supplies automatic fallbacks to other sections; this
# lower-case [default] is then an ordinary section -- confirm how the application
# reads it.
# spec_size and charge here (8000, 2) differ from the [input] section (80000, 8).
[default]
msp_file : /data/human_consensus_final_true_lib.msp
mgf_files : /data/
spec_size : 8000
charge : 2
use_mods : False
batch_size : 1024