-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathConfiguration.ini
115 lines (107 loc) · 5.88 KB
/
Configuration.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Global switches plus every input, output and model path used by the project.
# All paths are relative (./...); presumably resolved against the process
# working directory - TODO confirm in the loader.
# NOTE(review): several keys contain upper-case letters (SOM_output, MST_*).
# Python configparser lower-cases keys by default - confirm which parser reads
# this file and how these keys are looked up.
[MAIN]
debugging = False
with_wiki_pages = False
# Input document. Exactly one path_document line is active; the other two are
# commented-out alternatives kept for switching data sets.
#path_document = ./data/twitter.csv
path_document = ./data/exportDatiWindTre.csv
#path_document = ./data/test.csv
SOM_output = ./data/SOM_output.txt
# Minimum spanning tree (MST) outputs: HTML pages, JSON graph, CSV for the bubble chart.
MST_html_output_file = ./data/output/Minimum_Spanning_Tree_topics.html
MST_json_output_file = ./static/json/graphFile.json
MST_html_d3_output_file = ./templates/MST_d3.html
MST_cluster_html_output_file = ./data/output/Minimum_Spanning_Tree_clusters.html
MST_cluster_csv_output_file = ./static/csv/bubbleChartFile.csv
# NOTE(review): the value ends with "dried_", so it looks like a file-name
# prefix completed by the code rather than a full path - confirm.
MST_dried_topics_d3_base_file = ./templates/dried_
# Topic frequency charts (bubble and bar variants).
topic_frequencies_file_bubble = ./data/output/topic_frequencies_bubble.html
topic_frequencies_file_bar = ./data/output/topic_frequencies_bar.html
# Pickled models and lookup tables under ./bin. Keys ending in _incr_fold point
# at directories (trailing slash) rather than single files.
# NOTE(review): pickle files should only be loaded from trusted sources.
path_pickle_w2v_model = ./bin/word2vec_wiki.pickle
path_pickle_w2v_model_incr_fold = ./bin/word2vec/
path_pickle_som_model = ./bin/som_15x15_cosine.pickle
path_pickle_som_model_incr_fold = ./bin/som/
path_pickle_cluster_model = ./bin/cluster_model_15x15_min_25_cosine.pickle
path_pickle_codebook_cluster_model = ./bin/codebook_cluster.pickle
path_pickle_codebook_cluster_model_incr_fold = ./bin/codebook_cluster/
path_pickle_bigram_model = ./bin/bigram.pickle
path_pickle_bigram_model_incr_fold = ./bin/bigram/
path_pickle_stopwords_dict = ./bin/stopwords_dict.pickle
path_dried_topics = ./bin/dried_topics.pickle
path_vec2word = ./bin/vec2word.pickle
path_vec2tweets = ./bin/vec2tweets.pickle
path_word2tweet = ./bin/word2tweets.pickle
path_csv_output_folder = ./data/output/csv/
# Logging: directory and base file name are configured separately.
log_path = ./log
log_file_name = log
# PNG image outputs.
codebook_activation_filename = ./data/output/codebook_activation.png
cell_frequency_filename = ./data/output/cell_frequency.png
umatrix_filename = ./data/output/umatrix.png
# Files used by the word2vec-based classification (class definitions, w2v model, XGBoost model).
class_definition = ./bin/word2vec_classification/class_definitions.json
path_word2vec_model_classification = ./bin/word2vec_classification/w2v_model.pkl
path_xgboost_model = ./bin/word2vec_classification/xgb_model.pkl
# Per-stage verbosity switches; all are currently off.
# Presumably each flag enables extra output for the stage named in its key
# (cleaning, entity, wiki page, tweet corpus creation) - confirm in the code.
[VERBOSE]
VERBOSE_CLEANING = False
VERBOSE_ENTITY = False
VERBOSE_WIKI_PAGE = False
VERBOSE_CREATE_TWEET_CORPUS = False
# Feature flags for the word2vec (W2V) step; both are currently enabled.
# NOTE(review): [MAIN] sets with_wiki_pages = False while USE_WIKIPEDIA_FOR_W2V
# is True here - confirm how the two settings interact.
[FLAG]
USE_ENTITY_FOR_W2V = True
USE_WIKIPEDIA_FOR_W2V = True
# Entity-related settings.
# confidence: presumably a minimum confidence threshold in the 0-1 range - confirm.
# lang: presumably a language code; "it" is consistent with the Italian
# stop-word lists configured in [STOPWORDS_FILES] - confirm.
[ENTITY]
confidence = 0.25
lang = it
# Word2vec training parameters.
# NOTE(review): the key names match gensim Word2Vec arguments, in the pre-4.0
# spelling (size, iter) - presumably passed straight through; confirm against
# the gensim version in use, since gensim 4 renamed them to vector_size and epochs.
[W2V]
size = 100
min_count = 1
sg = 1
window = 5
iter = 50
alpha = 0.1
workers = 4
progress_per = 100
# Parameters for the self-organising map (SOM) step and the codebook clustering.
# Exact semantics of each key are defined by the consuming code, not shown here.
[ADVANCED_ASOM]
# Training is disabled; presumably a previously pickled SOM model is loaded
# instead - confirm.
do_trainSom = False
alpha_max = 0.3
alpha_min = 0.01
# Map dimensions: 15 x 15, matching the "15x15" in the SOM pickle file name in [MAIN].
height = 15
width = 15
outlier_unit_threshold = 0.2
# NOTE(review): written "90." with a trailing dot; Python float() accepts it,
# other parsers may not - confirm.
outlier_percentile = 90.
# NOTE(review): the key starts with a capital K, unlike its sibling
# outlier_percentile - confirm this is the name the consumer looks up and not a typo.
Koutlier_percentile = 2
learning_rate_percentile = 0.1
num_epoch = 5000
# NOTE(review): the value is wrapped in single quotes. configparser keeps the
# quote characters as part of the value - confirm the consumer strips or
# expects them.
training_type = 'adaptive'
batch_size = 10000
fast_training = False
verbose = 1
dimension_percentile = 100.0
empty_codebook_threshold = 1
min_size_codebook_mtx = 5
step_codebook_mtx = 2
# Minimum and maximum are both 10, so if these bound a search over cluster
# counts the range has a single value - TODO confirm this is intended.
num_cluster_min = 10
num_cluster_max = 10
# Stop-word list files, one path per key (file1 .. file20), all under ./stopwords-it/.
# NOTE(review): whether the key names or their order matter depends on the
# loader - confirm before renumbering or reordering.
[STOPWORDS_FILES]
file1 = ./stopwords-it/stopwords-it.txt
file2 = ./stopwords-it/cognomi.txt
file3 = ./stopwords-it/nomi_italiani.txt
file4 = ./stopwords-it/comuni-it.txt
file5 = ./stopwords-it/province_regioni.txt
file6 = ./stopwords-it/parolacce.txt
file7 = ./stopwords-it/accentate.txt
file8 = ./stopwords-it/altre_stopwords.txt
# Lists stored in the raw/ subfolder.
file9 = ./stopwords-it/raw/gh-stopwords-json-it.txt
file10 = ./stopwords-it/raw/language-resource-stopwords.txt
file11 = ./stopwords-it/raw/ranksnl-italian.txt
file12 = ./stopwords-it/raw/stop-words-italian.txt
file13 = ./stopwords-it/raw/stopwords-filter-it.txt
# Further lists stored directly under ./stopwords-it/.
file14 = ./stopwords-it/alfabeto_fonetico-it.txt
file15 = ./stopwords-it/mesi_settimane-it.txt
file16 = ./stopwords-it/numeri-ordinali-it.txt
file17 = ./stopwords-it/nazioni-it.txt
file18 = ./stopwords-it/capitali_mondo-it.txt
file19 = ./stopwords-it/nomi-etnici-it.txt
file20 = ./stopwords-it/adverb.txt
# Settings for the generated graph and frequency charts.
[GRAPH_IMG]
# Graphics style of the nodes in the similarity graph (markers, text).
# The key is currently commented out, so no node_style is defined in this
# section; presumably the code falls back to its own default - confirm.
#node_style = markers
# Presumably the number of topics shown in the frequency charts - confirm.
num_of_topic_for_frequencies = 30
# Topic extraction settings.
[TOPICS]
# Presumably the number of nearest words kept per topic - confirm.
num_nearest_words = 5
# NOTE(review): the path has no file extension; the code presumably appends
# one or treats it as a base name - confirm.
path_output_similarity_graph = ./data/similarity_graph