# Configuration.ini

# [MAIN] - global switches plus the file-system locations used by the pipeline.
# Every path below is written in relative form ("./...").
# NOTE(review): presumably resolved against the process working directory - confirm.
[MAIN]
# Boolean switches.
debugging                                     =   False
with_wiki_pages                               =   False
# Input document: exactly one path_document line is active; the other two are
# alternative inputs kept commented out.
#path_document                                 =   ./data/twitter.csv
path_document                                 =   ./data/exportDatiWindTre.csv
#path_document                                 =   ./data/test.csv
# Generated artefacts: SOM output text file, minimum-spanning-tree (MST) HTML /
# JSON / CSV files and topic-frequency charts.
SOM_output                                    =   ./data/SOM_output.txt
MST_html_output_file                          =   ./data/output/Minimum_Spanning_Tree_topics.html
MST_json_output_file                          =   ./static/json/graphFile.json
MST_html_d3_output_file                       =   ./templates/MST_d3.html
MST_cluster_html_output_file                  =   ./data/output/Minimum_Spanning_Tree_clusters.html
MST_cluster_csv_output_file                   =    ./static/csv/bubbleChartFile.csv
# NOTE(review): this value is a file-name prefix ("dried_"), not a complete file
# name; presumably the code appends a suffix - confirm.
MST_dried_topics_d3_base_file                 =   ./templates/dried_
topic_frequencies_file_bubble                 =   ./data/output/topic_frequencies_bubble.html
topic_frequencies_file_bar                    =   ./data/output/topic_frequencies_bar.html
# Pickled models and lookup tables under ./bin/. Keys ending in "_incr_fold"
# hold directory paths (trailing "/") instead of single files.
# NOTE(review): "incr" presumably stands for incremental training - confirm.
path_pickle_w2v_model                         =   ./bin/word2vec_wiki.pickle
path_pickle_w2v_model_incr_fold               =   ./bin/word2vec/
path_pickle_som_model                         =   ./bin/som_15x15_cosine.pickle
path_pickle_som_model_incr_fold               =   ./bin/som/
path_pickle_cluster_model                     =   ./bin/cluster_model_15x15_min_25_cosine.pickle
path_pickle_codebook_cluster_model            =   ./bin/codebook_cluster.pickle
path_pickle_codebook_cluster_model_incr_fold  =   ./bin/codebook_cluster/
path_pickle_bigram_model                      =   ./bin/bigram.pickle
path_pickle_bigram_model_incr_fold            =   ./bin/bigram/
path_pickle_stopwords_dict                    =   ./bin/stopwords_dict.pickle
path_dried_topics                             =   ./bin/dried_topics.pickle
path_vec2word                                 =   ./bin/vec2word.pickle
path_vec2tweets                               =   ./bin/vec2tweets.pickle
path_word2tweet                               =   ./bin/word2tweets.pickle
# Folder for CSV exports.
path_csv_output_folder                        =   ./data/output/csv/
# Logging: directory and base file name.
log_path                                      =   ./log
log_file_name                                 =   log
# PNG images written under ./data/output/.
codebook_activation_filename                  =   ./data/output/codebook_activation.png
cell_frequency_filename                       =   ./data/output/cell_frequency.png
umatrix_filename                              =   ./data/output/umatrix.png
# Classification assets, all stored in ./bin/word2vec_classification/.
class_definition                              =   ./bin/word2vec_classification/class_definitions.json
path_word2vec_model_classification            =   ./bin/word2vec_classification/w2v_model.pkl
path_xgboost_model                            =   ./bin/word2vec_classification/xgb_model.pkl

# [VERBOSE] - one boolean switch per pipeline stage (cleaning, entity, wiki
# page, tweet-corpus creation); all are currently False.
# NOTE(review): presumably each enables extra console/log output for its stage - confirm.
[VERBOSE]
VERBOSE_CLEANING                              =   False
VERBOSE_ENTITY                                =   False
VERBOSE_WIKI_PAGE                             =   False
VERBOSE_CREATE_TWEET_CORPUS                   =   False

# [FLAG] - boolean feature flags; both are currently True.
# NOTE(review): judging by the names they select whether entities and Wikipedia
# text feed the word2vec step - confirm against the code that reads them.
[FLAG]
USE_ENTITY_FOR_W2V                            =   True
USE_WIKIPEDIA_FOR_W2V                         =   True

# [ENTITY] - settings for the entity step: a confidence value and a language code.
# NOTE(review): the service/library that consumes these is not visible in this
# file; the valid range of "confidence" should be confirmed there.
[ENTITY]
confidence                                    =   0.25
# Language code ("it").
lang                                          =   it

# [W2V] - word2vec hyper-parameters.
# NOTE(review): the key names match gensim's Word2Vec arguments (size,
# min_count, sg, window, iter, alpha, workers) and build_vocab's progress_per;
# presumably they are passed straight through - confirm. If so, "size" and
# "iter" are the pre-4.0 gensim names (later renamed vector_size / epochs).
[W2V]
size                                          =   100
min_count                                     =   1
sg                                            =   1
window                                        =   5
iter                                          =   50
alpha                                         =   0.1
workers                                       =   4
progress_per                                  =   100

# [ADVANCED_ASOM] - parameters for the self-organising map (SOM) training and
# the clustering range.
[ADVANCED_ASOM]
# Boolean switch; currently False.
# NOTE(review): presumably a previously pickled SOM is loaded when this is False - confirm.
do_trainSom                               =   False
alpha_max                                 =   0.3
alpha_min                                 =   0.01
# Grid is 15 x 15, the same figure that appears in the "15x15" pickle file
# names under [MAIN].
# NOTE(review): those file names look hand-written, so keep them in sync when
# changing height/width - confirm.
height                                    =   15
width                                     =   15
outlier_unit_threshold                    =   0.2
outlier_percentile                        =   90.
# NOTE(review): the leading capital "K" differs from every other key in this
# section; check whether it is intentional or a typo before renaming, since
# the reading code must use the same name.
Koutlier_percentile                       =   2
learning_rate_percentile                  =   0.1
num_epoch                                 =   5000
# NOTE(review): the value is wrapped in single quotes. Python's configparser
# returns the quotes as part of the string ('adaptive', 10 characters); other
# parsers may strip them. Confirm how the consumer handles this.
training_type                             =   'adaptive'
batch_size                                =   10000
fast_training                             =   False
# Integer rather than True/False, unlike the other switches in this file.
# NOTE(review): presumably a verbosity level - confirm.
verbose                                   =   1
dimension_percentile                      =   100.0
empty_codebook_threshold                  =   1
min_size_codebook_mtx                     =   5
step_codebook_mtx                         =   2
# Minimum and maximum are both 10, i.e. the range contains a single value.
num_cluster_min                           =   10
num_cluster_max                           =   10

# [STOPWORDS_FILES] - stop-word list files, one per key (file1 .. file20), all
# located under ./stopwords-it/ (some in its raw/ sub-folder).
# NOTE(review): it is not visible here whether the loader iterates over every
# key in this section or looks up specific fileN names - confirm before
# renumbering, adding or removing entries.
[STOPWORDS_FILES]
file1   = ./stopwords-it/stopwords-it.txt
file2   = ./stopwords-it/cognomi.txt
file3   = ./stopwords-it/nomi_italiani.txt
file4   = ./stopwords-it/comuni-it.txt
file5   = ./stopwords-it/province_regioni.txt
file6   = ./stopwords-it/parolacce.txt
file7   = ./stopwords-it/accentate.txt
file8   = ./stopwords-it/altre_stopwords.txt
file9   = ./stopwords-it/raw/gh-stopwords-json-it.txt
file10  = ./stopwords-it/raw/language-resource-stopwords.txt
file11  = ./stopwords-it/raw/ranksnl-italian.txt
file12  = ./stopwords-it/raw/stop-words-italian.txt
file13  = ./stopwords-it/raw/stopwords-filter-it.txt
file14  = ./stopwords-it/alfabeto_fonetico-it.txt
file15  = ./stopwords-it/mesi_settimane-it.txt
file16  = ./stopwords-it/numeri-ordinali-it.txt
file17  = ./stopwords-it/nazioni-it.txt
file18  = ./stopwords-it/capitali_mondo-it.txt
file19  = ./stopwords-it/nomi-etnici-it.txt
file20  = ./stopwords-it/adverb.txt

# [GRAPH_IMG] - options for the generated charts/graphs.
[GRAPH_IMG]
# Graphic style of the nodes in the similarity graph (markers, text).
# The key is currently disabled (commented out):
#node_style                                = markers
# NOTE(review): presumably the number of topics shown in the topic-frequency
# charts - confirm.
num_of_topic_for_frequencies               = 30

# [TOPICS] - topic-related settings: a nearest-word count and the output path
# of the similarity graph.
[TOPICS]
num_nearest_words                          =   5
# The path has no file extension.
# NOTE(review): presumably one is appended by the code that writes the graph - confirm.
path_output_similarity_graph               =   ./data/similarity_graph