large-gpu.cfg
###################
# General #
###################
[general_params]
# Device ordinal for CPU/GPU support.
# Set to -1 to run the model on CPU; a non-negative value runs it on the CUDA device with that id.
device = 0
# Seed for the random number generators; fix the seed to reproduce results.
# By default there is no seed, so each turn should be unique.
seed
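# Example (hypothetical value, not part of the original config):
# uncomment the line below to fix the seed for reproducible runs.
# seed = 42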
# Whether to enable debugging.
debug = False
###########################
# Text generation #
###########################
# DialoGPT (see https://github.com/microsoft/DialoGPT) or any other text generator supported by `transformers.pipeline`.
[generation_pipeline_kwargs]
# Keyword arguments passed to the text generation pipeline.
# The model that will be used by the pipeline 'text-generation' to generate responses.
# The model must be an instance of a pretrained model inheriting from `PreTrainedModel` or `TFPreTrainedModel`.
model = microsoft/DialoGPT-large
# The configuration that will be used by the pipeline to instantiate the model.
config
# The tokenizer that will be used by the pipeline to encode data for the model.
tokenizer
# The framework to use, either pt for PyTorch or tf for TensorFlow.
# The specified framework must be installed.
framework
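# Example sketch (hypothetical values, not part of the original config):
# pin the framework and reuse the model's own tokenizer explicitly.
# framework = pt
# tokenizer = microsoft/DialoGPT-large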
[generator_kwargs]
# Keyword arguments passed to the text generator.
# See https://huggingface.co/blog/how-to-generate
# The maximum number of tokens to return, inclusive of punctuation etc.
# Generation stops automatically if the end-of-sequence token is found earlier.
max_length = 1000
# The minimum length of the sequence to be generated.
min_length = 1
# Whether or not to use sampling; use greedy decoding otherwise.
do_sample = True
# Whether to stop the beam search when at least `num_beams` sentences are finished per batch.
early_stopping = False
# Number of beams for beam search.
# 1 means no beam search.
num_beams = 1
# Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
# 1 means no group beam search.
num_beam_groups = 1
# Value to control diversity for group beam search.
# The higher the penalty, the more diverse are the outputs.
diversity_penalty = 0.0
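# Example sketch of diverse (group) beam search, with hypothetical values;
# num_beam_groups must evenly divide num_beams, and sampling is typically
# disabled when using it:
# do_sample = False
# num_beams = 4
# num_beam_groups = 2
# diversity_penalty = 0.5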
# The value used to modulate the next token probabilities.
# Must be strictly positive.
# Lower temperature results in less random completions.
# As the temperature approaches zero, the model will become deterministic and repetitive.
# Higher temperature results in more random completions.
temperature = 1
# The number of highest probability vocabulary tokens to keep for top-k-filtering.
# 1 means only 1 word is considered for each step (token), resulting in deterministic completions,
# while 40 means 40 words are considered at each step.
# 0 (default) is a special setting meaning no restrictions.
# 40 is generally a good value.
top_k = 40
# If set to float < 1, only the most probable tokens with probabilities
# that add up to top_p or higher are kept for generation.
top_p = 0.9
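# Example sketch (hypothetical values): pure nucleus sampling, relying on the
# special top_k = 0 setting described above to disable top-k filtering:
# top_k = 0
# top_p = 0.9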
# The parameter for repetition penalty. 1.0 means no penalty.
repetition_penalty = 1
# Exponential penalty to the length. 1.0 means no penalty.
# Set to a value < 1.0 to encourage the model to generate shorter sequences,
# or to a value > 1.0 to encourage it to produce longer sequences.
length_penalty = 1
# If set to int > 0, all ngrams of that size can only occur once.
no_repeat_ngram_size = 0
# The id of the padding token.
pad_token_id
# The id of the beginning-of-sequence token.
bos_token_id
# The id of the end-of-sequence token.
eos_token_id
# Comma separated list of token ids that are not allowed to be generated.
bad_words_ids
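# Hypothetical example, assuming the comma-separated format described above
# (the token ids here are placeholders, not real vocabulary entries):
# bad_words_ids = 123,456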
# The number of independently computed returned sequences for each element in the batch.
# If set above 1, you need a response classifier or a custom selection function to pick one,
# e.g. the most dissimilar message or the longest one.
# Keep in mind: the higher the number, the slower the generation.
num_return_sequences = 1
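# Example sketch (hypothetical value): generate several candidates so the
# rankers configured below have alternatives to choose from:
# num_return_sequences = 5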
# If an encoder-decoder model starts decoding with a different token than bos, the id of that token.
decoder_start_token_id
# Whether or not the model should use the past last key/values attentions
# (if applicable to the model) to speed up decoding.
use_cache = True
# Whether or not to clean up the potential extra spaces in the text output.
clean_up_tokenization_spaces = True
############################
# Response ranking #
############################
# DialogRPT (see https://github.com/golsun/DialogRPT)
# Any ranker can be disabled by leaving its weight empty.
# NOTE: For the rankers to have any effect, ensure num_return_sequences is greater than 1.
[prior_ranker_weights]
# The `prior` score is the weighted average of `human_vs_rand` and `human_vs_machine` predictions.
# Weight of "How relevant the response is for the given context?"
human_vs_rand_weight
# Weight of "How likely the response is human-written rather than machine-generated?"
human_vs_machine_weight
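# Example sketch (hypothetical weights): weigh both predictors equally:
# human_vs_rand_weight = 0.5
# human_vs_machine_weight = 0.5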
[cond_ranker_weights]
# The `cond` score is the weighted average of `updown`, `depth` and `width` predictions.
# Weight of "How likely the response gets the most upvotes?"
updown_weight
# Weight of "How likely the response gets the most direct replies?"
depth_weight
# Weight of "How likely the response gets the longest follow-up thread?"
width_weight
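# Example sketch (hypothetical weights): emphasize upvote likelihood over
# thread depth and width:
# updown_weight = 1.0
# depth_weight = 0.25
# width_weight = 0.25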
#########################
# Communication #
#########################
[chatbot_params]
# Parameters of the chatbot itself.
# The number of turns (turn = message and response) the model should consider.
# Set to 0 to focus only on the last message; set to -1 for unlimited context length.
max_turns_history = 2
# Your Telegram token. See https://core.telegram.org/bots
telegram_token = YOUR_TOKEN_HERE
# Your GIPHY API token. See https://developers.giphy.com/docs/api/
giphy_token = YOUR_TOKEN_HERE
# Probability of returning a GIF.
giphy_prob = 0.1
# The maximum number of words in a generated reply for the bot to also return a GIF.
giphy_max_words = 3
# A value from 0 to 10 that makes GIF results weirder as you go up the scale.
giphy_weirdness = 5
# Whether to continue from the previous dialogue.
continue_after_restart = True
# The filename of the pickle file used to store the bot data.
data_filename = bot_data.pkl