beshkenadze committed on
Commit
b1072f3
·
verified ·
1 Parent(s): 4de5909

Upload 8-bit Cohere MLX checkpoint

Browse files
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: mlx
3
+ tags:
4
+ - automatic-speech-recognition
5
+ - speech-to-text
6
+ - audio
7
+ - mlx
8
+ - safetensors
9
+ license: other
10
+ ---
11
+
12
+ # cohere-transcribe-03-2026-mlx-8bit
13
+
14
+ Quantized MLX weights for **beshkenadze/cohere-transcribe-03-2026-mlx-fp16**.
15
+
16
+ ## Variant
17
+
18
+ - Precision: **8-bit**
19
+ - Quantization mode: `affine`
20
+ - Group size: `64`
21
+
22
+ ## Files
23
+
24
+ - `model.safetensors`
25
+ - `config.json`
26
+ - `tokenizer.model`
27
+ - `tokenizer_config.json`
28
+ - `preprocessor_config.json`
29
+ - `special_tokens_map.json`
30
+ - `key_map.json`
31
+ - `conversion_summary.json`
32
+
33
+ ## Repo-sample benchmark
34
+
35
+ Sample: `Tests/media/conversational_a.wav`
36
+
37
+ - Generation TPS: **352.9**
38
+ - Peak memory: **2.87 GB**
39
+ - Output: `Coffee's story likely begins in Ethiopia, where legend tells of a goat herder named Kaldi, who noticed his goats became energetic after eating red berries from a particular bush; curious, he tried them himself and felt invigorated.`
40
+
41
+ ## Quality note
42
+
43
+ Output matches the fp16 checkpoint on the repo sample while substantially reducing memory use.
44
+
45
+ ## Notes
46
+
47
+ - Generated from the Swift-compatible fp16 checkpoint `beshkenadze/cohere-transcribe-03-2026-mlx-fp16`.
48
+ - This repository contains inference artifacts only. Refer to the upstream Cohere model card and license for original model details.
config.json ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures" : [
3
+ "CohereAsrForConditionalGeneration"
4
+ ],
5
+ "auto_map" : {
6
+ "AutoConfig" : "configuration_cohere_asr.CohereAsrConfig",
7
+ "AutoFeatureExtractor" : "processing_cohere_asr.CohereAsrFeatureExtractor",
8
+ "AutoModel" : "modeling_cohere_asr.CohereAsrModel",
9
+ "AutoModelForSpeechSeq2Seq" : "modeling_cohere_asr.CohereAsrForConditionalGeneration",
10
+ "AutoProcessor" : "processing_cohere_asr.CohereAsrProcessor",
11
+ "AutoTokenizer" : "tokenization_cohere_asr.CohereAsrTokenizer"
12
+ },
13
+ "batch_size" : 128,
14
+ "decoding" : {
15
+ "beam" : {
16
+ "beam_size" : 1,
17
+ "len_pen" : 0,
18
+ "max_generation_delta" : 50
19
+ },
20
+ "return_best_hypothesis" : true,
21
+ "strategy" : "beam"
22
+ },
23
+ "encoder" : {
24
+ "att_context_size" : [
25
+ -1,
26
+ -1
27
+ ],
28
+ "causal_downsampling" : false,
29
+ "conv_context_size" : null,
30
+ "conv_kernel_size" : 9,
31
+ "conv_norm_type" : "batch_norm",
32
+ "d_model" : 1280,
33
+ "dropout" : 0,
34
+ "dropout_att" : 0,
35
+ "dropout_emb" : 0,
36
+ "dropout_pre_encoder" : 0,
37
+ "feat_in" : 128,
38
+ "feat_out" : -1,
39
+ "ff_expansion_factor" : 4,
40
+ "n_heads" : 8,
41
+ "n_layers" : 48,
42
+ "pos_emb_max_len" : 5000,
43
+ "reduction" : null,
44
+ "reduction_factor" : 1,
45
+ "reduction_position" : null,
46
+ "self_attention_model" : "rel_pos",
47
+ "subsampling" : "dw_striding",
48
+ "subsampling_conv_channels" : 256,
49
+ "subsampling_factor" : 8,
50
+ "untie_biases" : true,
51
+ "xscaling" : false
52
+ },
53
+ "head" : {
54
+ "activation" : "relu",
55
+ "dropout" : 0,
56
+ "hidden_size" : 1024,
57
+ "log_softmax" : true,
58
+ "num_classes" : 16384,
59
+ "num_layers" : 1,
60
+ "use_transformer_init" : true
61
+ },
62
+ "is_encoder_decoder" : true,
63
+ "log_batch_stats" : false,
64
+ "log_prediction" : true,
65
+ "max_audio_clip_s" : 35,
66
+ "max_seq_len" : 1024,
67
+ "model_defaults" : {
68
+ "asr_enc_hidden" : 1280,
69
+ "lm_dec_hidden" : 1024,
70
+ "lm_enc_hidden" : 1024
71
+ },
72
+ "model_type" : "cohere_asr",
73
+ "multitask_metrics_cfg" : {
74
+ "log_predictions" : true,
75
+ "metrics" : {
76
+ "wer" : {
77
+ "constraint" : ".source_lang==.target_lang"
78
+ }
79
+ }
80
+ },
81
+ "overlap_chunk_second" : 5,
82
+ "preprocessor" : {
83
+ "dither" : 1.0000000000000001e-05,
84
+ "features" : 128,
85
+ "frame_splicing" : 1,
86
+ "log" : true,
87
+ "n_fft" : 512,
88
+ "normalize" : "per_feature",
89
+ "pad_to" : 0,
90
+ "pad_value" : 0,
91
+ "sample_rate" : 16000,
92
+ "window" : "hann",
93
+ "window_size" : 0.025000000000000001,
94
+ "window_stride" : 0.01
95
+ },
96
+ "prompt_defaults" : [
97
+ {
98
+ "role" : "user",
99
+ "slots" : {
100
+ "decodercontext" : "",
101
+ "diarize" : "<|nodiarize|>",
102
+ "emotion" : "<|emo:undefined|>",
103
+ "itn" : "<|noitn|>",
104
+ "pnc" : "<|pnc|>",
105
+ "source_lang" : "<|en|>",
106
+ "target_lang" : "<|en|>",
107
+ "timestamp" : "<|notimestamp|>"
108
+ }
109
+ },
110
+ {
111
+ "role" : "user_partial",
112
+ "slots" : {
113
+ "decodercontext" : ""
114
+ }
115
+ }
116
+ ],
117
+ "prompt_format" : "cohere_asr",
118
+ "quantization" : {
119
+ "bits" : 8,
120
+ "group_size" : 64,
121
+ "mode" : "affine"
122
+ },
123
+ "quantization_config" : {
124
+ "bits" : 8,
125
+ "group_size" : 64,
126
+ "mode" : "affine"
127
+ },
128
+ "sample_rate" : 16000,
129
+ "supported_languages" : [
130
+ "en",
131
+ "fr",
132
+ "de",
133
+ "es",
134
+ "it",
135
+ "pt",
136
+ "nl",
137
+ "pl",
138
+ "el",
139
+ "ar",
140
+ "ja",
141
+ "zh",
142
+ "vi",
143
+ "ko"
144
+ ],
145
+ "transf_decoder" : {
146
+ "config_dict" : {
147
+ "attn_layer_dropout" : 0,
148
+ "attn_score_dropout" : 0,
149
+ "embedding_dropout" : 0,
150
+ "ffn_dropout" : 0,
151
+ "hidden_act" : "relu",
152
+ "hidden_size" : 1024,
153
+ "inner_size" : 4096,
154
+ "learn_positional_encodings" : false,
155
+ "lm_dec_hidden" : 1280,
156
+ "max_sequence_length" : 1024,
157
+ "num_attention_heads" : 8,
158
+ "num_layers" : 8,
159
+ "num_token_types" : 0,
160
+ "pre_ln" : true,
161
+ "vocab_size" : "None"
162
+ },
163
+ "encoder" : null,
164
+ "model_name" : null,
165
+ "pre_ln_final_layer_norm" : true,
166
+ "pretrained" : false
167
+ },
168
+ "transf_encoder" : {
169
+ "attn_layer_dropout" : 0,
170
+ "attn_score_dropout" : 0,
171
+ "ffn_dropout" : 0,
172
+ "hidden_size" : 1024,
173
+ "inner_size" : 4096,
174
+ "mask_future" : false,
175
+ "num_attention_heads" : 8,
176
+ "num_layers" : 0,
177
+ "pre_ln" : true,
178
+ "pre_ln_final_layer_norm" : true
179
+ },
180
+ "use_loss_mask_for_prompt" : false,
181
+ "vocab_size" : 16384
182
+ }
conversion_summary.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "repo_id": "CohereLabs/cohere-transcribe-03-2026",
3
+ "source_snapshot": "/Users/akira/.cache/huggingface/hub/models--CohereLabs--cohere-transcribe-03-2026/snapshots/494db8a1d34a3aeb28e9ecf61bae9e7cdef455b9",
4
+ "dtype": "float16",
5
+ "num_output_tensors": 1896,
6
+ "num_qkv_merges": 128,
7
+ "output_file": "/Volumes/DATA/mlx-audio-swift/cohere_asr_tmp/mlx_converted/cohere_transcribe_mlx.safetensors",
8
+ "sample_mappings": {
9
+ "encoder.layers.0.conv.batch_norm.num_batches_tracked": "encoder.layers.0.conv.batch_norm.num_batches_tracked",
10
+ "encoder.layers.1.conv.batch_norm.num_batches_tracked": "encoder.layers.1.conv.batch_norm.num_batches_tracked",
11
+ "encoder.layers.10.conv.batch_norm.num_batches_tracked": "encoder.layers.10.conv.batch_norm.num_batches_tracked",
12
+ "encoder.layers.11.conv.batch_norm.num_batches_tracked": "encoder.layers.11.conv.batch_norm.num_batches_tracked",
13
+ "encoder.layers.12.conv.batch_norm.num_batches_tracked": "encoder.layers.12.conv.batch_norm.num_batches_tracked",
14
+ "encoder.layers.13.conv.batch_norm.num_batches_tracked": "encoder.layers.13.conv.batch_norm.num_batches_tracked",
15
+ "encoder.layers.14.conv.batch_norm.num_batches_tracked": "encoder.layers.14.conv.batch_norm.num_batches_tracked",
16
+ "encoder.layers.15.conv.batch_norm.num_batches_tracked": "encoder.layers.15.conv.batch_norm.num_batches_tracked",
17
+ "encoder.layers.16.conv.batch_norm.num_batches_tracked": "encoder.layers.16.conv.batch_norm.num_batches_tracked",
18
+ "encoder.layers.17.conv.batch_norm.num_batches_tracked": "encoder.layers.17.conv.batch_norm.num_batches_tracked",
19
+ "encoder.layers.18.conv.batch_norm.num_batches_tracked": "encoder.layers.18.conv.batch_norm.num_batches_tracked",
20
+ "encoder.layers.19.conv.batch_norm.num_batches_tracked": "encoder.layers.19.conv.batch_norm.num_batches_tracked",
21
+ "encoder.layers.2.conv.batch_norm.num_batches_tracked": "encoder.layers.2.conv.batch_norm.num_batches_tracked",
22
+ "encoder.layers.20.conv.batch_norm.num_batches_tracked": "encoder.layers.20.conv.batch_norm.num_batches_tracked",
23
+ "encoder.layers.21.conv.batch_norm.num_batches_tracked": "encoder.layers.21.conv.batch_norm.num_batches_tracked",
24
+ "encoder.layers.22.conv.batch_norm.num_batches_tracked": "encoder.layers.22.conv.batch_norm.num_batches_tracked",
25
+ "encoder.layers.23.conv.batch_norm.num_batches_tracked": "encoder.layers.23.conv.batch_norm.num_batches_tracked",
26
+ "encoder.layers.24.conv.batch_norm.num_batches_tracked": "encoder.layers.24.conv.batch_norm.num_batches_tracked",
27
+ "encoder.layers.25.conv.batch_norm.num_batches_tracked": "encoder.layers.25.conv.batch_norm.num_batches_tracked",
28
+ "encoder.layers.26.conv.batch_norm.num_batches_tracked": "encoder.layers.26.conv.batch_norm.num_batches_tracked",
29
+ "encoder.layers.27.conv.batch_norm.num_batches_tracked": "encoder.layers.27.conv.batch_norm.num_batches_tracked",
30
+ "encoder.layers.28.conv.batch_norm.num_batches_tracked": "encoder.layers.28.conv.batch_norm.num_batches_tracked",
31
+ "encoder.layers.29.conv.batch_norm.num_batches_tracked": "encoder.layers.29.conv.batch_norm.num_batches_tracked",
32
+ "encoder.layers.3.conv.batch_norm.num_batches_tracked": "encoder.layers.3.conv.batch_norm.num_batches_tracked",
33
+ "encoder.layers.30.conv.batch_norm.num_batches_tracked": "encoder.layers.30.conv.batch_norm.num_batches_tracked",
34
+ "encoder.layers.31.conv.batch_norm.num_batches_tracked": "encoder.layers.31.conv.batch_norm.num_batches_tracked",
35
+ "encoder.layers.32.conv.batch_norm.num_batches_tracked": "encoder.layers.32.conv.batch_norm.num_batches_tracked",
36
+ "encoder.layers.33.conv.batch_norm.num_batches_tracked": "encoder.layers.33.conv.batch_norm.num_batches_tracked",
37
+ "encoder.layers.34.conv.batch_norm.num_batches_tracked": "encoder.layers.34.conv.batch_norm.num_batches_tracked",
38
+ "encoder.layers.35.conv.batch_norm.num_batches_tracked": "encoder.layers.35.conv.batch_norm.num_batches_tracked",
39
+ "encoder.layers.36.conv.batch_norm.num_batches_tracked": "encoder.layers.36.conv.batch_norm.num_batches_tracked",
40
+ "encoder.layers.37.conv.batch_norm.num_batches_tracked": "encoder.layers.37.conv.batch_norm.num_batches_tracked",
41
+ "encoder.layers.38.conv.batch_norm.num_batches_tracked": "encoder.layers.38.conv.batch_norm.num_batches_tracked",
42
+ "encoder.layers.39.conv.batch_norm.num_batches_tracked": "encoder.layers.39.conv.batch_norm.num_batches_tracked",
43
+ "encoder.layers.4.conv.batch_norm.num_batches_tracked": "encoder.layers.4.conv.batch_norm.num_batches_tracked",
44
+ "encoder.layers.40.conv.batch_norm.num_batches_tracked": "encoder.layers.40.conv.batch_norm.num_batches_tracked",
45
+ "encoder.layers.41.conv.batch_norm.num_batches_tracked": "encoder.layers.41.conv.batch_norm.num_batches_tracked",
46
+ "encoder.layers.42.conv.batch_norm.num_batches_tracked": "encoder.layers.42.conv.batch_norm.num_batches_tracked",
47
+ "encoder.layers.43.conv.batch_norm.num_batches_tracked": "encoder.layers.43.conv.batch_norm.num_batches_tracked",
48
+ "encoder.layers.44.conv.batch_norm.num_batches_tracked": "encoder.layers.44.conv.batch_norm.num_batches_tracked"
49
+ },
50
+ "all_mappings_file": "/Volumes/DATA/mlx-audio-swift/cohere_asr_tmp/mlx_converted/key_map.json"
51
+ }
key_map.json ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1edbe982f47d22e64ba5723a4204b43ca93ea703b18946160d44ec26c83ab9
3
+ size 2418031831
preprocessor_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoFeatureExtractor": "processing_cohere_asr.CohereAsrFeatureExtractor"
4
+ },
5
+ "dither": 1e-05,
6
+ "feature_extractor_type": "CohereAsrFeatureExtractor",
7
+ "feature_size": 128,
8
+ "frame_splicing": 1,
9
+ "log": true,
10
+ "n_fft": 512,
11
+ "n_window_size": 400,
12
+ "n_window_stride": 160,
13
+ "normalize": "per_feature",
14
+ "pad_to": 0,
15
+ "padding_value": 0.0,
16
+ "sampling_rate": 16000,
17
+ "window": "hann"
18
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|nospeech|>",
4
+ "<|pnc|>",
5
+ "<|nopnc|>",
6
+ "<|startofcontext|>",
7
+ "<|itn|>",
8
+ "<|noitn|>",
9
+ "<|timestamp|>",
10
+ "<|notimestamp|>",
11
+ "<|diarize|>",
12
+ "<|nodiarize|>",
13
+ "<|spkchange|>",
14
+ "<|audioseparator|>",
15
+ "<|emo:undefined|>",
16
+ "<|emo:neutral|>",
17
+ "<|emo:happy|>",
18
+ "<|emo:sad|>",
19
+ "<|emo:angry|>",
20
+ "<|unklang|>",
21
+ "<|aa|>",
22
+ "<|ab|>",
23
+ "<|af|>",
24
+ "<|ak|>",
25
+ "<|sq|>",
26
+ "<|am|>",
27
+ "<|ar|>",
28
+ "<|an|>",
29
+ "<|hy|>",
30
+ "<|as|>",
31
+ "<|av|>",
32
+ "<|ae|>",
33
+ "<|ay|>",
34
+ "<|az|>",
35
+ "<|bm|>",
36
+ "<|ba|>",
37
+ "<|eu|>",
38
+ "<|be|>",
39
+ "<|bn|>",
40
+ "<|bi|>",
41
+ "<|bs|>",
42
+ "<|br|>",
43
+ "<|bg|>",
44
+ "<|my|>",
45
+ "<|ca|>",
46
+ "<|ch|>",
47
+ "<|ce|>",
48
+ "<|ny|>",
49
+ "<|zh|>",
50
+ "<|cu|>",
51
+ "<|cv|>",
52
+ "<|kw|>",
53
+ "<|co|>",
54
+ "<|cr|>",
55
+ "<|hr|>",
56
+ "<|cs|>",
57
+ "<|da|>",
58
+ "<|dv|>",
59
+ "<|nl|>",
60
+ "<|dz|>",
61
+ "<|en|>",
62
+ "<|eo|>",
63
+ "<|et|>",
64
+ "<|ee|>",
65
+ "<|fo|>",
66
+ "<|fj|>",
67
+ "<|fi|>",
68
+ "<|fr|>",
69
+ "<|fy|>",
70
+ "<|ff|>",
71
+ "<|gd|>",
72
+ "<|gl|>",
73
+ "<|lg|>",
74
+ "<|ka|>",
75
+ "<|de|>",
76
+ "<|el|>",
77
+ "<|kl|>",
78
+ "<|gn|>",
79
+ "<|gu|>",
80
+ "<|ht|>",
81
+ "<|ha|>",
82
+ "<|he|>",
83
+ "<|hz|>",
84
+ "<|hi|>",
85
+ "<|ho|>",
86
+ "<|hu|>",
87
+ "<|is|>",
88
+ "<|io|>",
89
+ "<|ig|>",
90
+ "<|id|>",
91
+ "<|ia|>",
92
+ "<|ie|>",
93
+ "<|iu|>",
94
+ "<|ik|>",
95
+ "<|ga|>",
96
+ "<|it|>",
97
+ "<|ja|>",
98
+ "<|jv|>",
99
+ "<|kn|>",
100
+ "<|kr|>",
101
+ "<|ks|>",
102
+ "<|kk|>",
103
+ "<|km|>",
104
+ "<|ki|>",
105
+ "<|rw|>",
106
+ "<|ky|>",
107
+ "<|kv|>",
108
+ "<|kg|>",
109
+ "<|ko|>",
110
+ "<|kj|>",
111
+ "<|ku|>",
112
+ "<|lo|>",
113
+ "<|la|>",
114
+ "<|lv|>",
115
+ "<|li|>",
116
+ "<|ln|>",
117
+ "<|lt|>",
118
+ "<|lu|>",
119
+ "<|lb|>",
120
+ "<|mk|>",
121
+ "<|mg|>",
122
+ "<|ms|>",
123
+ "<|ml|>",
124
+ "<|mt|>",
125
+ "<|gv|>",
126
+ "<|mi|>",
127
+ "<|mr|>",
128
+ "<|mh|>",
129
+ "<|mn|>",
130
+ "<|na|>",
131
+ "<|nv|>",
132
+ "<|nd|>",
133
+ "<|nr|>",
134
+ "<|ng|>",
135
+ "<|ne|>",
136
+ "<|no|>",
137
+ "<|nb|>",
138
+ "<|nn|>",
139
+ "<|oc|>",
140
+ "<|oj|>",
141
+ "<|or|>",
142
+ "<|om|>",
143
+ "<|os|>",
144
+ "<|pi|>",
145
+ "<|ps|>",
146
+ "<|fa|>",
147
+ "<|pl|>",
148
+ "<|pt|>",
149
+ "<|pa|>",
150
+ "<|qu|>",
151
+ "<|ro|>",
152
+ "<|rm|>",
153
+ "<|rn|>",
154
+ "<|ru|>",
155
+ "<|se|>",
156
+ "<|sm|>",
157
+ "<|sg|>",
158
+ "<|sa|>",
159
+ "<|sc|>",
160
+ "<|sr|>",
161
+ "<|sn|>",
162
+ "<|sd|>",
163
+ "<|si|>",
164
+ "<|sk|>",
165
+ "<|sl|>",
166
+ "<|so|>",
167
+ "<|st|>",
168
+ "<|es|>",
169
+ "<|su|>",
170
+ "<|sw|>",
171
+ "<|ss|>",
172
+ "<|sv|>",
173
+ "<|tl|>",
174
+ "<|ty|>",
175
+ "<|tg|>",
176
+ "<|ta|>",
177
+ "<|tt|>",
178
+ "<|te|>",
179
+ "<|th|>",
180
+ "<|bo|>",
181
+ "<|ti|>",
182
+ "<|to|>",
183
+ "<|ts|>",
184
+ "<|tn|>",
185
+ "<|tr|>",
186
+ "<|tk|>",
187
+ "<|tw|>",
188
+ "<|ug|>",
189
+ "<|uk|>",
190
+ "<|ur|>",
191
+ "<|uz|>",
192
+ "<|ve|>",
193
+ "<|vi|>",
194
+ "<|vo|>",
195
+ "<|wa|>",
196
+ "<|cy|>",
197
+ "<|wo|>",
198
+ "<|xh|>",
199
+ "<|ii|>",
200
+ "<|yi|>",
201
+ "<|yo|>",
202
+ "<|za|>",
203
+ "<|zu|>",
204
+ "<|spk0|>",
205
+ "<|spk1|>",
206
+ "<|spk2|>",
207
+ "<|spk3|>",
208
+ "<|spk4|>",
209
+ "<|spk5|>",
210
+ "<|spk6|>",
211
+ "<|spk7|>",
212
+ "<|spk8|>",
213
+ "<|spk9|>",
214
+ "<|spk10|>",
215
+ "<|spk11|>",
216
+ "<|spk12|>",
217
+ "<|spk13|>",
218
+ "<|spk14|>",
219
+ "<|spk15|>",
220
+ "<|spltoken0|>",
221
+ "<|spltoken1|>",
222
+ "<|spltoken2|>",
223
+ "<|spltoken3|>",
224
+ "<|spltoken4|>",
225
+ "<|spltoken5|>",
226
+ "<|spltoken6|>",
227
+ "<|spltoken7|>",
228
+ "<|spltoken8|>",
229
+ "<|spltoken9|>",
230
+ "<|spltoken10|>",
231
+ "<|spltoken11|>",
232
+ "<|spltoken12|>",
233
+ "<|spltoken13|>",
234
+ "<|spltoken14|>",
235
+ "<|spltoken15|>",
236
+ "<|spltoken16|>",
237
+ "<|spltoken17|>",
238
+ "<|spltoken18|>",
239
+ "<|spltoken19|>",
240
+ "<|spltoken20|>",
241
+ "<|spltoken21|>",
242
+ "<|spltoken22|>",
243
+ "<|spltoken23|>",
244
+ "<|spltoken24|>",
245
+ "<|spltoken25|>",
246
+ "<|spltoken26|>",
247
+ "<|spltoken27|>",
248
+ "<|spltoken28|>",
249
+ "<|spltoken29|>",
250
+ "<|spltoken30|>",
251
+ "<|spltoken31|>",
252
+ "<|spltoken32|>",
253
+ "<|spltoken33|>"
254
+ ],
255
+ "bos_token": "<|startoftranscript|>",
256
+ "eos_token": "<|endoftext|>",
257
+ "pad_token": "<pad>",
258
+ "unk_token": "<unk>"
259
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d21e6a83b2d0d3e1241a7817e4bef8eb63bcb7cfe4a2675af9a35ff3bbf0e14
3
+ size 492827
tokenizer_config.json ADDED
@@ -0,0 +1,2314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<unk>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|nospeech|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "10": {
21
+ "content": "<|timestamp|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100": {
29
+ "content": "<|kn|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "101": {
37
+ "content": "<|kr|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "102": {
45
+ "content": "<|ks|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "103": {
53
+ "content": "<|kk|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "104": {
61
+ "content": "<|km|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "105": {
69
+ "content": "<|ki|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "106": {
77
+ "content": "<|rw|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "107": {
85
+ "content": "<|ky|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "108": {
93
+ "content": "<|kv|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "109": {
101
+ "content": "<|kg|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "11": {
109
+ "content": "<|notimestamp|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "110": {
117
+ "content": "<|ko|>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "111": {
125
+ "content": "<|kj|>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "112": {
133
+ "content": "<|ku|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "113": {
141
+ "content": "<|lo|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "114": {
149
+ "content": "<|la|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "115": {
157
+ "content": "<|lv|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "116": {
165
+ "content": "<|li|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "117": {
173
+ "content": "<|ln|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "118": {
181
+ "content": "<|lt|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "119": {
189
+ "content": "<|lu|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "12": {
197
+ "content": "<|diarize|>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "120": {
205
+ "content": "<|lb|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "121": {
213
+ "content": "<|mk|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "122": {
221
+ "content": "<|mg|>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "123": {
229
+ "content": "<|ms|>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "124": {
237
+ "content": "<|ml|>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "125": {
245
+ "content": "<|mt|>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "126": {
253
+ "content": "<|gv|>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "127": {
261
+ "content": "<|mi|>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "128": {
269
+ "content": "<|mr|>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "129": {
277
+ "content": "<|mh|>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "13": {
285
+ "content": "<|nodiarize|>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "130": {
293
+ "content": "<|mn|>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "131": {
301
+ "content": "<|na|>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "132": {
309
+ "content": "<|nv|>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "133": {
317
+ "content": "<|nd|>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "134": {
325
+ "content": "<|nr|>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "135": {
333
+ "content": "<|ng|>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "136": {
341
+ "content": "<|ne|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "137": {
349
+ "content": "<|no|>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "138": {
357
+ "content": "<|nb|>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "139": {
365
+ "content": "<|nn|>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "14": {
373
+ "content": "<|spkchange|>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "140": {
381
+ "content": "<|oc|>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "141": {
389
+ "content": "<|oj|>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "142": {
397
+ "content": "<|or|>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "143": {
405
+ "content": "<|om|>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "144": {
413
+ "content": "<|os|>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "145": {
421
+ "content": "<|pi|>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "146": {
429
+ "content": "<|ps|>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "147": {
437
+ "content": "<|fa|>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "148": {
445
+ "content": "<|pl|>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "149": {
453
+ "content": "<|pt|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "15": {
461
+ "content": "<|audioseparator|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "150": {
469
+ "content": "<|pa|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "151": {
477
+ "content": "<|qu|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "152": {
485
+ "content": "<|ro|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "153": {
493
+ "content": "<|rm|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "154": {
501
+ "content": "<|rn|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "155": {
509
+ "content": "<|ru|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "156": {
517
+ "content": "<|se|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "157": {
525
+ "content": "<|sm|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "158": {
533
+ "content": "<|sg|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "159": {
541
+ "content": "<|sa|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "16": {
549
+ "content": "<|emo:undefined|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "160": {
557
+ "content": "<|sc|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "161": {
565
+ "content": "<|sr|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "162": {
573
+ "content": "<|sn|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "163": {
581
+ "content": "<|sd|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "164": {
589
+ "content": "<|si|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "165": {
597
+ "content": "<|sk|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "166": {
605
+ "content": "<|sl|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "167": {
613
+ "content": "<|so|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "168": {
621
+ "content": "<|st|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "169": {
629
+ "content": "<|es|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "17": {
637
+ "content": "<|emo:neutral|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "170": {
645
+ "content": "<|su|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "171": {
653
+ "content": "<|sw|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "172": {
661
+ "content": "<|ss|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "173": {
669
+ "content": "<|sv|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "174": {
677
+ "content": "<|tl|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "175": {
685
+ "content": "<|ty|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "176": {
693
+ "content": "<|tg|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "177": {
701
+ "content": "<|ta|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "178": {
709
+ "content": "<|tt|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "179": {
717
+ "content": "<|te|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "18": {
725
+ "content": "<|emo:happy|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "180": {
733
+ "content": "<|th|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "181": {
741
+ "content": "<|bo|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "182": {
749
+ "content": "<|ti|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "183": {
757
+ "content": "<|to|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "184": {
765
+ "content": "<|ts|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "185": {
773
+ "content": "<|tn|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "186": {
781
+ "content": "<|tr|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "187": {
789
+ "content": "<|tk|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "188": {
797
+ "content": "<|tw|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "189": {
805
+ "content": "<|ug|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "19": {
813
+ "content": "<|emo:sad|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "190": {
821
+ "content": "<|uk|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "191": {
829
+ "content": "<|ur|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "192": {
837
+ "content": "<|uz|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "193": {
845
+ "content": "<|ve|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "194": {
853
+ "content": "<|vi|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "195": {
861
+ "content": "<|vo|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "196": {
869
+ "content": "<|wa|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "197": {
877
+ "content": "<|cy|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "198": {
885
+ "content": "<|wo|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "199": {
893
+ "content": "<|xh|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "2": {
901
+ "content": "<pad>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "20": {
909
+ "content": "<|emo:angry|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "200": {
917
+ "content": "<|ii|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "201": {
925
+ "content": "<|yi|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "202": {
933
+ "content": "<|yo|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "203": {
941
+ "content": "<|za|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "204": {
949
+ "content": "<|zu|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "205": {
957
+ "content": "<|spk0|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "206": {
965
+ "content": "<|spk1|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "207": {
973
+ "content": "<|spk2|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "208": {
981
+ "content": "<|spk3|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "209": {
989
+ "content": "<|spk4|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "21": {
997
+ "content": "<|unklang|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "210": {
1005
+ "content": "<|spk5|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "211": {
1013
+ "content": "<|spk6|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "212": {
1021
+ "content": "<|spk7|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "213": {
1029
+ "content": "<|spk8|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "214": {
1037
+ "content": "<|spk9|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "215": {
1045
+ "content": "<|spk10|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "216": {
1053
+ "content": "<|spk11|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "217": {
1061
+ "content": "<|spk12|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "218": {
1069
+ "content": "<|spk13|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "219": {
1077
+ "content": "<|spk14|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "22": {
1085
+ "content": "<|aa|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "220": {
1093
+ "content": "<|spk15|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "221": {
1101
+ "content": "<|spltoken0|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "222": {
1109
+ "content": "<|spltoken1|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "223": {
1117
+ "content": "<|spltoken2|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "224": {
1125
+ "content": "<|spltoken3|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "225": {
1133
+ "content": "<|spltoken4|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "226": {
1141
+ "content": "<|spltoken5|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "227": {
1149
+ "content": "<|spltoken6|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "228": {
1157
+ "content": "<|spltoken7|>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "229": {
1165
+ "content": "<|spltoken8|>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "23": {
1173
+ "content": "<|ab|>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "230": {
1181
+ "content": "<|spltoken9|>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "231": {
1189
+ "content": "<|spltoken10|>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "232": {
1197
+ "content": "<|spltoken11|>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "233": {
1205
+ "content": "<|spltoken12|>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "234": {
1213
+ "content": "<|spltoken13|>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "235": {
1221
+ "content": "<|spltoken14|>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "236": {
1229
+ "content": "<|spltoken15|>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "237": {
1237
+ "content": "<|spltoken16|>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "238": {
1245
+ "content": "<|spltoken17|>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "239": {
1253
+ "content": "<|spltoken18|>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "24": {
1261
+ "content": "<|af|>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "240": {
1269
+ "content": "<|spltoken19|>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "241": {
1277
+ "content": "<|spltoken20|>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "242": {
1285
+ "content": "<|spltoken21|>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "243": {
1293
+ "content": "<|spltoken22|>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "244": {
1301
+ "content": "<|spltoken23|>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "245": {
1309
+ "content": "<|spltoken24|>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "246": {
1317
+ "content": "<|spltoken25|>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "247": {
1325
+ "content": "<|spltoken26|>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "248": {
1333
+ "content": "<|spltoken27|>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "249": {
1341
+ "content": "<|spltoken28|>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "25": {
1349
+ "content": "<|ak|>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "250": {
1357
+ "content": "<|spltoken29|>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "251": {
1365
+ "content": "<|spltoken30|>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "252": {
1373
+ "content": "<|spltoken31|>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "253": {
1381
+ "content": "<|spltoken32|>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "254": {
1389
+ "content": "<|spltoken33|>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "26": {
1397
+ "content": "<|sq|>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "27": {
1405
+ "content": "<|am|>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "28": {
1413
+ "content": "<|ar|>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "29": {
1421
+ "content": "<|an|>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "3": {
1429
+ "content": "<|endoftext|>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "30": {
1437
+ "content": "<|hy|>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "31": {
1445
+ "content": "<|as|>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "32": {
1453
+ "content": "<|av|>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "33": {
1461
+ "content": "<|ae|>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "34": {
1469
+ "content": "<|ay|>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "35": {
1477
+ "content": "<|az|>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "36": {
1485
+ "content": "<|bm|>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "37": {
1493
+ "content": "<|ba|>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "38": {
1501
+ "content": "<|eu|>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "39": {
1509
+ "content": "<|be|>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "4": {
1517
+ "content": "<|startoftranscript|>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "40": {
1525
+ "content": "<|bn|>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "41": {
1533
+ "content": "<|bi|>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "42": {
1541
+ "content": "<|bs|>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "43": {
1549
+ "content": "<|br|>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "44": {
1557
+ "content": "<|bg|>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "45": {
1565
+ "content": "<|my|>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "46": {
1573
+ "content": "<|ca|>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "47": {
1581
+ "content": "<|ch|>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "48": {
1589
+ "content": "<|ce|>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "49": {
1597
+ "content": "<|ny|>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "5": {
1605
+ "content": "<|pnc|>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "50": {
1613
+ "content": "<|zh|>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "51": {
1621
+ "content": "<|cu|>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "52": {
1629
+ "content": "<|cv|>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "53": {
1637
+ "content": "<|kw|>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "54": {
1645
+ "content": "<|co|>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "55": {
1653
+ "content": "<|cr|>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "56": {
1661
+ "content": "<|hr|>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "57": {
1669
+ "content": "<|cs|>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "58": {
1677
+ "content": "<|da|>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "59": {
1685
+ "content": "<|dv|>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "6": {
1693
+ "content": "<|nopnc|>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "60": {
1701
+ "content": "<|nl|>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "61": {
1709
+ "content": "<|dz|>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "62": {
1717
+ "content": "<|en|>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "63": {
1725
+ "content": "<|eo|>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "64": {
1733
+ "content": "<|et|>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "65": {
1741
+ "content": "<|ee|>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "66": {
1749
+ "content": "<|fo|>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "67": {
1757
+ "content": "<|fj|>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "68": {
1765
+ "content": "<|fi|>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "69": {
1773
+ "content": "<|fr|>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "7": {
1781
+ "content": "<|startofcontext|>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "70": {
1789
+ "content": "<|fy|>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "71": {
1797
+ "content": "<|ff|>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "72": {
1805
+ "content": "<|gd|>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "73": {
1813
+ "content": "<|gl|>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "74": {
1821
+ "content": "<|lg|>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "75": {
1829
+ "content": "<|ka|>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "76": {
1837
+ "content": "<|de|>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "77": {
1845
+ "content": "<|el|>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "78": {
1853
+ "content": "<|kl|>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "79": {
1861
+ "content": "<|gn|>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "8": {
1869
+ "content": "<|itn|>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "80": {
1877
+ "content": "<|gu|>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "81": {
1885
+ "content": "<|ht|>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "82": {
1893
+ "content": "<|ha|>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "83": {
1901
+ "content": "<|he|>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "84": {
1909
+ "content": "<|hz|>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "85": {
1917
+ "content": "<|hi|>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "86": {
1925
+ "content": "<|ho|>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "87": {
1933
+ "content": "<|hu|>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "88": {
1941
+ "content": "<|is|>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "89": {
1949
+ "content": "<|io|>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "9": {
1957
+ "content": "<|noitn|>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "90": {
1965
+ "content": "<|ig|>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "91": {
1973
+ "content": "<|id|>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "92": {
1981
+ "content": "<|ia|>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "93": {
1989
+ "content": "<|ie|>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "94": {
1997
+ "content": "<|iu|>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "95": {
2005
+ "content": "<|ik|>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "96": {
2013
+ "content": "<|ga|>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "97": {
2021
+ "content": "<|it|>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "98": {
2029
+ "content": "<|ja|>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "99": {
2037
+ "content": "<|jv|>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ }
2044
+ },
2045
+ "additional_special_tokens": [
2046
+ "<|nospeech|>",
2047
+ "<|pnc|>",
2048
+ "<|nopnc|>",
2049
+ "<|startofcontext|>",
2050
+ "<|itn|>",
2051
+ "<|noitn|>",
2052
+ "<|timestamp|>",
2053
+ "<|notimestamp|>",
2054
+ "<|diarize|>",
2055
+ "<|nodiarize|>",
2056
+ "<|spkchange|>",
2057
+ "<|audioseparator|>",
2058
+ "<|emo:undefined|>",
2059
+ "<|emo:neutral|>",
2060
+ "<|emo:happy|>",
2061
+ "<|emo:sad|>",
2062
+ "<|emo:angry|>",
2063
+ "<|unklang|>",
2064
+ "<|aa|>",
2065
+ "<|ab|>",
2066
+ "<|af|>",
2067
+ "<|ak|>",
2068
+ "<|sq|>",
2069
+ "<|am|>",
2070
+ "<|ar|>",
2071
+ "<|an|>",
2072
+ "<|hy|>",
2073
+ "<|as|>",
2074
+ "<|av|>",
2075
+ "<|ae|>",
2076
+ "<|ay|>",
2077
+ "<|az|>",
2078
+ "<|bm|>",
2079
+ "<|ba|>",
2080
+ "<|eu|>",
2081
+ "<|be|>",
2082
+ "<|bn|>",
2083
+ "<|bi|>",
2084
+ "<|bs|>",
2085
+ "<|br|>",
2086
+ "<|bg|>",
2087
+ "<|my|>",
2088
+ "<|ca|>",
2089
+ "<|ch|>",
2090
+ "<|ce|>",
2091
+ "<|ny|>",
2092
+ "<|zh|>",
2093
+ "<|cu|>",
2094
+ "<|cv|>",
2095
+ "<|kw|>",
2096
+ "<|co|>",
2097
+ "<|cr|>",
2098
+ "<|hr|>",
2099
+ "<|cs|>",
2100
+ "<|da|>",
2101
+ "<|dv|>",
2102
+ "<|nl|>",
2103
+ "<|dz|>",
2104
+ "<|en|>",
2105
+ "<|eo|>",
2106
+ "<|et|>",
2107
+ "<|ee|>",
2108
+ "<|fo|>",
2109
+ "<|fj|>",
2110
+ "<|fi|>",
2111
+ "<|fr|>",
2112
+ "<|fy|>",
2113
+ "<|ff|>",
2114
+ "<|gd|>",
2115
+ "<|gl|>",
2116
+ "<|lg|>",
2117
+ "<|ka|>",
2118
+ "<|de|>",
2119
+ "<|el|>",
2120
+ "<|kl|>",
2121
+ "<|gn|>",
2122
+ "<|gu|>",
2123
+ "<|ht|>",
2124
+ "<|ha|>",
2125
+ "<|he|>",
2126
+ "<|hz|>",
2127
+ "<|hi|>",
2128
+ "<|ho|>",
2129
+ "<|hu|>",
2130
+ "<|is|>",
2131
+ "<|io|>",
2132
+ "<|ig|>",
2133
+ "<|id|>",
2134
+ "<|ia|>",
2135
+ "<|ie|>",
2136
+ "<|iu|>",
2137
+ "<|ik|>",
2138
+ "<|ga|>",
2139
+ "<|it|>",
2140
+ "<|ja|>",
2141
+ "<|jv|>",
2142
+ "<|kn|>",
2143
+ "<|kr|>",
2144
+ "<|ks|>",
2145
+ "<|kk|>",
2146
+ "<|km|>",
2147
+ "<|ki|>",
2148
+ "<|rw|>",
2149
+ "<|ky|>",
2150
+ "<|kv|>",
2151
+ "<|kg|>",
2152
+ "<|ko|>",
2153
+ "<|kj|>",
2154
+ "<|ku|>",
2155
+ "<|lo|>",
2156
+ "<|la|>",
2157
+ "<|lv|>",
2158
+ "<|li|>",
2159
+ "<|ln|>",
2160
+ "<|lt|>",
2161
+ "<|lu|>",
2162
+ "<|lb|>",
2163
+ "<|mk|>",
2164
+ "<|mg|>",
2165
+ "<|ms|>",
2166
+ "<|ml|>",
2167
+ "<|mt|>",
2168
+ "<|gv|>",
2169
+ "<|mi|>",
2170
+ "<|mr|>",
2171
+ "<|mh|>",
2172
+ "<|mn|>",
2173
+ "<|na|>",
2174
+ "<|nv|>",
2175
+ "<|nd|>",
2176
+ "<|nr|>",
2177
+ "<|ng|>",
2178
+ "<|ne|>",
2179
+ "<|no|>",
2180
+ "<|nb|>",
2181
+ "<|nn|>",
2182
+ "<|oc|>",
2183
+ "<|oj|>",
2184
+ "<|or|>",
2185
+ "<|om|>",
2186
+ "<|os|>",
2187
+ "<|pi|>",
2188
+ "<|ps|>",
2189
+ "<|fa|>",
2190
+ "<|pl|>",
2191
+ "<|pt|>",
2192
+ "<|pa|>",
2193
+ "<|qu|>",
2194
+ "<|ro|>",
2195
+ "<|rm|>",
2196
+ "<|rn|>",
2197
+ "<|ru|>",
2198
+ "<|se|>",
2199
+ "<|sm|>",
2200
+ "<|sg|>",
2201
+ "<|sa|>",
2202
+ "<|sc|>",
2203
+ "<|sr|>",
2204
+ "<|sn|>",
2205
+ "<|sd|>",
2206
+ "<|si|>",
2207
+ "<|sk|>",
2208
+ "<|sl|>",
2209
+ "<|so|>",
2210
+ "<|st|>",
2211
+ "<|es|>",
2212
+ "<|su|>",
2213
+ "<|sw|>",
2214
+ "<|ss|>",
2215
+ "<|sv|>",
2216
+ "<|tl|>",
2217
+ "<|ty|>",
2218
+ "<|tg|>",
2219
+ "<|ta|>",
2220
+ "<|tt|>",
2221
+ "<|te|>",
2222
+ "<|th|>",
2223
+ "<|bo|>",
2224
+ "<|ti|>",
2225
+ "<|to|>",
2226
+ "<|ts|>",
2227
+ "<|tn|>",
2228
+ "<|tr|>",
2229
+ "<|tk|>",
2230
+ "<|tw|>",
2231
+ "<|ug|>",
2232
+ "<|uk|>",
2233
+ "<|ur|>",
2234
+ "<|uz|>",
2235
+ "<|ve|>",
2236
+ "<|vi|>",
2237
+ "<|vo|>",
2238
+ "<|wa|>",
2239
+ "<|cy|>",
2240
+ "<|wo|>",
2241
+ "<|xh|>",
2242
+ "<|ii|>",
2243
+ "<|yi|>",
2244
+ "<|yo|>",
2245
+ "<|za|>",
2246
+ "<|zu|>",
2247
+ "<|spk0|>",
2248
+ "<|spk1|>",
2249
+ "<|spk2|>",
2250
+ "<|spk3|>",
2251
+ "<|spk4|>",
2252
+ "<|spk5|>",
2253
+ "<|spk6|>",
2254
+ "<|spk7|>",
2255
+ "<|spk8|>",
2256
+ "<|spk9|>",
2257
+ "<|spk10|>",
2258
+ "<|spk11|>",
2259
+ "<|spk12|>",
2260
+ "<|spk13|>",
2261
+ "<|spk14|>",
2262
+ "<|spk15|>",
2263
+ "<|spltoken0|>",
2264
+ "<|spltoken1|>",
2265
+ "<|spltoken2|>",
2266
+ "<|spltoken3|>",
2267
+ "<|spltoken4|>",
2268
+ "<|spltoken5|>",
2269
+ "<|spltoken6|>",
2270
+ "<|spltoken7|>",
2271
+ "<|spltoken8|>",
2272
+ "<|spltoken9|>",
2273
+ "<|spltoken10|>",
2274
+ "<|spltoken11|>",
2275
+ "<|spltoken12|>",
2276
+ "<|spltoken13|>",
2277
+ "<|spltoken14|>",
2278
+ "<|spltoken15|>",
2279
+ "<|spltoken16|>",
2280
+ "<|spltoken17|>",
2281
+ "<|spltoken18|>",
2282
+ "<|spltoken19|>",
2283
+ "<|spltoken20|>",
2284
+ "<|spltoken21|>",
2285
+ "<|spltoken22|>",
2286
+ "<|spltoken23|>",
2287
+ "<|spltoken24|>",
2288
+ "<|spltoken25|>",
2289
+ "<|spltoken26|>",
2290
+ "<|spltoken27|>",
2291
+ "<|spltoken28|>",
2292
+ "<|spltoken29|>",
2293
+ "<|spltoken30|>",
2294
+ "<|spltoken31|>",
2295
+ "<|spltoken32|>",
2296
+ "<|spltoken33|>"
2297
+ ],
2298
+ "auto_map": {
2299
+ "AutoTokenizer": [
2300
+ "tokenization_cohere_asr.CohereAsrTokenizer",
2301
+ null
2302
+ ]
2303
+ },
2304
+ "bos_token": "<|startoftranscript|>",
2305
+ "clean_up_tokenization_spaces": false,
2306
+ "eos_token": "<|endoftext|>",
2307
+ "extra_special_tokens": {},
2308
+ "model_max_length": 2048,
2309
+ "pad_token": "<pad>",
2310
+ "sp_model_kwargs": {},
2311
+ "split_special_tokens": true,
2312
+ "tokenizer_class": "CohereAsrTokenizer",
2313
+ "unk_token": "<unk>"
2314
+ }