TheCluster committed on
Commit
67283dd
·
verified ·
1 Parent(s): 13ac58c

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- elif content is iterable and content is not mapping %}
7
+ {%- for item in content %}
8
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if is_system_content %}
10
+ {{- raise_exception('System message cannot contain images.') }}
11
+ {%- endif %}
12
+ {%- if do_vision_count %}
13
+ {%- set image_count.value = image_count.value + 1 %}
14
+ {%- endif %}
15
+ {%- if add_vision_id %}
16
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
17
+ {%- endif %}
18
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
19
+ {%- elif 'video' in item or item.type == 'video' %}
20
+ {%- if is_system_content %}
21
+ {{- raise_exception('System message cannot contain videos.') }}
22
+ {%- endif %}
23
+ {%- if do_vision_count %}
24
+ {%- set video_count.value = video_count.value + 1 %}
25
+ {%- endif %}
26
+ {%- if add_vision_id %}
27
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
28
+ {%- endif %}
29
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
30
+ {%- elif 'text' in item %}
31
+ {{- item.text }}
32
+ {%- else %}
33
+ {{- raise_exception('Unexpected item type in content.') }}
34
+ {%- endif %}
35
+ {%- endfor %}
36
+ {%- elif content is none or content is undefined %}
37
+ {{- '' }}
38
+ {%- else %}
39
+ {{- raise_exception('Unexpected content type.') }}
40
+ {%- endif %}
41
+ {%- endmacro %}
42
+ {%- if not messages %}
43
+ {{- raise_exception('No messages provided.') }}
44
+ {%- endif %}
45
+ {%- if tools and tools is iterable and tools is not mapping %}
46
+ {{- '<|im_start|>system\n' }}
47
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
48
+ {%- for tool in tools %}
49
+ {{- "\n" }}
50
+ {{- tool | tojson }}
51
+ {%- endfor %}
52
+ {{- "\n</tools>" }}
53
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
54
+ {%- if messages[0].role == 'system' %}
55
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
56
+ {%- if content %}
57
+ {{- '\n\n' + content }}
58
+ {%- endif %}
59
+ {%- endif %}
60
+ {{- '<|im_end|>\n' }}
61
+ {%- else %}
62
+ {%- if messages[0].role == 'system' %}
63
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
64
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
68
+ {%- for message in messages[::-1] %}
69
+ {%- set index = (messages|length - 1) - loop.index0 %}
70
+ {%- if ns.multi_step_tool and message.role == "user" %}
71
+ {%- set content = render_content(message.content, false)|trim %}
72
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
73
+ {%- set ns.multi_step_tool = false %}
74
+ {%- set ns.last_query_index = index %}
75
+ {%- endif %}
76
+ {%- endif %}
77
+ {%- endfor %}
78
+ {%- for message in messages %}
79
+ {%- set content = render_content(message.content, true)|trim %}
80
+ {%- if message.role == "system" %}
81
+ {%- if not loop.first %}
82
+ {{- raise_exception('System message must be at the beginning.') }}
83
+ {%- endif %}
84
+ {%- elif message.role == "user" %}
85
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
86
+ {%- elif message.role == "assistant" %}
87
+ {%- set reasoning_content = '' %}
88
+ {%- if message.reasoning_content is string %}
89
+ {%- set reasoning_content = message.reasoning_content %}
90
+ {%- else %}
91
+ {%- if '</think>' in content %}
92
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
93
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
94
+ {%- endif %}
95
+ {%- endif %}
96
+ {%- set reasoning_content = reasoning_content|trim %}
97
+ {%- if loop.index0 > ns.last_query_index %}
98
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
99
+ {%- else %}
100
+ {{- '<|im_start|>' + message.role + '\n' + content }}
101
+ {%- endif %}
102
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
103
+ {%- for tool_call in message.tool_calls %}
104
+ {%- if tool_call.function is defined %}
105
+ {%- set tool_call = tool_call.function %}
106
+ {%- endif %}
107
+ {%- if loop.first %}
108
+ {%- if content|trim %}
109
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
110
+ {%- else %}
111
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
112
+ {%- endif %}
113
+ {%- else %}
114
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
115
+ {%- endif %}
116
+ {%- if tool_call.arguments is defined %}
117
+ {%- for args_name, args_value in tool_call.arguments|items %}
118
+ {{- '<parameter=' + args_name + '>\n' }}
119
+ {%- set args_value = args_value | tojson if args_value is mapping or (args_value is iterable and args_value is not string) else args_value | string %}
120
+ {{- args_value }}
121
+ {{- '\n</parameter>\n' }}
122
+ {%- endfor %}
123
+ {%- endif %}
124
+ {{- '</function>\n</tool_call>' }}
125
+ {%- endfor %}
126
+ {%- endif %}
127
+ {{- '<|im_end|>\n' }}
128
+ {%- elif message.role == "tool" %}
129
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
130
+ {{- '<|im_start|>user' }}
131
+ {%- endif %}
132
+ {{- '\n<tool_response>\n' }}
133
+ {{- content }}
134
+ {{- '\n</tool_response>' }}
135
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
136
+ {{- '<|im_end|>\n' }}
137
+ {%- elif loop.last %}
138
+ {{- '<|im_end|>\n' }}
139
+ {%- endif %}
140
+ {%- else %}
141
+ {{- raise_exception('Unexpected message role.') }}
142
+ {%- endif %}
143
+ {%- endfor %}
144
+ {%- if add_generation_prompt %}
145
+ {{- '<|im_start|>assistant\n' }}
146
+ {%- if enable_thinking is not defined or enable_thinking is false %}
147
+ {{- '<think>\n\n</think>\n\n' }}
148
+ {%- else %}
149
+ {{- '<think>\n' }}
150
+ {%- endif %}
151
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,776 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3_5MoeForConditionalGeneration"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "eos_token_id": [
7
+ 248046,
8
+ 248044
9
+ ],
10
+ "image_token_id": 248056,
11
+ "model_type": "qwen3_5_moe",
12
+ "quantization": {
13
+ "group_size": 32,
14
+ "bits": 4,
15
+ "mode": "mxfp4",
16
+ "language_model.model.layers.0.mlp.gate": {
17
+ "group_size": 64,
18
+ "bits": 8
19
+ },
20
+ "language_model.model.layers.0.mlp.shared_expert_gate": {
21
+ "group_size": 64,
22
+ "bits": 8
23
+ },
24
+ "language_model.model.layers.1.mlp.gate": {
25
+ "group_size": 64,
26
+ "bits": 8
27
+ },
28
+ "language_model.model.layers.1.mlp.shared_expert_gate": {
29
+ "group_size": 64,
30
+ "bits": 8
31
+ },
32
+ "language_model.model.layers.2.mlp.gate": {
33
+ "group_size": 64,
34
+ "bits": 8
35
+ },
36
+ "language_model.model.layers.2.mlp.shared_expert_gate": {
37
+ "group_size": 64,
38
+ "bits": 8
39
+ },
40
+ "language_model.model.layers.3.mlp.gate": {
41
+ "group_size": 64,
42
+ "bits": 8
43
+ },
44
+ "language_model.model.layers.3.mlp.shared_expert_gate": {
45
+ "group_size": 64,
46
+ "bits": 8
47
+ },
48
+ "language_model.model.layers.4.mlp.gate": {
49
+ "group_size": 64,
50
+ "bits": 8
51
+ },
52
+ "language_model.model.layers.4.mlp.shared_expert_gate": {
53
+ "group_size": 64,
54
+ "bits": 8
55
+ },
56
+ "language_model.model.layers.5.mlp.gate": {
57
+ "group_size": 64,
58
+ "bits": 8
59
+ },
60
+ "language_model.model.layers.5.mlp.shared_expert_gate": {
61
+ "group_size": 64,
62
+ "bits": 8
63
+ },
64
+ "language_model.model.layers.6.mlp.gate": {
65
+ "group_size": 64,
66
+ "bits": 8
67
+ },
68
+ "language_model.model.layers.6.mlp.shared_expert_gate": {
69
+ "group_size": 64,
70
+ "bits": 8
71
+ },
72
+ "language_model.model.layers.7.mlp.gate": {
73
+ "group_size": 64,
74
+ "bits": 8
75
+ },
76
+ "language_model.model.layers.7.mlp.shared_expert_gate": {
77
+ "group_size": 64,
78
+ "bits": 8
79
+ },
80
+ "language_model.model.layers.8.mlp.gate": {
81
+ "group_size": 64,
82
+ "bits": 8
83
+ },
84
+ "language_model.model.layers.8.mlp.shared_expert_gate": {
85
+ "group_size": 64,
86
+ "bits": 8
87
+ },
88
+ "language_model.model.layers.9.mlp.gate": {
89
+ "group_size": 64,
90
+ "bits": 8
91
+ },
92
+ "language_model.model.layers.9.mlp.shared_expert_gate": {
93
+ "group_size": 64,
94
+ "bits": 8
95
+ },
96
+ "language_model.model.layers.10.mlp.gate": {
97
+ "group_size": 64,
98
+ "bits": 8
99
+ },
100
+ "language_model.model.layers.10.mlp.shared_expert_gate": {
101
+ "group_size": 64,
102
+ "bits": 8
103
+ },
104
+ "language_model.model.layers.11.mlp.gate": {
105
+ "group_size": 64,
106
+ "bits": 8
107
+ },
108
+ "language_model.model.layers.11.mlp.shared_expert_gate": {
109
+ "group_size": 64,
110
+ "bits": 8
111
+ },
112
+ "language_model.model.layers.12.mlp.gate": {
113
+ "group_size": 64,
114
+ "bits": 8
115
+ },
116
+ "language_model.model.layers.12.mlp.shared_expert_gate": {
117
+ "group_size": 64,
118
+ "bits": 8
119
+ },
120
+ "language_model.model.layers.13.mlp.gate": {
121
+ "group_size": 64,
122
+ "bits": 8
123
+ },
124
+ "language_model.model.layers.13.mlp.shared_expert_gate": {
125
+ "group_size": 64,
126
+ "bits": 8
127
+ },
128
+ "language_model.model.layers.14.mlp.gate": {
129
+ "group_size": 64,
130
+ "bits": 8
131
+ },
132
+ "language_model.model.layers.14.mlp.shared_expert_gate": {
133
+ "group_size": 64,
134
+ "bits": 8
135
+ },
136
+ "language_model.model.layers.15.mlp.gate": {
137
+ "group_size": 64,
138
+ "bits": 8
139
+ },
140
+ "language_model.model.layers.15.mlp.shared_expert_gate": {
141
+ "group_size": 64,
142
+ "bits": 8
143
+ },
144
+ "language_model.model.layers.16.mlp.gate": {
145
+ "group_size": 64,
146
+ "bits": 8
147
+ },
148
+ "language_model.model.layers.16.mlp.shared_expert_gate": {
149
+ "group_size": 64,
150
+ "bits": 8
151
+ },
152
+ "language_model.model.layers.17.mlp.gate": {
153
+ "group_size": 64,
154
+ "bits": 8
155
+ },
156
+ "language_model.model.layers.17.mlp.shared_expert_gate": {
157
+ "group_size": 64,
158
+ "bits": 8
159
+ },
160
+ "language_model.model.layers.18.mlp.gate": {
161
+ "group_size": 64,
162
+ "bits": 8
163
+ },
164
+ "language_model.model.layers.18.mlp.shared_expert_gate": {
165
+ "group_size": 64,
166
+ "bits": 8
167
+ },
168
+ "language_model.model.layers.19.mlp.gate": {
169
+ "group_size": 64,
170
+ "bits": 8
171
+ },
172
+ "language_model.model.layers.19.mlp.shared_expert_gate": {
173
+ "group_size": 64,
174
+ "bits": 8
175
+ },
176
+ "language_model.model.layers.20.mlp.gate": {
177
+ "group_size": 64,
178
+ "bits": 8
179
+ },
180
+ "language_model.model.layers.20.mlp.shared_expert_gate": {
181
+ "group_size": 64,
182
+ "bits": 8
183
+ },
184
+ "language_model.model.layers.21.mlp.gate": {
185
+ "group_size": 64,
186
+ "bits": 8
187
+ },
188
+ "language_model.model.layers.21.mlp.shared_expert_gate": {
189
+ "group_size": 64,
190
+ "bits": 8
191
+ },
192
+ "language_model.model.layers.22.mlp.gate": {
193
+ "group_size": 64,
194
+ "bits": 8
195
+ },
196
+ "language_model.model.layers.22.mlp.shared_expert_gate": {
197
+ "group_size": 64,
198
+ "bits": 8
199
+ },
200
+ "language_model.model.layers.23.mlp.gate": {
201
+ "group_size": 64,
202
+ "bits": 8
203
+ },
204
+ "language_model.model.layers.23.mlp.shared_expert_gate": {
205
+ "group_size": 64,
206
+ "bits": 8
207
+ },
208
+ "language_model.model.layers.24.mlp.gate": {
209
+ "group_size": 64,
210
+ "bits": 8
211
+ },
212
+ "language_model.model.layers.24.mlp.shared_expert_gate": {
213
+ "group_size": 64,
214
+ "bits": 8
215
+ },
216
+ "language_model.model.layers.25.mlp.gate": {
217
+ "group_size": 64,
218
+ "bits": 8
219
+ },
220
+ "language_model.model.layers.25.mlp.shared_expert_gate": {
221
+ "group_size": 64,
222
+ "bits": 8
223
+ },
224
+ "language_model.model.layers.26.mlp.gate": {
225
+ "group_size": 64,
226
+ "bits": 8
227
+ },
228
+ "language_model.model.layers.26.mlp.shared_expert_gate": {
229
+ "group_size": 64,
230
+ "bits": 8
231
+ },
232
+ "language_model.model.layers.27.mlp.gate": {
233
+ "group_size": 64,
234
+ "bits": 8
235
+ },
236
+ "language_model.model.layers.27.mlp.shared_expert_gate": {
237
+ "group_size": 64,
238
+ "bits": 8
239
+ },
240
+ "language_model.model.layers.28.mlp.gate": {
241
+ "group_size": 64,
242
+ "bits": 8
243
+ },
244
+ "language_model.model.layers.28.mlp.shared_expert_gate": {
245
+ "group_size": 64,
246
+ "bits": 8
247
+ },
248
+ "language_model.model.layers.29.mlp.gate": {
249
+ "group_size": 64,
250
+ "bits": 8
251
+ },
252
+ "language_model.model.layers.29.mlp.shared_expert_gate": {
253
+ "group_size": 64,
254
+ "bits": 8
255
+ },
256
+ "language_model.model.layers.30.mlp.gate": {
257
+ "group_size": 64,
258
+ "bits": 8
259
+ },
260
+ "language_model.model.layers.30.mlp.shared_expert_gate": {
261
+ "group_size": 64,
262
+ "bits": 8
263
+ },
264
+ "language_model.model.layers.31.mlp.gate": {
265
+ "group_size": 64,
266
+ "bits": 8
267
+ },
268
+ "language_model.model.layers.31.mlp.shared_expert_gate": {
269
+ "group_size": 64,
270
+ "bits": 8
271
+ },
272
+ "language_model.model.layers.32.mlp.gate": {
273
+ "group_size": 64,
274
+ "bits": 8
275
+ },
276
+ "language_model.model.layers.32.mlp.shared_expert_gate": {
277
+ "group_size": 64,
278
+ "bits": 8
279
+ },
280
+ "language_model.model.layers.33.mlp.gate": {
281
+ "group_size": 64,
282
+ "bits": 8
283
+ },
284
+ "language_model.model.layers.33.mlp.shared_expert_gate": {
285
+ "group_size": 64,
286
+ "bits": 8
287
+ },
288
+ "language_model.model.layers.34.mlp.gate": {
289
+ "group_size": 64,
290
+ "bits": 8
291
+ },
292
+ "language_model.model.layers.34.mlp.shared_expert_gate": {
293
+ "group_size": 64,
294
+ "bits": 8
295
+ },
296
+ "language_model.model.layers.35.mlp.gate": {
297
+ "group_size": 64,
298
+ "bits": 8
299
+ },
300
+ "language_model.model.layers.35.mlp.shared_expert_gate": {
301
+ "group_size": 64,
302
+ "bits": 8
303
+ },
304
+ "language_model.model.layers.36.mlp.gate": {
305
+ "group_size": 64,
306
+ "bits": 8
307
+ },
308
+ "language_model.model.layers.36.mlp.shared_expert_gate": {
309
+ "group_size": 64,
310
+ "bits": 8
311
+ },
312
+ "language_model.model.layers.37.mlp.gate": {
313
+ "group_size": 64,
314
+ "bits": 8
315
+ },
316
+ "language_model.model.layers.37.mlp.shared_expert_gate": {
317
+ "group_size": 64,
318
+ "bits": 8
319
+ },
320
+ "language_model.model.layers.38.mlp.gate": {
321
+ "group_size": 64,
322
+ "bits": 8
323
+ },
324
+ "language_model.model.layers.38.mlp.shared_expert_gate": {
325
+ "group_size": 64,
326
+ "bits": 8
327
+ },
328
+ "language_model.model.layers.39.mlp.gate": {
329
+ "group_size": 64,
330
+ "bits": 8
331
+ },
332
+ "language_model.model.layers.39.mlp.shared_expert_gate": {
333
+ "group_size": 64,
334
+ "bits": 8
335
+ }
336
+ },
337
+ "quantization_config": {
338
+ "group_size": 32,
339
+ "bits": 4,
340
+ "mode": "mxfp4",
341
+ "language_model.model.layers.0.mlp.gate": {
342
+ "group_size": 64,
343
+ "bits": 8
344
+ },
345
+ "language_model.model.layers.0.mlp.shared_expert_gate": {
346
+ "group_size": 64,
347
+ "bits": 8
348
+ },
349
+ "language_model.model.layers.1.mlp.gate": {
350
+ "group_size": 64,
351
+ "bits": 8
352
+ },
353
+ "language_model.model.layers.1.mlp.shared_expert_gate": {
354
+ "group_size": 64,
355
+ "bits": 8
356
+ },
357
+ "language_model.model.layers.2.mlp.gate": {
358
+ "group_size": 64,
359
+ "bits": 8
360
+ },
361
+ "language_model.model.layers.2.mlp.shared_expert_gate": {
362
+ "group_size": 64,
363
+ "bits": 8
364
+ },
365
+ "language_model.model.layers.3.mlp.gate": {
366
+ "group_size": 64,
367
+ "bits": 8
368
+ },
369
+ "language_model.model.layers.3.mlp.shared_expert_gate": {
370
+ "group_size": 64,
371
+ "bits": 8
372
+ },
373
+ "language_model.model.layers.4.mlp.gate": {
374
+ "group_size": 64,
375
+ "bits": 8
376
+ },
377
+ "language_model.model.layers.4.mlp.shared_expert_gate": {
378
+ "group_size": 64,
379
+ "bits": 8
380
+ },
381
+ "language_model.model.layers.5.mlp.gate": {
382
+ "group_size": 64,
383
+ "bits": 8
384
+ },
385
+ "language_model.model.layers.5.mlp.shared_expert_gate": {
386
+ "group_size": 64,
387
+ "bits": 8
388
+ },
389
+ "language_model.model.layers.6.mlp.gate": {
390
+ "group_size": 64,
391
+ "bits": 8
392
+ },
393
+ "language_model.model.layers.6.mlp.shared_expert_gate": {
394
+ "group_size": 64,
395
+ "bits": 8
396
+ },
397
+ "language_model.model.layers.7.mlp.gate": {
398
+ "group_size": 64,
399
+ "bits": 8
400
+ },
401
+ "language_model.model.layers.7.mlp.shared_expert_gate": {
402
+ "group_size": 64,
403
+ "bits": 8
404
+ },
405
+ "language_model.model.layers.8.mlp.gate": {
406
+ "group_size": 64,
407
+ "bits": 8
408
+ },
409
+ "language_model.model.layers.8.mlp.shared_expert_gate": {
410
+ "group_size": 64,
411
+ "bits": 8
412
+ },
413
+ "language_model.model.layers.9.mlp.gate": {
414
+ "group_size": 64,
415
+ "bits": 8
416
+ },
417
+ "language_model.model.layers.9.mlp.shared_expert_gate": {
418
+ "group_size": 64,
419
+ "bits": 8
420
+ },
421
+ "language_model.model.layers.10.mlp.gate": {
422
+ "group_size": 64,
423
+ "bits": 8
424
+ },
425
+ "language_model.model.layers.10.mlp.shared_expert_gate": {
426
+ "group_size": 64,
427
+ "bits": 8
428
+ },
429
+ "language_model.model.layers.11.mlp.gate": {
430
+ "group_size": 64,
431
+ "bits": 8
432
+ },
433
+ "language_model.model.layers.11.mlp.shared_expert_gate": {
434
+ "group_size": 64,
435
+ "bits": 8
436
+ },
437
+ "language_model.model.layers.12.mlp.gate": {
438
+ "group_size": 64,
439
+ "bits": 8
440
+ },
441
+ "language_model.model.layers.12.mlp.shared_expert_gate": {
442
+ "group_size": 64,
443
+ "bits": 8
444
+ },
445
+ "language_model.model.layers.13.mlp.gate": {
446
+ "group_size": 64,
447
+ "bits": 8
448
+ },
449
+ "language_model.model.layers.13.mlp.shared_expert_gate": {
450
+ "group_size": 64,
451
+ "bits": 8
452
+ },
453
+ "language_model.model.layers.14.mlp.gate": {
454
+ "group_size": 64,
455
+ "bits": 8
456
+ },
457
+ "language_model.model.layers.14.mlp.shared_expert_gate": {
458
+ "group_size": 64,
459
+ "bits": 8
460
+ },
461
+ "language_model.model.layers.15.mlp.gate": {
462
+ "group_size": 64,
463
+ "bits": 8
464
+ },
465
+ "language_model.model.layers.15.mlp.shared_expert_gate": {
466
+ "group_size": 64,
467
+ "bits": 8
468
+ },
469
+ "language_model.model.layers.16.mlp.gate": {
470
+ "group_size": 64,
471
+ "bits": 8
472
+ },
473
+ "language_model.model.layers.16.mlp.shared_expert_gate": {
474
+ "group_size": 64,
475
+ "bits": 8
476
+ },
477
+ "language_model.model.layers.17.mlp.gate": {
478
+ "group_size": 64,
479
+ "bits": 8
480
+ },
481
+ "language_model.model.layers.17.mlp.shared_expert_gate": {
482
+ "group_size": 64,
483
+ "bits": 8
484
+ },
485
+ "language_model.model.layers.18.mlp.gate": {
486
+ "group_size": 64,
487
+ "bits": 8
488
+ },
489
+ "language_model.model.layers.18.mlp.shared_expert_gate": {
490
+ "group_size": 64,
491
+ "bits": 8
492
+ },
493
+ "language_model.model.layers.19.mlp.gate": {
494
+ "group_size": 64,
495
+ "bits": 8
496
+ },
497
+ "language_model.model.layers.19.mlp.shared_expert_gate": {
498
+ "group_size": 64,
499
+ "bits": 8
500
+ },
501
+ "language_model.model.layers.20.mlp.gate": {
502
+ "group_size": 64,
503
+ "bits": 8
504
+ },
505
+ "language_model.model.layers.20.mlp.shared_expert_gate": {
506
+ "group_size": 64,
507
+ "bits": 8
508
+ },
509
+ "language_model.model.layers.21.mlp.gate": {
510
+ "group_size": 64,
511
+ "bits": 8
512
+ },
513
+ "language_model.model.layers.21.mlp.shared_expert_gate": {
514
+ "group_size": 64,
515
+ "bits": 8
516
+ },
517
+ "language_model.model.layers.22.mlp.gate": {
518
+ "group_size": 64,
519
+ "bits": 8
520
+ },
521
+ "language_model.model.layers.22.mlp.shared_expert_gate": {
522
+ "group_size": 64,
523
+ "bits": 8
524
+ },
525
+ "language_model.model.layers.23.mlp.gate": {
526
+ "group_size": 64,
527
+ "bits": 8
528
+ },
529
+ "language_model.model.layers.23.mlp.shared_expert_gate": {
530
+ "group_size": 64,
531
+ "bits": 8
532
+ },
533
+ "language_model.model.layers.24.mlp.gate": {
534
+ "group_size": 64,
535
+ "bits": 8
536
+ },
537
+ "language_model.model.layers.24.mlp.shared_expert_gate": {
538
+ "group_size": 64,
539
+ "bits": 8
540
+ },
541
+ "language_model.model.layers.25.mlp.gate": {
542
+ "group_size": 64,
543
+ "bits": 8
544
+ },
545
+ "language_model.model.layers.25.mlp.shared_expert_gate": {
546
+ "group_size": 64,
547
+ "bits": 8
548
+ },
549
+ "language_model.model.layers.26.mlp.gate": {
550
+ "group_size": 64,
551
+ "bits": 8
552
+ },
553
+ "language_model.model.layers.26.mlp.shared_expert_gate": {
554
+ "group_size": 64,
555
+ "bits": 8
556
+ },
557
+ "language_model.model.layers.27.mlp.gate": {
558
+ "group_size": 64,
559
+ "bits": 8
560
+ },
561
+ "language_model.model.layers.27.mlp.shared_expert_gate": {
562
+ "group_size": 64,
563
+ "bits": 8
564
+ },
565
+ "language_model.model.layers.28.mlp.gate": {
566
+ "group_size": 64,
567
+ "bits": 8
568
+ },
569
+ "language_model.model.layers.28.mlp.shared_expert_gate": {
570
+ "group_size": 64,
571
+ "bits": 8
572
+ },
573
+ "language_model.model.layers.29.mlp.gate": {
574
+ "group_size": 64,
575
+ "bits": 8
576
+ },
577
+ "language_model.model.layers.29.mlp.shared_expert_gate": {
578
+ "group_size": 64,
579
+ "bits": 8
580
+ },
581
+ "language_model.model.layers.30.mlp.gate": {
582
+ "group_size": 64,
583
+ "bits": 8
584
+ },
585
+ "language_model.model.layers.30.mlp.shared_expert_gate": {
586
+ "group_size": 64,
587
+ "bits": 8
588
+ },
589
+ "language_model.model.layers.31.mlp.gate": {
590
+ "group_size": 64,
591
+ "bits": 8
592
+ },
593
+ "language_model.model.layers.31.mlp.shared_expert_gate": {
594
+ "group_size": 64,
595
+ "bits": 8
596
+ },
597
+ "language_model.model.layers.32.mlp.gate": {
598
+ "group_size": 64,
599
+ "bits": 8
600
+ },
601
+ "language_model.model.layers.32.mlp.shared_expert_gate": {
602
+ "group_size": 64,
603
+ "bits": 8
604
+ },
605
+ "language_model.model.layers.33.mlp.gate": {
606
+ "group_size": 64,
607
+ "bits": 8
608
+ },
609
+ "language_model.model.layers.33.mlp.shared_expert_gate": {
610
+ "group_size": 64,
611
+ "bits": 8
612
+ },
613
+ "language_model.model.layers.34.mlp.gate": {
614
+ "group_size": 64,
615
+ "bits": 8
616
+ },
617
+ "language_model.model.layers.34.mlp.shared_expert_gate": {
618
+ "group_size": 64,
619
+ "bits": 8
620
+ },
621
+ "language_model.model.layers.35.mlp.gate": {
622
+ "group_size": 64,
623
+ "bits": 8
624
+ },
625
+ "language_model.model.layers.35.mlp.shared_expert_gate": {
626
+ "group_size": 64,
627
+ "bits": 8
628
+ },
629
+ "language_model.model.layers.36.mlp.gate": {
630
+ "group_size": 64,
631
+ "bits": 8
632
+ },
633
+ "language_model.model.layers.36.mlp.shared_expert_gate": {
634
+ "group_size": 64,
635
+ "bits": 8
636
+ },
637
+ "language_model.model.layers.37.mlp.gate": {
638
+ "group_size": 64,
639
+ "bits": 8
640
+ },
641
+ "language_model.model.layers.37.mlp.shared_expert_gate": {
642
+ "group_size": 64,
643
+ "bits": 8
644
+ },
645
+ "language_model.model.layers.38.mlp.gate": {
646
+ "group_size": 64,
647
+ "bits": 8
648
+ },
649
+ "language_model.model.layers.38.mlp.shared_expert_gate": {
650
+ "group_size": 64,
651
+ "bits": 8
652
+ },
653
+ "language_model.model.layers.39.mlp.gate": {
654
+ "group_size": 64,
655
+ "bits": 8
656
+ },
657
+ "language_model.model.layers.39.mlp.shared_expert_gate": {
658
+ "group_size": 64,
659
+ "bits": 8
660
+ }
661
+ },
662
+ "text_config": {
663
+ "attention_bias": false,
664
+ "attention_dropout": 0.0,
665
+ "attn_output_gate": true,
666
+ "bos_token_id": null,
667
+ "dtype": "bfloat16",
668
+ "eos_token_id": 248044,
669
+ "full_attention_interval": 4,
670
+ "head_dim": 256,
671
+ "hidden_act": "silu",
672
+ "hidden_size": 2048,
673
+ "initializer_range": 0.02,
674
+ "layer_types": [
675
+ "linear_attention",
676
+ "linear_attention",
677
+ "linear_attention",
678
+ "full_attention",
679
+ "linear_attention",
680
+ "linear_attention",
681
+ "linear_attention",
682
+ "full_attention",
683
+ "linear_attention",
684
+ "linear_attention",
685
+ "linear_attention",
686
+ "full_attention",
687
+ "linear_attention",
688
+ "linear_attention",
689
+ "linear_attention",
690
+ "full_attention",
691
+ "linear_attention",
692
+ "linear_attention",
693
+ "linear_attention",
694
+ "full_attention",
695
+ "linear_attention",
696
+ "linear_attention",
697
+ "linear_attention",
698
+ "full_attention",
699
+ "linear_attention",
700
+ "linear_attention",
701
+ "linear_attention",
702
+ "full_attention",
703
+ "linear_attention",
704
+ "linear_attention",
705
+ "linear_attention",
706
+ "full_attention",
707
+ "linear_attention",
708
+ "linear_attention",
709
+ "linear_attention",
710
+ "full_attention",
711
+ "linear_attention",
712
+ "linear_attention",
713
+ "linear_attention",
714
+ "full_attention"
715
+ ],
716
+ "linear_conv_kernel_dim": 4,
717
+ "linear_key_head_dim": 128,
718
+ "linear_num_key_heads": 16,
719
+ "linear_num_value_heads": 32,
720
+ "linear_value_head_dim": 128,
721
+ "mamba_ssm_dtype": "float32",
722
+ "max_position_embeddings": 262144,
723
+ "mlp_only_layers": [],
724
+ "model_type": "qwen3_5_moe_text",
725
+ "moe_intermediate_size": 512,
726
+ "mtp_num_hidden_layers": 1,
727
+ "mtp_use_dedicated_embeddings": false,
728
+ "num_attention_heads": 16,
729
+ "num_experts": 256,
730
+ "num_experts_per_tok": 8,
731
+ "num_hidden_layers": 40,
732
+ "num_key_value_heads": 2,
733
+ "output_router_logits": false,
734
+ "pad_token_id": null,
735
+ "partial_rotary_factor": 0.25,
736
+ "rms_norm_eps": 1e-06,
737
+ "rope_parameters": {
738
+ "mrope_interleaved": true,
739
+ "mrope_section": [
740
+ 11,
741
+ 11,
742
+ 10
743
+ ],
744
+ "partial_rotary_factor": 0.25,
745
+ "rope_theta": 10000000,
746
+ "rope_type": "default"
747
+ },
748
+ "router_aux_loss_coef": 0.001,
749
+ "shared_expert_intermediate_size": 512,
750
+ "tie_word_embeddings": false,
751
+ "use_cache": true,
752
+ "vocab_size": 248320
753
+ },
754
+ "tie_word_embeddings": false,
755
+ "transformers_version": "5.2.0",
756
+ "video_token_id": 248057,
757
+ "vision_config": {
758
+ "deepstack_visual_indexes": [],
759
+ "depth": 27,
760
+ "dtype": "bfloat16",
761
+ "hidden_act": "gelu_pytorch_tanh",
762
+ "hidden_size": 1152,
763
+ "in_channels": 3,
764
+ "initializer_range": 0.02,
765
+ "intermediate_size": 4304,
766
+ "model_type": "qwen3_5_moe",
767
+ "num_heads": 16,
768
+ "num_position_embeddings": 2304,
769
+ "out_hidden_size": 2048,
770
+ "patch_size": 16,
771
+ "spatial_merge_size": 2,
772
+ "temporal_patch_size": 2
773
+ },
774
+ "vision_end_token_id": 248054,
775
+ "vision_start_token_id": 248053
776
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 248044,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 248046,
6
+ 248044
7
+ ],
8
+ "pad_token_id": 248044,
9
+ "temperature": 1.0,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "5.2.0"
13
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bedc09cee73773b3e57e18e6d17b78cd33ba071489ad347537a67eb835869d32
3
+ size 5349745055
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37ed14331fb2aaabd23bf24eadd3dc65125eea2bae585243e5568d59b472064e
3
+ size 5365850155
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e039c056a0ddb8596971715f2942a16519becb79315d010d6d243bc8576f6275
3
+ size 5365850186
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcbb0a956415004a72879bfbbe1144da3cbff38a9cf436d3e4c65c31b2073805
3
+ size 3238250734
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {
3
+ "longest_edge": 16777216,
4
+ "shortest_edge": 65536
5
+ },
6
+ "patch_size": 16,
7
+ "temporal_patch_size": 2,
8
+ "merge_size": 2,
9
+ "image_mean": [
10
+ 0.5,
11
+ 0.5,
12
+ 0.5
13
+ ],
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "Qwen3VLProcessor",
20
+ "image_processor_type": "Qwen2VLImageProcessorFast"
21
+ }
processor_config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "Qwen2VLImageProcessorFast",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "merge_size": 2,
20
+ "patch_size": 16,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 16777216,
25
+ "shortest_edge": 65536
26
+ },
27
+ "temporal_patch_size": 2
28
+ },
29
+ "processor_class": "Qwen3VLProcessor",
30
+ "video_processor": {
31
+ "data_format": "channels_first",
32
+ "default_to_square": true,
33
+ "do_convert_rgb": true,
34
+ "do_normalize": true,
35
+ "do_rescale": true,
36
+ "do_resize": true,
37
+ "do_sample_frames": true,
38
+ "fps": 2,
39
+ "image_mean": [
40
+ 0.5,
41
+ 0.5,
42
+ 0.5
43
+ ],
44
+ "image_std": [
45
+ 0.5,
46
+ 0.5,
47
+ 0.5
48
+ ],
49
+ "max_frames": 768,
50
+ "merge_size": 2,
51
+ "min_frames": 4,
52
+ "patch_size": 16,
53
+ "resample": 3,
54
+ "rescale_factor": 0.00392156862745098,
55
+ "return_metadata": false,
56
+ "size": {
57
+ "longest_edge": 25165824,
58
+ "shortest_edge": 4096
59
+ },
60
+ "temporal_patch_size": 2,
61
+ "video_processor_type": "Qwen3VLVideoProcessor"
62
+ }
63
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:639e352c0f904c1875d448ebed6f6faac005fd3eb58393b7f1fb3ff044e5ca03
3
+ size 19989510
tokenizer_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": true,
13
+ "max_length": null,
14
+ "model_max_length": 262144,
15
+ "model_specific_special_tokens": {
16
+ "audio_bos_token": "<|audio_start|>",
17
+ "audio_eos_token": "<|audio_end|>",
18
+ "audio_token": "<|audio_pad|>",
19
+ "image_token": "<|image_pad|>",
20
+ "video_token": "<|video_pad|>",
21
+ "vision_bos_token": "<|vision_start|>",
22
+ "vision_eos_token": "<|vision_end|>"
23
+ },
24
+ "pad_to_multiple_of": null,
25
+ "pad_token": "<|endoftext|>",
26
+ "pad_token_type_id": 0,
27
+ "padding_side": "left",
28
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
29
+ "processor_class": "Qwen3VLProcessor",
30
+ "split_special_tokens": false,
31
+ "tokenizer_class": "TokenizersBackend",
32
+ "unk_token": null,
33
+ "video_token": "<|video_pad|>",
34
+ "vision_bos_token": "<|vision_start|>",
35
+ "vision_eos_token": "<|vision_end|>"
36
+ }
video_preprocessor_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {
3
+ "longest_edge": 25165824,
4
+ "shortest_edge": 4096
5
+ },
6
+ "patch_size": 16,
7
+ "temporal_patch_size": 2,
8
+ "merge_size": 2,
9
+ "image_mean": [
10
+ 0.5,
11
+ 0.5,
12
+ 0.5
13
+ ],
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "Qwen3VLProcessor",
20
+ "video_processor_type": "Qwen3VLVideoProcessor"
21
+ }