ertghiu256 committed
Commit 67cba81 · verified · 1 parent: a5b8efe

(Trained with Unsloth)

Files changed (2):
  1. config.json +40 -2
  2. generation_config.json +1 -1
config.json CHANGED
@@ -10,6 +10,44 @@
   "hidden_size": 2560,
   "initializer_range": 0.02,
   "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 40960,
   "max_window_layers": 36,
   "model_type": "qwen3",
@@ -23,9 +61,9 @@
   "sliding_window": null,
   "tie_word_embeddings": true,
   "torch_dtype": "float16",
-  "transformers_version": "4.52.4",
+  "transformers_version": "4.53.0",
   "unsloth_fixed": true,
-  "unsloth_version": "2025.6.8",
+  "unsloth_version": "2025.6.12",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
generation_config.json CHANGED
@@ -10,5 +10,5 @@
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
-  "transformers_version": "4.52.4"
+  "transformers_version": "4.53.0"
 }
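
For reference, the sampling values kept unchanged in generation_config.json (temperature 0.6, top_k 20, top_p 0.95) are picked up by generate() automatically. A minimal sketch of the equivalent explicit call, again with a placeholder repo id:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "ertghiu256/<repo-name>"  # placeholder -- not named in this commit
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype="float16")

# Sample with the same settings stored in generation_config.json.
inputs = tokenizer("Hello", return_tensors="pt")
out = model.generate(**inputs, do_sample=True, temperature=0.6, top_k=20, top_p=0.95)
print(tokenizer.decode(out[0], skip_special_tokens=True))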