{
  "config_version": "1.0.0",
  "model_id": "Qwen/Qwen2.5-7B",
  "model_kinds": [
    "ARTIFACT"
  ],
  "model_class": {
    "module": "furiosa_models.architecture.models.qwen2",
    "name": "Qwen2ForCausalLM"
  },
  "llm_config": {
    "optimization_config": {
      "attention_type": "PAGED_ATTENTION",
      "optimize_rope": true,
      "optimize_packed": true,
      "decompose_layernorm": false,
      "optimize_furiosa": false,
      "use_unsplit_packed": false,
      "compact_causal_mask": false,
      "use_rngd_gelu": false,
      "causal_mask_free_decoding": true,
      "kv_cache_sharing_across_beams": false,
      "inbound_beamsearch_softmax": false,
      "calculate_logit_only_for_last_token": false,
      "optimized_for_speculative_decoding": true,
      "use_2d_masks": false,
      "merged_kv_indices": false
    },
    "quantization_config": {
      "weight": "bf16",
      "activation": "bf16",
      "kv_cache": "bf16",
      "use_mcp": false
    }
  },
  "components_versions": {
    "furiosa_llm": {
      "version": "0.1.0-dev",
      "git_hash": "0d00aee",
      "build_time": null
    },
    "furiosa_ir": {
      "version": "0.11.0-dev",
      "git_hash": "4467f6a699",
      "build_time": "2025-08-27T04:38:44Z"
    },
    "furiosa_runtime": {
      "version": "2025.3.1",
      "git_hash": "0d00aee20",
      "build_time": "2025-08-27T04:38:48Z"
    },
    "furiosa_model_compressor": {
      "version": "2025.3.0 (rev: 4cd9804)",
      "git_hash": null,
      "build_time": null
    }
  }
}