dewdev committed on
Commit
a32a4d8
·
verified ·
1 Parent(s): 53ead91

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,159 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - squad_v2
4
+ language:
5
+ - multilingual
6
+ - af
7
+ - am
8
+ - ar
9
+ - as
10
+ - az
11
+ - be
12
+ - bg
13
+ - bn
14
+ - br
15
+ - bs
16
+ - ca
17
+ - cs
18
+ - cy
19
+ - da
20
+ - de
21
+ - el
22
+ - en
23
+ - eo
24
+ - es
25
+ - et
26
+ - eu
27
+ - fa
28
+ - fi
29
+ - fr
30
+ - fy
31
+ - ga
32
+ - gd
33
+ - gl
34
+ - gu
35
+ - ha
36
+ - he
37
+ - hi
38
+ - hr
39
+ - hu
40
+ - hy
41
+ - id
42
+ - is
43
+ - it
44
+ - ja
45
+ - jv
46
+ - ka
47
+ - kk
48
+ - km
49
+ - kn
50
+ - ko
51
+ - ku
52
+ - ky
53
+ - la
54
+ - lo
55
+ - lt
56
+ - lv
57
+ - mg
58
+ - mk
59
+ - ml
60
+ - mn
61
+ - mr
62
+ - ms
63
+ - my
64
+ - ne
65
+ - nl
66
+ - 'no'
67
+ - om
68
+ - or
69
+ - pa
70
+ - pl
71
+ - ps
72
+ - pt
73
+ - ro
74
+ - ru
75
+ - sa
76
+ - sd
77
+ - si
78
+ - sk
79
+ - sl
80
+ - so
81
+ - sq
82
+ - sr
83
+ - su
84
+ - sv
85
+ - sw
86
+ - ta
87
+ - te
88
+ - th
89
+ - tl
90
+ - tr
91
+ - ug
92
+ - uk
93
+ - ur
94
+ - uz
95
+ - vi
96
+ - xh
97
+ - yi
98
+ - zh
99
+ tags:
100
+ - deberta
101
+ - deberta-v3
102
+ - mdeberta
103
+ - question-answering
104
+ - qa
105
+ - multilingual
106
+ thumbnail: https://huggingface.co/front/thumbnails/microsoft.png
107
+ license: mit
108
+ base_model:
109
+ - microsoft/mdeberta-v3-base
110
+ ---
111
+ ## This model can be used for Extractive QA
112
+ It has been finetuned for 3 epochs on [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/).
113
+
114
+ ## Usage
115
+ ```python
116
+ from transformers import pipeline
117
+
118
+ qa_model = pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")
119
+ question = "Where do I live?"
120
+ context = "My name is Tim and I live in Sweden."
121
+ qa_model(question = question, context = context)
122
+ # {'score': 0.975547730922699, 'start': 28, 'end': 36, 'answer': ' Sweden.'}
123
+ ```
124
+
125
+ ## Evaluation on SQuAD2.0 dev set
126
+ ```json
127
+ {
128
+ "epoch": 3.0,
129
+ "eval_HasAns_exact": 79.65587044534414,
130
+ "eval_HasAns_f1": 85.91387795001529,
131
+ "eval_HasAns_total": 5928,
132
+ "eval_NoAns_exact": 82.10260723296888,
133
+ "eval_NoAns_f1": 82.10260723296888,
134
+ "eval_NoAns_total": 5945,
135
+ "eval_best_exact": 80.8809904826076,
136
+ "eval_best_exact_thresh": 0.0,
137
+ "eval_best_f1": 84.00551406448994,
138
+ "eval_best_f1_thresh": 0.0,
139
+ "eval_exact": 80.8809904826076,
140
+ "eval_f1": 84.00551406449004,
141
+ "eval_samples": 12508,
142
+ "eval_total": 11873,
143
+ "train_loss": 0.7729689576483615,
144
+ "train_runtime": 9118.953,
145
+ "train_samples": 134891,
146
+ "train_samples_per_second": 44.377,
147
+ "train_steps_per_second": 0.925
148
+ }
149
+ ```
150
+ ## DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing
151
+
152
+ [DeBERTa](https://arxiv.org/abs/2006.03654) improves the BERT and RoBERTa models using disentangled attention and an enhanced mask decoder. With those two improvements, DeBERTa outperforms RoBERTa on a majority of NLU tasks with 80GB training data.
153
+
154
+ In [DeBERTa V3](https://arxiv.org/abs/2111.09543), we further improved the efficiency of DeBERTa using ELECTRA-Style pre-training with Gradient Disentangled Embedding Sharing. Compared to DeBERTa, our V3 version significantly improves the model performance on downstream tasks. You can find more technical details about the new model in our [paper](https://arxiv.org/abs/2111.09543).
155
+
156
+ Please check the [official repository](https://github.com/microsoft/DeBERTa) for more implementation details and updates.
157
+
158
+ mDeBERTa is a multilingual version of DeBERTa which uses the same structure as DeBERTa and was trained with CC100 multilingual data.
159
+ The mDeBERTa V3 base model comes with 12 layers and a hidden size of 768. It has 86M backbone parameters with a vocabulary containing 250K tokens which introduces 190M parameters in the Embedding layer. This model was trained using the same 2.5T CC100 data as XLM-R.
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 250101
3
+ }
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "mdeberta-v3-base-squad2/",
3
+ "architectures": [
4
+ "DebertaV2ForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.25.0.dev0",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 251000
35
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16675f4d0b1dcaa8ae44b9d8cd395c7c539de1d5ae06a7242389b195502ef3c2
3
+ size 1112909866
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91d05e57e35a8a3768fbdbd26ecfa3c0672f6e889c0554e1859cabf282de2c56
3
+ size 1112951793
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
to_onnx.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForQuestionAnswering
3
+ from transformers import AutoTokenizer, BertConfig
4
+ import onnx
5
+ from onnxruntime.quantization import quantize_dynamic, QuantType
6
+ import os
7
+ import logging
8
+ import subprocess
9
+ from typing import Optional, Dict, Any
10
+
11
class ONNXModelConverter:
    """Export a Hugging Face question-answering model to ONNX and quantize it.

    :meth:`convert` runs the whole pipeline: export to ONNX, check the
    exported graph, run the onnxruntime quantization pre-processing step,
    write one dynamically quantized copy per weight type, and save the
    tokenizer next to the models.
    """

    def __init__(self, model_name: str, output_dir: str):
        """Load the tokenizer and model and make sure ``output_dir`` exists.

        Args:
            model_name: Model identifier or local path handed to
                ``from_pretrained``.
            output_dir: Directory that receives every generated artifact;
                created if missing.
        """
        self.model_name = model_name
        self.output_dir = output_dir
        self.setup_logging()

        os.makedirs(output_dir, exist_ok=True)

        # NOTE(review): trust_remote_code=True allows the model repository to
        # execute its own Python code on this machine. Only point this
        # converter at repositories you trust.
        self.logger.info(f"Loading tokenizer {model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

        self.logger.info(f"Loading model {model_name}...")
        self.model = AutoModelForQuestionAnswering.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.float32
        )
        # Inference mode: disables dropout so the traced graph is deterministic.
        self.model.eval()

    def setup_logging(self):
        """Create ``self.logger`` and attach a single stream handler to it."""
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        # logging.getLogger returns the same object for the same name, so a
        # second converter instance would otherwise add a second handler and
        # every message would be printed twice.
        if self.logger.handlers:
            return
        handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

    def prepare_dummy_inputs(self):
        """Tokenize a fixed sentence to obtain example tensors for tracing.

        Returns:
            Dict with ``input_ids``, ``attention_mask`` and ``token_type_ids``
            tensors. When the tokenizer does not emit ``token_type_ids`` an
            all-zero tensor shaped like ``input_ids`` is used instead.
        """
        dummy_input = self.tokenizer(
            "Hello, how are you?",
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=128
        )
        # Not every tokenizer returns segment ids; fall back to zeros instead
        # of failing with a KeyError.
        token_type_ids = dummy_input.get('token_type_ids')
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(dummy_input['input_ids'])
        return {
            'input_ids': dummy_input['input_ids'],
            'attention_mask': dummy_input['attention_mask'],
            'token_type_ids': token_type_ids
        }

    def export_to_onnx(self):
        """Trace the model and write ``model.onnx`` into the output directory.

        Returns:
            Path of the exported ONNX file.

        Raises:
            Exception: Any export error is logged and re-raised unchanged.
        """
        output_path = os.path.join(self.output_dir, "model.onnx")
        inputs = self.prepare_dummy_inputs()

        # Batch and sequence dimensions stay symbolic so the exported graph
        # is not tied to the shape of the dummy sentence.
        dynamic_axes = {
            'input_ids': {0: 'batch_size', 1: 'sequence_length'},
            'attention_mask': {0: 'batch_size', 1: 'sequence_length'},
            'token_type_ids': {0: 'batch_size', 1: 'sequence_length'},
            'start_logits': {0: 'batch_size', 1: 'sequence_length'},
            'end_logits': {0: 'batch_size', 1: 'sequence_length'},
        }

        class ModelWrapper(torch.nn.Module):
            """Expose the QA model as positional tensors in, logit tuple out."""

            def __init__(self, model):
                super().__init__()
                self.model = model

            def forward(self, input_ids, attention_mask, token_type_ids):
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
                return outputs.start_logits, outputs.end_logits

        wrapped_model = ModelWrapper(self.model)

        try:
            torch.onnx.export(
                wrapped_model,
                (inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids']),
                output_path,
                export_params=True,
                opset_version=14,  # Or a suitable version
                do_constant_folding=True,
                input_names=['input_ids', 'attention_mask', 'token_type_ids'],
                output_names=['start_logits', 'end_logits'],
                dynamic_axes=dynamic_axes,
                verbose=False
            )
            self.logger.info(f"Model exported to {output_path}")
            return output_path
        except Exception as e:
            self.logger.error(f"ONNX export failed: {str(e)}")
            raise

    def verify_model(self, model_path: str):
        """Run the ONNX checker on ``model_path``.

        Returns:
            ``True`` when the checker accepts the model, ``False`` (after
            logging the error) otherwise.
        """
        try:
            # Passing the path lets the checker read the file itself, which
            # also works for models above the 2 GB protobuf limit.
            onnx.checker.check_model(model_path)
            self.logger.info("ONNX model verification successful")
            return True
        except Exception as e:
            self.logger.error(f"Model verification failed: {str(e)}")
            return False

    def preprocess_model(self, model_path: str) -> str:
        """Run ``onnxruntime.quantization.preprocess`` on ``model_path``.

        Returns:
            Path of the pre-processed model (``model-infer.onnx`` in the
            output directory).

        Raises:
            subprocess.CalledProcessError: The pre-processing command exited
                with a non-zero status (stderr is logged first).
            Exception: Any other failure is logged and re-raised.
        """
        import sys

        preprocessed_path = os.path.join(self.output_dir, "model-infer.onnx")
        try:
            # sys.executable guarantees the subprocess uses the interpreter
            # (and therefore the onnxruntime install) that runs this script,
            # rather than whatever "python" happens to be first on PATH.
            command = [
                sys.executable, "-m", "onnxruntime.quantization.preprocess",
                "--input", model_path,
                "--output", preprocessed_path
            ]
            subprocess.run(command, check=True, capture_output=True, text=True)
            self.logger.info(f"Model preprocessing successful. Output saved to {preprocessed_path}")
            return preprocessed_path
        except subprocess.CalledProcessError as e:
            self.logger.error(f"Preprocessing failed: {e.stderr}")
            raise
        except Exception as e:
            self.logger.error(f"Preprocessing failed: {str(e)}")
            raise

    def quantize_model(self, model_path: str):
        """Write one dynamically quantized model per supported weight type.

        Each output is named ``model_<type>.onnx`` in the output directory.

        Returns:
            List of the quantized model paths, in the order they were written.

        Raises:
            Exception: The first quantization failure is logged and re-raised;
                later weight types are not attempted.
        """
        weight_types = {'int4':QuantType.QInt4, 'int8':QuantType.QInt8, 'uint4':QuantType.QUInt4, 'uint8':QuantType.QUInt8, 'uint16':QuantType.QUInt16, 'int16':QuantType.QInt16}
        all_quantized_paths = []
        for weight_type, quant_type in weight_types.items():
            quantized_path = os.path.join(self.output_dir, "model_" + weight_type + ".onnx")

            try:
                quantize_dynamic(
                    model_path,
                    quantized_path,
                    weight_type=quant_type
                )
                self.logger.info(f"Model quantized ({weight_type}) and saved to {quantized_path}")
                all_quantized_paths.append(quantized_path)
            except Exception as e:
                self.logger.error(f"Quantization ({weight_type}) failed: {str(e)}")
                raise

        return all_quantized_paths

    def convert(self):
        """Run export, verification, pre-processing, quantization and tokenizer save.

        Returns:
            Dict with keys ``onnx_model``, ``preprocessed_model``,
            ``quantized_models`` (list of paths) and ``tokenizer``.

        Raises:
            RuntimeError: The exported model did not pass verification.
            Exception: Any failure from an individual step, logged and
                re-raised.
        """
        try:
            onnx_path = self.export_to_onnx()

            if not self.verify_model(onnx_path):
                raise RuntimeError("Model verification failed")

            # Add preprocessing step before quantization
            preprocessed_path = self.preprocess_model(onnx_path)

            # Use preprocessed model for quantization
            quantized_paths = self.quantize_model(preprocessed_path)

            tokenizer_path = os.path.join(self.output_dir, "tokenizer")
            self.tokenizer.save_pretrained(tokenizer_path)
            self.logger.info(f"Tokenizer saved to {tokenizer_path}")

            return {
                'onnx_model': onnx_path,
                'preprocessed_model': preprocessed_path,
                'quantized_models': quantized_paths,
                'tokenizer': tokenizer_path
            }

        except Exception as e:
            self.logger.error(f"Conversion process failed: {str(e)}")
            raise
169
+
170
if __name__ == "__main__":
    # Script entry point: convert the default model and report where each
    # artifact was written.
    MODEL_NAME = "timpal0l/mdeberta-v3-base-squad2"  # Or any other suitable model
    OUTPUT_DIR = "onnx"

    try:
        converter = ONNXModelConverter(MODEL_NAME, OUTPUT_DIR)
        results = converter.convert()
    except Exception as e:
        print(f"Conversion failed: {str(e)}")
        # Exit with a non-zero status so shells and CI pipelines can detect
        # the failure instead of seeing a successful run.
        raise SystemExit(1)

    print("\nConversion completed successfully!")
    print(f"ONNX model path: {results['onnx_model']}")
    print(f"Preprocessed model path: {results['preprocessed_model']}")
    print(f"Quantized model paths: {results['quantized_models']}")
    print(f"Tokenizer path: {results['tokenizer']}")
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b52ff7043b8e7c0712d94ffa3c8a9c9522538157a11f5b59dd9051105497b7
3
+ size 16316053
tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "eos_token": "[SEP]",
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "name_or_path": "mdeberta-v3-base-squad2/",
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "sp_model_kwargs": {},
12
+ "special_tokens_map_file": null,
13
+ "split_by_punct": false,
14
+ "tokenizer_class": "DebertaV2Tokenizer",
15
+ "unk_token": "[UNK]",
16
+ "vocab_type": "spm"
17
+ }