Upload 9 files

Browse files

Files changed (10) hide show

.gitattributes +1 -0
README.md +159 -3
added_tokens.json +3 -0
config.json +35 -0
model.safetensors +3 -0
pytorch_model.bin +3 -0
special_tokens_map.json +9 -0
to_onnx.py +185 -0
tokenizer.json +3 -0
tokenizer_config.json +17 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,159 @@
----
-license: mit
----

+---
+datasets:
+- squad_v2
+language:
+- multilingual
+- af
+- am
+- ar
+- as
+- az
+- be
+- bg
+- bn
+- br
+- bs
+- ca
+- cs
+- cy
+- da
+- de
+- el
+- en
+- eo
+- es
+- et
+- eu
+- fa
+- fi
+- fr
+- fy
+- ga
+- gd
+- gl
+- gu
+- ha
+- he
+- hi
+- hr
+- hu
+- hy
+- id
+- is
+- it
+- ja
+- jv
+- ka
+- kk
+- km
+- kn
+- ko
+- ku
+- ky
+- la
+- lo
+- lt
+- lv
+- mg
+- mk
+- ml
+- mn
+- mr
+- ms
+- my
+- ne
+- nl
+- 'no'
+- om
+- or
+- pa
+- pl
+- ps
+- pt
+- ro
+- ru
+- sa
+- sd
+- si
+- sk
+- sl
+- so
+- sq
+- sr
+- su
+- sv
+- sw
+- ta
+- te
+- th
+- tl
+- tr
+- ug
+- uk
+- ur
+- uz
+- vi
+- xh
+- yi
+- zh
+tags:
+- deberta
+- deberta-v3
+- mdeberta
+- question-answering
+- qa
+- multilingual
+thumbnail: https://huggingface.co/front/thumbnails/microsoft.png
+license: mit
+base_model:
+- microsoft/mdeberta-v3-base
+---
+## This model can be used for Extractive QA
+It has been finetuned for 3 epochs on [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/).
+## Usage
+```python
+from transformers import pipeline
+qa_model = pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")
+question = "Where do I live?"
+context = "My name is Tim and I live in Sweden."
+qa_model(question = question, context = context)
+# {'score': 0.975547730922699, 'start': 28, 'end': 36, 'answer': ' Sweden.'}
+```
+## Evaluation on SQuAD2.0 dev set
+```bash
+{
+    "epoch": 3.0,
+    "eval_HasAns_exact": 79.65587044534414,
+    "eval_HasAns_f1": 85.91387795001529,
+    "eval_HasAns_total": 5928,
+    "eval_NoAns_exact": 82.10260723296888,
+    "eval_NoAns_f1": 82.10260723296888,
+    "eval_NoAns_total": 5945,
+    "eval_best_exact": 80.8809904826076,
+    "eval_best_exact_thresh": 0.0,
+    "eval_best_f1": 84.00551406448994,
+    "eval_best_f1_thresh": 0.0,
+    "eval_exact": 80.8809904826076,
+    "eval_f1": 84.00551406449004,
+    "eval_samples": 12508,
+    "eval_total": 11873,
+    "train_loss": 0.7729689576483615,
+    "train_runtime": 9118.953,
+    "train_samples": 134891,
+    "train_samples_per_second": 44.377,
+    "train_steps_per_second": 0.925
+}
+```
+## DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing
+[DeBERTa](https://arxiv.org/abs/2006.03654) improves the BERT and RoBERTa models using disentangled attention and enhanced mask decoder. With those two improvements, DeBERTa outperforms RoBERTa on a majority of NLU tasks with 80GB of training data.
+In [DeBERTa V3](https://arxiv.org/abs/2111.09543), we further improved the efficiency of DeBERTa using ELECTRA-Style pre-training with Gradient Disentangled Embedding Sharing. Compared to DeBERTa, our V3 version significantly improves the model performance on downstream tasks. You can find more technical details about the new model in our [paper](https://arxiv.org/abs/2111.09543).
+Please check the [official repository](https://github.com/microsoft/DeBERTa) for more implementation details and updates.
+mDeBERTa is a multilingual version of DeBERTa which uses the same structure as DeBERTa and was trained with CC100 multilingual data.
+The mDeBERTa V3 base model comes with 12 layers and a hidden size of 768. It has 86M backbone parameters with a vocabulary containing 250K tokens which introduces 190M parameters in the Embedding layer. This model was trained using the same 2.5T CC100 data as XLM-R.

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 250101
+}

config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "mdeberta-v3-base-squad2/",
+  "architectures": [
+    "DebertaV2ForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.0.dev0",
+  "type_vocab_size": 0,
+  "vocab_size": 251000
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16675f4d0b1dcaa8ae44b9d8cd395c7c539de1d5ae06a7242389b195502ef3c2
+size 1112909866

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91d05e57e35a8a3768fbdbd26ecfa3c0672f6e889c0554e1859cabf282de2c56
+size 1112951793

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

to_onnx.py ADDED Viewed

	@@ -0,0 +1,185 @@

+import torch
+from transformers import AutoModelForQuestionAnswering
+from transformers import AutoTokenizer, BertConfig
+import onnx
+from onnxruntime.quantization import quantize_dynamic, QuantType
+import os
+import logging
+import subprocess
+from typing import Optional, Dict, Any
class ONNXModelConverter:
    """Export a Hugging Face question-answering model to ONNX and quantize it.

    Constructing the converter loads the tokenizer and the model named by
    ``model_name`` and creates ``output_dir``. :meth:`convert` then runs the
    whole pipeline: ONNX export, structural check, quantization
    preprocessing, dynamic quantization for each configured weight type, and
    saving the tokenizer next to the models.
    """

    def __init__(self, model_name: str, output_dir: str):
        """Load the tokenizer and model and prepare the output directory.

        Args:
            model_name: Identifier or path handed to ``from_pretrained``.
            output_dir: Directory that receives every generated file; it is
                created when missing.
        """
        self.model_name = model_name
        self.output_dir = output_dir
        self.setup_logging()
        os.makedirs(output_dir, exist_ok=True)
        # NOTE(review): trust_remote_code=True permits code shipped with the
        # model repository to run at load time - confirm it is required for
        # the models this script targets.
        self.logger.info(f"Loading tokenizer {model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        self.logger.info(f"Loading model {model_name}...")
        self.model = AutoModelForQuestionAnswering.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.float32,
        )
        # Switch to inference mode before the model is traced for export.
        self.model.eval()

    def setup_logging(self) -> None:
        """Bind ``self.logger`` and make sure it has a stream handler.

        The logger is the module-level ``logging.getLogger(__name__)``, which
        is shared by every instance. A handler is therefore attached only
        when the logger has none yet; adding one per instance would repeat
        every message once per converter created.
        """
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        if self.logger.handlers:
            return
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        )
        self.logger.addHandler(handler)

    def prepare_dummy_inputs(self):
        """Tokenize a short fixed sentence to obtain example export inputs.

        Returns:
            A dict with ``input_ids``, ``attention_mask`` and
            ``token_type_ids`` tensors.
        """
        encoded = self.tokenizer(
            "Hello, how are you?",
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=128
        )
        input_ids = encoded['input_ids']
        if 'token_type_ids' in encoded:
            token_type_ids = encoded['token_type_ids']
        else:
            # Some tokenizers do not emit segment ids; use all-zero ids of
            # the same shape instead of failing with a KeyError.
            # NOTE(review): assumes the model's forward() accepts
            # token_type_ids - confirm for models other than the default.
            token_type_ids = torch.zeros_like(input_ids)
        return {
            'input_ids': input_ids,
            'attention_mask': encoded['attention_mask'],
            'token_type_ids': token_type_ids
        }

    def export_to_onnx(self) -> str:
        """Export the loaded model to ``<output_dir>/model.onnx``.

        The model is wrapped so that the exported graph takes three
        positional tensors and returns ``(start_logits, end_logits)``. The
        first two axes of every input and output are declared dynamic.

        Returns:
            Path of the written ONNX file.

        Raises:
            Exception: Whatever ``torch.onnx.export`` raises, after logging.
        """
        output_path = os.path.join(self.output_dir, "model.onnx")
        inputs = self.prepare_dummy_inputs()
        batch_and_sequence = {0: 'batch_size', 1: 'sequence_length'}
        dynamic_axes = {
            name: dict(batch_and_sequence)
            for name in (
                'input_ids',
                'attention_mask',
                'token_type_ids',
                'start_logits',
                'end_logits',
            )
        }

        class ModelWrapper(torch.nn.Module):
            """Adapter returning a plain tuple of the two logit tensors."""

            def __init__(self, model):
                super().__init__()
                self.model = model

            def forward(self, input_ids, attention_mask, token_type_ids):
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
                return outputs.start_logits, outputs.end_logits

        wrapped_model = ModelWrapper(self.model)
        try:
            torch.onnx.export(
                wrapped_model,
                (inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids']),
                output_path,
                export_params=True,
                opset_version=14,  # Or a suitable version
                do_constant_folding=True,
                input_names=['input_ids', 'attention_mask', 'token_type_ids'],
                output_names=['start_logits', 'end_logits'],
                dynamic_axes=dynamic_axes,
                verbose=False
            )
            self.logger.info(f"Model exported to {output_path}")
            return output_path
        except Exception as e:
            self.logger.error(f"ONNX export failed: {str(e)}")
            raise

    def verify_model(self, model_path: str) -> bool:
        """Load an ONNX file and run ``onnx.checker.check_model`` on it.

        Args:
            model_path: Path of the ONNX file to check.

        Returns:
            ``True`` when loading and checking succeed, ``False`` otherwise;
            the failure is logged rather than raised.
        """
        try:
            onnx_model = onnx.load(model_path)
            onnx.checker.check_model(onnx_model)
            self.logger.info("ONNX model verification successful")
            return True
        except Exception as e:
            self.logger.error(f"Model verification failed: {str(e)}")
            return False

    def preprocess_model(self, model_path: str) -> str:
        """Run onnxruntime's quantization preprocessing on a model.

        The ``onnxruntime.quantization.preprocess`` module is run in a child
        process and writes ``<output_dir>/model-infer.onnx``.

        Args:
            model_path: Path of the ONNX model to preprocess.

        Returns:
            Path of the preprocessed model.

        Raises:
            subprocess.CalledProcessError: The child exited with a non-zero
                status; its stderr is logged first.
            Exception: Any other failure starting the child, after logging.
        """
        import sys

        preprocessed_path = os.path.join(self.output_dir, "model-infer.onnx")
        try:
            # Use the running interpreter rather than whatever "python" is
            # first on PATH, so the child sees the same installed packages.
            command = [
                sys.executable, "-m", "onnxruntime.quantization.preprocess",
                "--input", model_path,
                "--output", preprocessed_path
            ]
            subprocess.run(command, check=True, capture_output=True, text=True)
            self.logger.info(f"Model preprocessing successful. Output saved to {preprocessed_path}")
            return preprocessed_path
        except subprocess.CalledProcessError as e:
            self.logger.error(f"Preprocessing failed: {e.stderr}")
            raise
        except Exception as e:
            self.logger.error(f"Preprocessing failed: {str(e)}")
            raise

    def quantize_model(self, model_path: str) -> list:
        """Dynamically quantize a model once per configured weight type.

        Each result is written to ``<output_dir>/model_<type>.onnx``.

        Args:
            model_path: Path of the (preprocessed) ONNX model.

        Returns:
            Paths of the quantized models, in the order they were produced.

        Raises:
            Exception: The first quantization failure, after logging; later
                weight types are then not attempted.
        """
        # NOTE(review): whether quantize_dynamic accepts the 4-bit and 16-bit
        # types depends on the installed onnxruntime version - confirm.
        weight_types = {'int4':QuantType.QInt4, 'int8':QuantType.QInt8, 'uint4':QuantType.QUInt4, 'uint8':QuantType.QUInt8, 'uint16':QuantType.QUInt16, 'int16':QuantType.QInt16}
        all_quantized_paths = []
        for type_name, quant_type in weight_types.items():
            quantized_path = os.path.join(self.output_dir, "model_" + type_name + ".onnx")
            try:
                quantize_dynamic(
                    model_path,
                    quantized_path,
                    weight_type=quant_type
                )
                self.logger.info(f"Model quantized ({type_name}) and saved to {quantized_path}")
                all_quantized_paths.append(quantized_path)
            except Exception as e:
                self.logger.error(f"Quantization ({type_name}) failed: {str(e)}")
                raise
        return all_quantized_paths

    def convert(self) -> Dict[str, Any]:
        """Run export, verification, preprocessing, quantization and saving.

        Returns:
            A dict with the keys ``onnx_model``, ``preprocessed_model``,
            ``quantized_models`` (a list of paths) and ``tokenizer``.

        Raises:
            RuntimeError: The exported model did not pass verification.
            Exception: Any failure of an individual step, after logging.
        """
        try:
            onnx_path = self.export_to_onnx()
            if not self.verify_model(onnx_path):
                # RuntimeError is still caught by callers handling Exception.
                raise RuntimeError("Model verification failed")
            # Quantization runs on the preprocessed model, not the raw export.
            preprocessed_path = self.preprocess_model(onnx_path)
            quantized_paths = self.quantize_model(preprocessed_path)
            tokenizer_path = os.path.join(self.output_dir, "tokenizer")
            self.tokenizer.save_pretrained(tokenizer_path)
            self.logger.info(f"Tokenizer saved to {tokenizer_path}")
            return {
                'onnx_model': onnx_path,
                'preprocessed_model': preprocessed_path,
                'quantized_models': quantized_paths,
                'tokenizer': tokenizer_path
            }
        except Exception as e:
            self.logger.error(f"Conversion process failed: {str(e)}")
            raise
if __name__ == "__main__":
    # Script entry point: convert the default model into the "onnx" directory
    # and print where each artifact was written.
    MODEL_NAME = "timpal0l/mdeberta-v3-base-squad2"  # Or any other suitable model
    OUTPUT_DIR = "onnx"
    try:
        converter = ONNXModelConverter(MODEL_NAME, OUTPUT_DIR)
        results = converter.convert()
    except Exception as e:
        print(f"Conversion failed: {str(e)}")
        # Exit non-zero so shells and CI jobs can detect the failure instead
        # of seeing a successful run.
        raise SystemExit(1)
    print("\nConversion completed successfully!")
    print(f"ONNX model path: {results['onnx_model']}")
    print(f"Preprocessed model path: {results['preprocessed_model']}")
    print(f"Quantized model paths: {results['quantized_models']}")
    print(f"Tokenizer path: {results['tokenizer']}")

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6b52ff7043b8e7c0712d94ffa3c8a9c9522538157a11f5b59dd9051105497b7
+size 16316053

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "name_or_path": "mdeberta-v3-base-squad2/",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "special_tokens_map_file": null,
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}