AdityaBDhruva committed on
Commit
b0cfec1
·
verified ·
1 Parent(s): 8804a05

Rename handler.py to tokenizer.py

Browse files
Files changed (2) hide show
  1. handler.py +0 -12
  2. tokenizer.py +14 -0
handler.py DELETED
@@ -1,12 +0,0 @@
1
class EndpointHandler:
    """Callable wrapper around a sequence-classification model and its tokenizer.

    The handler is built once and then invoked with a request payload; each
    call tokenizes the payload's ``"inputs"`` value, runs the model, and
    returns the raw logits in a JSON-serializable form.
    """

    def __init__(self, path="."):
        """Load the tokenizer and model.

        Args:
            path: Directory (or identifier) passed to ``from_pretrained``.
                Defaults to ``"."``, which is what the original no-argument
                constructor always used, so ``EndpointHandler()`` still works.
                An empty string is also treated as ``"."``.
        """
        # Imported here, as in the original, so that merely importing this
        # module does not require transformers to be installed.
        from transformers import AutoModelForSequenceClassification, AutoTokenizer

        model_dir = path or "."
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)

    def __call__(self, inputs: dict) -> dict:
        """Run the model on ``inputs["inputs"]`` and return its logits.

        Args:
            inputs: Request payload; the text(s) to classify are read from the
                ``"inputs"`` key.

        Returns:
            ``{"logits": ...}`` where the value is ``outputs.logits`` converted
            to nested Python lists.

        Raises:
            ValueError: If the payload has no ``"inputs"`` value.
        """
        import torch

        texts = inputs.get("inputs")
        if texts is None:
            # Fail with a clear message instead of handing None to the tokenizer.
            raise ValueError('request payload must contain an "inputs" value')

        tokens = self.tokenizer(texts, return_tensors="pt", padding=True)
        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            outputs = self.model(**tokens)
        # convert outputs to JSON-serializable form
        return {"logits": outputs.logits.tolist()}
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import tiktoken
4
+ from torch import nn
5
+ tokenizer = tiktoken.get_encoding("cl100k_base")
6
+
7
def text_to_token_ids(text, tokenizer):
    """Encode *text* and return the token IDs as a tensor with a leading axis of 1.

    Args:
        text: String to encode.
        tokenizer: Object whose ``encode(text, allowed_special=...)`` returns a
            sequence of integer token IDs.

    Returns:
        An integer (``torch.long``) tensor of shape ``(1, number_of_tokens)``.
    """
    encoded = tokenizer.encode(text, allowed_special={'<|endoftext|>'})
    # Pin the dtype: torch.tensor([]) would otherwise infer float32, so an
    # empty encoding would come back as a float tensor instead of token IDs.
    encoded_tensor = torch.tensor(encoded, dtype=torch.long).unsqueeze(0)
    return encoded_tensor
11
+
12
def token_ids_to_text(token_ids, tokenizer):
    """Decode a tensor of token IDs back into text.

    Args:
        token_ids: Tensor of token IDs, either with a leading axis of size 1
            (as produced by ``unsqueeze(0)``) or already one-dimensional.
        tokenizer: Object whose ``decode(list_of_ids)`` returns a string.

    Returns:
        The string produced by ``tokenizer.decode``.
    """
    if token_ids.dim() > 1:
        # Drop the leading axis when it has size 1, as the original did.
        flat = token_ids.squeeze(0)
    else:
        # A 1-D (or 0-D) tensor has no leading axis to strip. Calling
        # squeeze(0) on a single-token 1-D tensor would collapse it to a
        # scalar, and .tolist() would then hand decode() an int, not a list.
        flat = token_ids.reshape(-1)
    return tokenizer.decode(flat.tolist())