import numpy as np
import tiktoken
import torch
from torch import nn

# Module-level default encoding; both helpers below take the tokenizer as an
# explicit argument, so callers may pass this one or any compatible object.
tokenizer = tiktoken.get_encoding("cl100k_base")


def text_to_token_ids(text, tokenizer):
    """Encode *text* into a tensor of token IDs with a leading batch dimension.

    Args:
        text: The string to encode.
        tokenizer: Object exposing ``encode(text, allowed_special=...)`` that
            returns a sequence of integer token IDs.

    Returns:
        An integer (``torch.long``) tensor of shape ``(1, num_tokens)``.
    """
    encoded = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
    # Pin the dtype: torch.tensor([]) would otherwise be inferred as float32,
    # so empty text would produce a float tensor instead of integer IDs.
    return torch.tensor(encoded, dtype=torch.long).unsqueeze(0)


def token_ids_to_text(token_ids, tokenizer):
    """Decode a tensor of token IDs back into text.

    Args:
        token_ids: Tensor of token IDs, either 1-D or with a leading
            dimension of size 1 (as produced by ``text_to_token_ids``).
        tokenizer: Object exposing ``decode(list_of_ids)`` that returns a
            string.

    Returns:
        The string returned by ``tokenizer.decode``.
    """
    # Only strip the leading dimension when there is more than one dimension;
    # squeezing a 1-D tensor of length 1 would collapse it to a scalar and
    # ``tolist()`` would then return a bare int instead of a list.
    flat = token_ids.squeeze(0) if token_ids.dim() > 1 else token_ids
    return tokenizer.decode(flat.tolist())