"""GRPO fine-tuning of Gemma 3 with a hand-written "anti-slop" reward.

The reward for each completion is the sum of:

* ``score_slop``       - penalises overused phrases, patterns and punctuation,
* ``score_word_count`` - rewards hitting the word count requested in the prompt,
* ``score_lexical``    - (after 200 reward calls) rewards lexical diversity (MTLD).

Training stops early once the rolling mean of the logged reward gets within
5 points of ``REWARD_MAX``.
"""

# unsloth is imported first, as in the original script, so it precedes
# trl/transformers.
from unsloth import FastLanguageModel, FastModel
import torch
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer
from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
from transformers.training_args import TrainingArguments
from unsloth.chat_templates import standardize_data_formats, train_on_responses_only
import re
from lexicalrichness import LexicalRichness
from collections import deque

# torch._dynamo.config.cache_size_limit = 256
# pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo

# Case-insensitive patterns; every match in a completion counts as one "slop" hit.
_regexes = [
    r"voice barely (audible|above a whisper)",
    r"(wasn['’]t|was not)[^.!?]*?[;,] it ",
    r"(down|up)[^.!?]*?spine",
    r"(wasn['’]t|was not|hadn['’]t|had not|weren['’]t|were not)[^.!?]*?[;,] but ",
    r"(weren['’]t|were not)[^.!?]*?[;,]\s*\w*? were ",
    r"not[^.!?]*?[;,] but ",
    r"cross(es|ed)? \w+ (arms|legs)",
    r"[^.!?]*([^.!?]*,){4,}[^.!?]*",
    r"crackle(d|s)? with tension",
    r"(mix\w*|blend) of \w+ and \w+",
    r"as \w+ as (it|they) (was|is|were|are) \w+",
    r"(widen(ed|s)?|narrow(ed|s)?) \w+ eyes",
    r"eyes (widen(ed|s)?|narrow(ed|s)?)",
    r"here['’\s\w]+? topic:",
]
REGEXES = tuple(re.compile(text, re.IGNORECASE) for text in _regexes)

# Overused phrases, all lowercase. Most contain only letters and spaces
# (apostrophes already removed, e.g. "wasnt"); a few contain punctuation.
# Duplicated entries are harmless because this is a set.
PHRASES = {
    "a testament to",
    "living rock",  # this only occurred a few times, but i hate it a lot
    "air hung", "hung heavy", "hung thick", "air thick", "air was thick",
    "clung", "palpable", "unsettling stillness", "forged in the fires",
    "suffocating blanket", ", almost", "tasted of", "monument to",
    "settled over", "chilling ", "beacon of", "mirrored the",
    "something uniquely", "of ozone", "heady scent", "breath hitch", "---",
    "felt like a", "a stark contrast", "desperate attempt to",
    "stark contrast to", "a desperate attempt", "rain hammered against",
    "word piece exploring", "hung thick with", "a carefully constructed",
    "air hung thick", "the rain continued", "a constant reminder",
    "a desperate need", "beneath the surface", "continued to fall",
    "something far more", "rain continued to", "constant reminder of",
    "metallic tang of", "desperate need to", "a stark reminder",
    "the metallic tang", "felt less like", "wasnt simply a",
    "face etched with", "chilling certainty that", "wasnt merely a",
    "to fall washing", "a chilling certainty", "profound sense of",
    "palpable sense of", "said his voice", "a profound sense",
    "it felt like", "stark reminder of", "a slow deliberate",
    "a subtle shift", "subtle shifts in", "the carefully constructed",
    "less like a", "a desperate gamble", "a palpable sense",
    "the rain intensified", "acutely aware of", "voice a low",
    "the subtle shifts", "the encroaching darkness", "flicker of something",
    "subtle shift in", "air crackled with", "a desperate plea",
    "sense of unease", "a slow agonizing", "threatened to consume",
    "a carefully orchestrated", "a chilling realization",
    "silent testament to", "a chilling reminder", "the unsettling feeling",
    "inextricably linked to", "the traditional sense", "washing away the",
    "desperate need for", "it wasnt simply", "desperately trying to",
    "a silent testament", "his face etched", "a different kind",
    "hung thick and",
    "different kind of", "leaving behind a", "reminder that even",
    "carefully constructed facade", "something akin to", "wasnt a simple",
    "scent of damp", "of damp earth", "voice low and", "rain hammered down",
    "relentless pursuit of", "his voice low", "chilling reminder of",
    "hung heavy in", "last vestiges of", "faces etched with",
    "the last vestiges", "tang of blood", "the air grew", "damp earth and",
    "a strange unsettling", "his carefully constructed", "swirling vortex of",
    "air thick with", "growing sense of", "a solitary figure",
    "gaze fixed on", "the air thick", "the delicate balance",
    "exploring the scenario", "the sheer volume", "the corrugated iron",
    "a silent observer", "threatened to unravel", "honed by years",
    "the sheer scale", "felt like an", "find a way", "need to understand",
    "sheer scale of", "leaving behind only", "a swirling vortex",
    "clung to everything", "word exploration of", "a word exploration",
    "now felt like", "sheer volume of", "it wasnt merely", "wasnt driven by",
    "futile attempt to", "a growing sense", "scent of pine",
    "amidst the chaos", "the scenario youve", "rising tide of",
    "fall washing away", "sense of dread", "chipped away at",
    "voice laced with", "a physical blow", "yet beneath the",
    "threatened to overwhelm", "lingering scent of", "felt a strange",
    "their faces etched", "mirroring the frantic", "a suffocating blanket",
    "a desperate scramble", "felt a profound", "barely audible above",
    "enduring power of", "desperate plea for", "stubborn refusal to",
    "voice barely audible", "desperately tried to", "wasnt born of",
    "the enduring power", "skeletal remains of", "the conventional sense",
    "a subtle almost", "a grim determination", "seemed to absorb",
    "brute force but", "air thickened with", "the skeletal remains",
    "mirroring the relentless", "of something akin", "a silent promise",
    "the looming threat", "chilling testament to", "corrugated iron roof",
    "what felt like", "prickle of unease", "a meticulously crafted",
    "a slow insidious",
    "like a physical", "physical manifestation of", "a chilling testament",
    "far more complex", "a relentless grey", "deep within the",
    "a palpable tension", "a delicate dance", "a low rumble",
    "delicate balance of", "chilling clarity that", "a desperate hope",
    "his voice barely", "the natural world", "utterly devoid of",
    "the established order", "always seemed to", "a deliberate attempt",
    "stumbled upon a", "like a shroud", "a futile attempt",
    "a silent watchful", "like an eternity", "response exploring the",
    "a quiet almost", "his gaze fixed", "a physical manifestation",
    "deliberate attempt to", "profound and unsettling", "wasnt a warrior",
    "wasnt interested in", "beneath the veneer", "the air thickened",
    "something ancient and", "rustle of leaves", "desperately wanted to",
    "a stubborn refusal", "the storm brewing", "a calculated risk",
    "mirroring the storm", "a relentless drumming", "eyes burning with",
    "wasnt simply about", "trapped within the", "face a mask",
    "far more sinister", "the lingering scent", "act of defiance",
    "a primal fear", "a primal instinct", "the rising tide",
    "swift and brutal", "the air crackled", "he wasnt simply",
    "wasnt a grand", "unsettling feeling that", "a desperate almost",
    "a life lived", "his hand instinctively", "this wasnt simply",
    "primal need to", "her voice barely", "the first time",
    "hardened by years", "spent the last", "perhaps focusing on",
    "something older something", "sickening certainty that",
    "deliberate act of", "a desperate dance", "a desperate struggle",
    "a specific aspect", "desperate scramble for",
    "devastating consequences of", "continued its relentless",
    "the devastating consequences", "the chilling realization",
    "a gilded cage", "a bygone era", "something beyond the",
    "a chilling clarity", "hadnt stopped for", "the relentless pursuit",
    "fabric of reality", "far more insidious", "a deliberate act",
    "a silent acknowledgment", "wasnt a dramatic", "unsettling feeling of",
    "a deliberate almost", "the desperate need",
    "fall washing over", "a deeply ingrained", "frantic beat of",
    "stark reminder that", "the relentless drumming", "his voice strained",
    "a strange almost", "like a betrayal", "couldnt shake the",
    "silent promise of", "the oppressive atmosphere", "sliver of hope",
    "the frantic beat", "simple act of", "in hushed tones",
    "something else something", "a sickening certainty",
    "a word description", "a primal need", "was inextricably linked",
    "a grim reminder", "whirlwind of steel", "hung heavy with",
    "veteran of countless", "a relentless percussion", "a delicate balance",
    "chilling realization dawned", "it felt less", "drip of water",
    "within the confines", "a silent acknowledgement", "felt the weight",
    "fall washing the", "air thrummed with", "the air around",
    "voice barely a", "word response on", "silent acknowledgment of",
    "a cold dread", "grim testament to", "a fragile shield",
    "with brutal efficiency", "a small almost", "of countless battles",
    "a grim testament", "carried the weight", "force of nature",
    "air hung heavy", "far more dangerous", "exploring the provided",
    "felt a prickle", "seemed to mirror", "a small intricately",
    "rain hadnt stopped", "her carefully constructed",
    "chilling realization that", "small intricately carved",
    "and perhaps even", "a poignant reminder", "nestled amongst the",
    "voice tight with", "he couldnt quite", "shake the feeling",
    "radiating an unsettling", "a low resonant", "unwavering belief in",
    "seemed to amplify", "specific aspect of", "uncanny ability to",
    "a grizzled veteran", "shadows across the", "devoid of emotion",
    "seemed to vibrate", "the oppressive silence", "the unsettling truth",
    "fueled by adrenaline", "display of power", "storm brewing within",
    "wasnt a gentle", "face of overwhelming", "silent acknowledgement of",
    "yet amidst the", "grim reminder of", "a brutal reminder",
    "key to unlocking", "far more unsettling", "held the key",
    "steel and fury", "a profound unsettling", "struggle for survival",
    "crushing weight of",
    "brutal reminder of", "a silent sentinel", "a mirror reflecting",
    "eyes fixed on", "rain intensified blurring",
    "found himself increasingly", "exploring the described",
    "the subtle shift", "the air thrummed", "subtle almost imperceptible",
    "forgotten corner of", "perhaps just perhaps", "a grotesque parody",
    "a man built", "the constant threat", "glimmer of hope",
    "the unsettling realization", "the storm raging", "a living breathing",
    "seemed to shift", "with terrifying speed", "the watchful eyes",
    "looming threat of", "and something else", "of immense power",
    "the darkness within", "a temporary reprieve", "watchful eyes of",
    "mirroring the tempest", "eyes scanning the", "with chilling certainty",
    "barely a whisper", "a constant mournful", "the true nature",
    "air was thick", "rain intensified washing", "grotesque parody of",
    "a constant unsettling", "a final desperate", "the slow agonizing",
    "with chilling clarity", "true nature of", "spent his life",
    "flicker of hope", "weight of responsibility", "ever the pragmatist",
    "---", "carefully constructed", "wasnt simply", "stark contrast",
    "rain hammered", "hung thick", "desperate attempt", "wasnt merely",
    "piece exploring", "something far", "air hung", "desperate need",
    "rain continued", "metallic tang", "constant reminder",
    "chilling certainty", "leaving behind", "less like", "slow deliberate",
    "stark reminder", "felt less", "subtle shift", "meticulously crafted",
    "brute force", "hung heavy", "chilling realization", "face etched",
    "almost imperceptible", "slow agonizing", "damp earth",
    "carefully orchestrated", "fall washing", "subtle shifts",
    "profound sense", "palpable sense", "trapped within", "acutely aware",
    "relentless pursuit", "desperate plea", "unsettling feeling",
    "grim determination", "desperate gamble", "brutal efficiency",
    "rain intensified", "delicate balance", "something else", "deep within",
    "encroaching darkness", "inextricably linked", "voice low",
    "relentless drumming", "washing away", "chilling reminder",
    "profoundly unsettling", "werent simply", "air crackled",
    "stumbled upon", "perhaps even", "silent testament", "voice barely",
    "desperate hope", "traditional sense", "stone walls",
    "unsettling stillness", "desperately trying", "different kind",
    "unwavering loyalty", "felt increasingly", "something beyond",
    "constructed facade", "something akin", "desperate struggle",
    "three days", "barely audible", "strange unsettling",
    "relentless assault", "settled upon", "faces etched", "air grew",
    "last vestiges", "something ancient", "felt profoundly",
    "solitary figure", "hadnt simply", "swirling vortex", "didnt offer",
    "far beyond", "air thick", "chilling clarity", "quiet strength",
    "raw power", "intricately carved", "desperate scramble", "young man",
    "growing sense", "sheer scale", "hadnt stopped", "wasnt born",
    "primal fear", "gaze fixed", "werent merely", "hand instinctively",
    "almost unsettling", "offered little", "silent observer",
    "sheer volume", "corrugated iron", "couldnt quite", "subtle almost",
    "crumbling stone", "air thickened", "suffocating blanket",
    "primal instinct", "yet beneath", "carefully crafted", "gilded cage",
    "didnt understand", "countless battles", "deeply ingrained",
    "looming threat", "rising tide", "futile attempt", "silent watchful",
    "palpable tension", "meticulously constructed", "wasnt driven",
    "impending doom", "carefully cultivated", "chaotic energy",
    "slow insidious", "brewing within", "conventional sense",
    "remained stubbornly", "far greater", "something deeper", "primal need",
    "physical blow", "lingering scent", "suffocating weight",
    "meticulously documented", "chipped away", "skeletal remains",
    "voice laced", "grey stone", "perpetual twilight",
    "simmering resentment", "unlike anything", "delicate dance",
    "unsettling beauty", "enduring power", "stubborn refusal",
    "former self", "realization dawned", "established order",
    "unsettling silence", "unsettling truth", "wasnt seeking",
    "ruthless efficiency", "relentless grey", "nestled amongst",
    "pressed onward", "voice strained", "silent promise", "bruised purple",
    "something profoundly", "calculated risk", "desperately tried",
    "isnt simply", "meticulously planned", "quiet almost",
    "deliberate almost", "felt utterly", "air around", "storm brewing",
    "growing unease", "man whose", "every step", "white tower",
    "something older", "desperate dance", "response exploring",
    "never truly", "always felt", "older something", "years spent",
    "far older", "chilling testament", "iron roof",
    "devastating consequences", "physical manifestation", "deliberate act",
    "always seemed", "low rumble", "fragile hope", "deeply unsettling",
    "natural world", "descended upon", "mirror reflecting", "rain hadnt",
    "desolate landscape", "hed witnessed", "deliberate attempt",
    "utterly devoid", "immense power", "oppressive atmosphere",
    "desperately wanted", "voice tight", "spoke volumes",
    "sickening certainty", "watchful eyes", "specific aspect",
    "wasnt interested", "eyes burning", "damp stone", "hidden beneath",
    "small almost", "desperate almost", "simple act", "unsettling energy",
    "cold calculating", "grizzled veteran", "dust motes", "didnt simply",
    "true nature", "almost ritualistic", "unwavering belief",
    "fragile peace", "strange almost", "first time", "unsettling quiet",
    "left behind", "chilling precision", "life lived", "constant threat",
    "felt hollow", "grey sky", "stepped forward", "frantic energy",
    "yet amidst", "initially dismissed", "unsettling grace", "raw untamed",
    "growing dread", "perhaps focusing", "quiet observation", "wasnt sure",
    "silent acknowledgment", "couldnt shake", "didnt speak",
    "unsettling realization", "hidden within", "genuine connection",
    "unwavering resolve", "surged forward", "focal point", "cold dread",
    "almost obsessive", "oppressive silence", "final desperate",
    "wasnt built", "rain mirrored", "shift occurred", "bygone era",
    "tapestry woven", "darkness within", "frantic beat",
    "carefully curated", "quiet dignity", "hushed tones", "shadows across",
    "small intricately", "profound unsettling", "almost clinical",
    "potential threat", "unsettling atmosphere", "unsettling presence",
    "began subtly", "chaotic dance", "crushing weight",
    "desperately needed", "centuries ago", "else something",
    "momentarily stunned", "forgotten lore", "polished obsidian",
    "grim reminder", "desperate act", "silent acknowledgement",
    "brute strength", "direct confrontation", "etched onto",
    "meticulously documenting", "relentless percussion",
    "something darker", "grim testament", "fragile shield",
    "relentless advance", "fragmented memories", "deeply rooted",
    "chilling efficiency", "brutal reality", "treacherous currents",
    "biting wind", "air thrummed", "something vaguely", "spent years",
    "profound understanding", "wasnt entirely", "couldnt afford",
    "felt heavy", "raging within", "dancing shadows", "poignant reminder",
    "brutal reminder", "drip drip", "turning point", "eyes scanning",
    "unnerving stillness", "brow furrowed", "quiet intensity",
    "immediate threat", "shared understanding", "relentless pressure",
    "watchful presence", "forgotten corner", "unsettling calm",
    "profound sadness", "low resonant", "ancient texts",
    "constant unsettling", "salt spray", "silent sentinel",
    "fleeting moment", "eyes reflecting", "chilling awareness",
    "silas blackwood", "simmering rage", "hed initially",
    "intricate patterns", "vantage point", "felt different",
    "swift decisive", "world around", "horrifying truth",
    "desperately sought", "storm raging", "strategically placed",
    "faces grim", "watchful gaze", "relentless tide", "centered around",
    "shared experience", "flickering light", "barely perceptible",
    "precarious balance", "overwhelming odds", "uncanny ability",
    "silent witness", "living breathing", "casting long", "frantic rhythm",
    "ground beneath", "silence punctuated", "swirling around",
    "felt strangely", "buried beneath", "desperately seeking", "eyes fixed",
    "intensified blurring", "temporary reprieve", "swift brutal",
    "terrifying speed", "every movement", "another layer",
    "something within", "hed anticipated", "unsettling power",
    "irrevocably altered", "chilling echo", "shifting sands",
    "almost frantic", "metallic scent", "blackwood manor", "damp air",
    "stripping away", "chilling premonition", "face pale",
    "perilous journey", "face grim", "constant companion",
    "however remained", "stripped bare", "flickering candlelight",
    "suddenly felt", "brief respite", "grotesque parody", "almost mournful",
    "subtle unsettling", "practiced grace", "constant mournful",
    "brutal dance", "nervous energy", "almost palpable", "isnt merely",
    "precious seconds", "desperate yearning", "desperate desire",
    "constructed illusion", "almost unbearable", "unpredictable nature",
    "internal struggle", "intensified washing", "unwavering focus",
    "weathered stone", "something metallic", "utterly alien", "every move",
    "oppressive darkness", "grim reality", "man sculpted", "jagged peaks",
    "something shifted", "overwhelming force",
}

# Phrase matching runs on text reduced to lowercase letters and spaces, so any
# phrase containing another character (", almost", "---") could never match
# there. Those are matched against the lowercased raw completion instead.
_NON_PLAIN_CHAR = re.compile(r"[^a-z ]")
PLAIN_PHRASES = tuple(sorted(p for p in PHRASES if not _NON_PLAIN_CHAR.search(p)))
RAW_PHRASES = tuple(sorted(p for p in PHRASES if _NON_PLAIN_CHAR.search(p)))

# Each occurrence of one of these characters counts as one slop hit.
PUNC = ["—", "–", "*", "#", "…"]

# Number of times slop_reward has been called; gates the lexical score.
COUNT = 0


def score_lexical(text: str) -> float:
    """Return the lexical-diversity score of *text*: ``MTLD - 100``, capped at 20.

    There is no lower bound, so low-diversity text scores negative.
    """
    mtld = LexicalRichness(text).mtld()
    score = min(mtld - 100.0, 20.0)
    print(f"MTLD = {mtld}, score = {score}")
    return score


def score_slop(prompt: str, completion: str) -> float:
    """Return ``25 - 5 * hits``, where *hits* counts slop found in *completion*.

    Hits come from four sources:

    * "rain" / "air" occurrences (weight 2 each), only when the word is not
      already present in *prompt*,
    * characters listed in ``PUNC``,
    * entries of ``PHRASES`` (matched case-insensitively),
    * matches of ``REGEXES``.
    """
    count = 0

    # NOTE(review): these are substring checks, so "train"/"brain" count as
    # "rain" and "hair"/"chair" count as "air". Kept as in the original.
    for word in ("rain", "air"):
        if word in completion and word not in prompt:
            hits = completion.count(word)
            count += hits * 2
            print(f"{word} = {hits}")

    # PHRASES are lowercase (including names such as "silas blackwood"), so the
    # text must be lowercased or capitalised occurrences are silently missed.
    lowered = completion.lower()
    prep = re.sub(r"[^a-z ]", "", lowered)

    count += sum(completion.count(p) for p in PUNC)
    count += sum(prep.count(p) for p in PLAIN_PHRASES)
    count += sum(lowered.count(p) for p in RAW_PHRASES)
    count += sum(len(r.findall(completion)) for r in REGEXES)

    score = 25.0 - (count * 5.0)
    print(f"slop count = {count}, score = {score}")
    return score


def score_word_count(prompt: str, completion: str) -> float:
    """Return ``20 - 200 * relative_error`` of the completion's word count.

    The requested count is read from the second whitespace-separated token of
    *prompt*.

    Raises:
        IndexError: if *prompt* has fewer than two tokens.
        ValueError: if the second token is not an integer.
    """
    requested = int(prompt.split()[1])
    # split() with no argument treats any whitespace run as one separator;
    # split(" ") merged words separated by newlines and counted empty strings.
    actual = len(completion.split())
    diff = abs(requested - actual) / (requested + 1e-9)  # epsilon avoids /0
    score = 20.0 - (diff * 200.0)
    print(
        f"requested words = {requested}, actual = {actual}, diff = {diff:.3f}, score = {score}"
    )
    return score


def slop_reward(prompts, completions, **kwargs) -> list[float]:
    """Reward function passed to ``GRPOTrainer``: one float per completion.

    Reads the text from ``prompts[i][0]["content"]`` and
    ``completions[i][0]["content"]``. The reward is slop score plus word-count
    score; from the 200th call onwards the lexical score is added as well.
    """
    global COUNT
    COUNT += 1
    use_lexical = COUNT >= 200  # add in lexical score after 200 steps

    rewards = []
    for prompt, completion in zip(prompts, completions):
        prompt_text = prompt[0]["content"]
        response = completion[0]["content"]
        reward = score_slop(prompt_text, response) + score_word_count(
            prompt_text, response
        )
        if use_lexical:
            reward += score_lexical(response)
        rewards.append(reward)
    return rewards


MAXLEN = 3
REWARDS: deque = deque(maxlen=MAXLEN)  # rolling window of logged rewards
REWARD_MAX = 65.0  # 25 (slop) + 20 (word count) + 20 (lexical)


class MonitorRewardCallback(TrainerCallback):
    """Stop training once the rolling mean reward is within 5 of ``REWARD_MAX``."""

    def on_log(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Record the logged ``reward`` and request a stop when the mean is high enough."""
        logs = kwargs.get("logs")
        if not logs:
            return
        reward = logs.get("reward")
        if reward is None:
            # Log entries without a reward must not enter the window.
            return
        REWARDS.append(reward)
        # Only judge a full window. The original divided a partial window's sum
        # by MAXLEN, which can never reach the threshold while rewards are
        # capped at REWARD_MAX, so this is the same decision made explicit.
        if len(REWARDS) < MAXLEN:
            return
        mean = sum(REWARDS) / len(REWARDS)
        if mean >= REWARD_MAX - 5.0:
            control.should_training_stop = True


max_seq_length = 1200
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="google/gemma-3-4b-it",
    # fast_inference = True,
    max_seq_length=max_seq_length,  # Choose any for long context!
    load_in_4bit=False,  # 4 bit quantization to reduce memory
    load_in_8bit=False,  # [NEW!] A bit more accurate, uses 2x memory
)

model = FastLanguageModel.get_peft_model(
    model,
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    r=64,
    lora_alpha=128,
    lora_dropout=0,
    bias="none",
    random_state=888,
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
)

dataset = load_dataset(
    "json", data_files="/home/anon/dataset_small.json", split="train"
)

max_prompt_length = 200
training_args = GRPOConfig(
    # use_vllm = True,
    # cache_implementation='offloaded',
    temperature=1.5,
    min_p=0.1,
    # loss_type="dr_grpo",
    loss_type="bnpo",
    learning_rate=3e-6,
    adam_beta1=0.9,
    adam_beta2=0.9,
    weight_decay=5e-6,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    # optim = "adamw_8bit",
    optim="adamw_torch_fused",
    scale_rewards=False,
    logging_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,  # Increase to 4 for smoother training
    num_generations=15,
    max_prompt_length=max_prompt_length,
    # Prompt and completion together must fit in max_seq_length.
    max_completion_length=max_seq_length - max_prompt_length,
    num_train_epochs=1,
    max_grad_norm=0.1,
    report_to="wandb",
    # report_to="none",
    output_dir="outputs",
)

trainer = GRPOTrainer(
    model=model,
    processing_class=tokenizer,
    reward_funcs=[slop_reward],
    args=training_args,
    train_dataset=dataset,
    callbacks=[MonitorRewardCallback()],
)

# Memory reserved before training, in GB, used as the baseline for the report.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

trainer_stats = trainer.train()

used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Save the adapter and tokenizer, then a merged copy of the model.
model.save_pretrained("gemma3_tune")
tokenizer.save_pretrained("gemma3_tune")
model.save_pretrained_merged("gemma3_tune_merged", tokenizer)