{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "collapsed": true,
    "id": "JvMRbVLEJlZT"
   },
   "outputs": [],
   "source": [
    "#@title 🤗 AutoTrain LLM\n",
    "#@markdown In order to use this colab\n",
    "#@markdown - upload train.csv to a folder named `data/`\n",
    "#@markdown - train.csv must contain a `text` column\n",
    "#@markdown - choose a project name if you wish\n",
    "#@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
    "#@markdown - add huggingface information (token) if you wish to push trained model to huggingface hub\n",
    "#@markdown - update hyperparameters if you wish\n",
    "#@markdown - click `Runtime > Run all` or run each cell individually\n",
    "#@markdown - report issues / feature requests here: https://github.com/huggingface/autotrain-advanced/issues\n",
    "\n",
    "\n",
    "# `os` is imported here because the configuration cell below uses os.environ.\n",
    "import os\n",
    "# Install/upgrade AutoTrain and run its Colab setup; output is redirected to\n",
    "# log files (install_logs.txt, setup_logs.txt) instead of the cell output.\n",
    "!pip install -U autotrain-advanced > install_logs.txt 2>&1\n",
    "!autotrain setup --colab > setup_logs.txt\n",
    "# Importing after the install doubles as a check that the package is usable.\n",
    "from autotrain import __version__\n",
    "print(f'AutoTrain version: {__version__}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "A2-_lkBS1WKA"
   },
   "outputs": [],
   "source": [
    "#@markdown ---\n",
    "#@markdown #### Project Config\n",
    "#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n",
    "project_name = 'my-autotrain-llm' # @param {type:\"string\"}\n",
    "model_name = 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n",
    "\n",
    "#@markdown ---\n",
    "#@markdown #### Push to Hub?\n",
    "#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face account.\n",
    "#@markdown If you don't use these, the model will be saved in Google Colab and you are required to download it manually.\n",
    "#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n",
    "#@markdown You can find your token here: https://huggingface.co/settings/tokens\n",
    "push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
    "hf_token = \"hf_XXX\" #@param {type:\"string\"}\n",
    "hf_username = \"abc\" #@param {type:\"string\"}\n",
    "\n",
    "#@markdown ---\n",
    "#@markdown #### Hyperparameters\n",
    "unsloth = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
    "learning_rate = 2e-4 # @param {type:\"number\"}\n",
    "num_epochs = 1 #@param {type:\"number\"}\n",
    "batch_size = 1 # @param {type:\"slider\", min:1, max:32, step:1}\n",
    "block_size = 1024 # @param {type:\"number\"}\n",
    "trainer = \"sft\" # @param [\"generic\", \"sft\"] {type:\"string\"}\n",
    "warmup_ratio = 0.1 # @param {type:\"number\"}\n",
    "weight_decay = 0.01 # @param {type:\"number\"}\n",
    "gradient_accumulation = 4 # @param {type:\"number\"}\n",
    "mixed_precision = \"fp16\" # @param [\"fp16\", \"bf16\", \"none\"] {type:\"string\"}\n",
    "peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
    "quantization = \"int4\" # @param [\"int4\", \"int8\", \"none\"] {type:\"string\"}\n",
    "lora_r = 16 #@param {type:\"number\"}\n",
    "lora_alpha = 32 #@param {type:\"number\"}\n",
    "lora_dropout = 0.05 #@param {type:\"number\"}\n",
    "\n",
    "# Export the Hub credentials as environment variables: conf.yaml refers to them\n",
    "# through the ${HF_USERNAME} / ${HF_TOKEN} placeholders rather than embedding the values.\n",
    "os.environ[\"HF_TOKEN\"] = hf_token\n",
    "os.environ[\"HF_USERNAME\"] = hf_username\n",
    "\n",
    "# Render the AutoTrain config from the form values above.\n",
    "# - Indentation is significant in YAML: text_column must be nested under column_mapping.\n",
    "# - Doubled braces (${{...}}) make the f-string emit a literal ${...}.\n",
    "conf = f\"\"\"\n",
    "task: llm-{trainer}\n",
    "base_model: {model_name}\n",
    "project_name: {project_name}\n",
    "log: tensorboard\n",
    "backend: local\n",
    "\n",
    "data:\n",
    "  path: data/\n",
    "  train_split: train\n",
    "  valid_split: null\n",
    "  chat_template: null\n",
    "  column_mapping:\n",
    "    text_column: text\n",
    "\n",
    "params:\n",
    "  block_size: {block_size}\n",
    "  lr: {learning_rate}\n",
    "  warmup_ratio: {warmup_ratio}\n",
    "  weight_decay: {weight_decay}\n",
    "  epochs: {num_epochs}\n",
    "  batch_size: {batch_size}\n",
    "  gradient_accumulation: {gradient_accumulation}\n",
    "  mixed_precision: {mixed_precision}\n",
    "  peft: {peft}\n",
    "  quantization: {quantization}\n",
    "  lora_r: {lora_r}\n",
    "  lora_alpha: {lora_alpha}\n",
    "  lora_dropout: {lora_dropout}\n",
    "  unsloth: {unsloth}\n",
    "\n",
    "hub:\n",
    "  username: ${{HF_USERNAME}}\n",
    "  token: ${{HF_TOKEN}}\n",
    "  push_to_hub: {push_to_hub}\n",
    "\"\"\"\n",
    "\n",
    "# The training cell reads this file via `autotrain --config conf.yaml`.\n",
    "with open(\"conf.yaml\", \"w\") as f:\n",
    "    f.write(conf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "id": "g3cd_ED_yXXt"
   },
   "outputs": [],
   "source": [
    "!autotrain --config conf.yaml"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}