# palm-reader / app.py
import gradio as gr
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch
# Load model & processor
# model_id = "llava-hf/llava-1.5-7b-hf"       # exceeds the 16 GB memory limit
# model_id = "llava-hf/llava-1.5-7b-hf-int4"  # does not exist on the Hub
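# TinyLLaVA: a compact LLaVA variant that fits within the memory limit noted above.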
model_id = "bczhou/tiny-llava-v1-hf"
processor = AutoProcessor.from_pretrained(model_id)
processor.patch_size = 14  # ✅ Fix: set manually (not provided by this processor config)
model = LlavaForConditionalGeneration.from_pretrained(
model_id,
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
low_cpu_mem_usage=True,
device_map="auto",
)
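# device_map="auto" lets accelerate place the weights on GPU when available (CPU otherwise),
# and low_cpu_mem_usage=True keeps peak RAM down while the checkpoint loads.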
# Main prediction function
def analyze_palm(image, question, history):
    if image is None or not question.strip():
        history.append((question, "Please provide both an image and a question."))
        return history, ""
# Add <image> token explicitly in the text
prompt = f"<image>\n{question}"
print("Processing image and prompt")
# Manual prompt construction (works for TinyLLaVA)
inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
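    # Note: if the model was loaded in float16 (GPU path), the float32 pixel_values may need
    # an explicit cast, e.g. inputs = inputs.to(model.device, torch.float16).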
print("Inputs encoded")
outputs = model.generate(**inputs, max_new_tokens=256)
print("Output generated")
    # Decode only the newly generated tokens so the prompt is not echoed back in the reply
    response = processor.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True).strip()
history.append((question, response))
print("Returning response")
return history, ""
# Build UI using Blocks
with gr.Blocks() as demo:
    gr.Markdown("## 🖐️ AI Palm Reader\nUpload a palm image and ask a question. Get a palmistry-style response.")
with gr.Row():
with gr.Column(scale=1):
image_input = gr.Image(type="pil", label="Palm Image")
prompt_input = gr.Textbox(lines=2, label="Your Question", placeholder="What does my palm say?")
submit_btn = gr.Button("Ask")
with gr.Column(scale=2):
chatbot = gr.Chatbot(label="Palmistry Chat")
state = gr.State([])
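    # The click handler threads the chat history (gr.State) through analyze_palm,
    # updating the Chatbot and clearing the question box on each turn.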
submit_btn.click(
fn=analyze_palm,
inputs=[image_input, prompt_input, state],
outputs=[chatbot, prompt_input]
)
demo.launch()