# palm-reader / app.py
import gradio as gr
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch
# Load model & processor
# model_id = "llava-hf/llava-1.5-7b-hf"       # exceeds the 16 GB memory limit
# model_id = "llava-hf/llava-1.5-7b-hf-int4"  # does not exist on the Hub
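# TinyLLaVA: a compact LLaVA variant that fits within the memory limit noted above.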
model_id = "bczhou/tiny-llava-v1-hf"
processor = AutoProcessor.from_pretrained(model_id)
processor.patch_size = 14  # ✅ Fix: set manually (not provided by this processor config)
model = LlavaForConditionalGeneration.from_pretrained(
model_id,
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
low_cpu_mem_usage=True,
device_map="auto",
)
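# device_map="auto" lets accelerate place the weights on GPU when available (CPU otherwise),
# and low_cpu_mem_usage=True keeps peak RAM down while the checkpoint loads.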
# Main prediction function
def analyze_palm(image, question, history):
    if image is None or not question.strip():
        history.append((question, "Please provide both an image and a question."))
        return history, ""
# Add <image> token explicitly in the text
prompt = f"<image>\n{question}"
print("Processing image and prompt")
# Manual prompt construction (works for TinyLLaVA)
inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
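    # Note: if the model was loaded in float16 (GPU path), the float32 pixel_values may need
    # an explicit cast, e.g. inputs = inputs.to(model.device, torch.float16).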
print("Inputs encoded")
outputs = model.generate(**inputs, max_new_tokens=256)
print("Output generated")
    # Decode only the newly generated tokens so the prompt is not echoed back in the reply
    response = processor.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True).strip()
history.append((question, response))
print("Returning response")
return history, ""
# Build UI using Blocks
with gr.Blocks() as demo:
    gr.Markdown("## 🖐️ AI Palm Reader\nUpload a palm image and ask a question. Get a palmistry-style response.")
with gr.Row():
with gr.Column(scale=1):
image_input = gr.Image(type="pil", label="Palm Image")
prompt_input = gr.Textbox(lines=2, label="Your Question", placeholder="What does my palm say?")
submit_btn = gr.Button("Ask")
with gr.Column(scale=2):
chatbot = gr.Chatbot(label="Palmistry Chat")
state = gr.State([])
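    # The click handler threads the chat history (gr.State) through analyze_palm,
    # updating the Chatbot and clearing the question box on each turn.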
submit_btn.click(
fn=analyze_palm,
inputs=[image_input, prompt_input, state],
outputs=[chatbot, prompt_input]
)
demo.launch()