Spaces:

kalinin-a-i
/

ML2_HW4_Kalinin

Running

Alexey Kalinin committed on Mar 31

Commit

44ee98d

1 Parent(s): eda7916

add trained model

Files changed (4) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from inference import classify, load_model  # Replace with your actual module
 @st.cache_resource
 def get_model():
-    return load_model()
 def get_arxiv_article_info(url):
     """Extracts title and abstract from an arXiv article link."""
@@ -16,7 +16,7 @@ def get_arxiv_article_info(url):
     return None, None
 # Load model once
-model = get_model()
 st.title("ArXiv Article Classifier")
@@ -41,6 +41,6 @@ elif input_method == "Manual Input":
 # Classification and output
 if title and abstract:
-    category = classify(title, abstract)
     st.write(f"### Title: {title}")
     st.write(f"**Predicted Category:** {category}")

 @st.cache_resource
 def get_model():
+    return load_model("kalinin-a-i/ml2-hw4", "class2name.joblib")
 def get_arxiv_article_info(url):
     """Extracts title and abstract from an arXiv article link."""
     return None, None
 # Load model once
+pipe, class2name = get_model()
 st.title("ArXiv Article Classifier")
 # Classification and output
 if title and abstract:
+    category = classify(pipe, class2name, title, abstract)
     st.write(f"### Title: {title}")
     st.write(f"**Predicted Category:** {category}")

class2name.joblib ADDED Viewed

Binary file (400 Bytes). View file

inference.py CHANGED Viewed

@@ -1,6 +1,24 @@
-def load_model():
-    pass
-def classify(title, abstract):
-    return "physics"

+from transformers import  AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+from transformers import pipeline, Pipeline
+from joblib import load
+def load_model(path2chkpt: str, path2mapping: str):
+    model = AutoModelForSequenceClassification.from_pretrained("/home/jupyter/datasphere/project/hw4_nlp_ops/weights_20_classes/checkpoint-4500")
+    tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased")
+    pipe = pipeline("text-classification",
+                   model=model,
+                   tokenizer=tokenizer)
+    class2name = load(path2mapping)
+    return pipe, class2name
+def classify(pipe: Pipeline, class2name: dict[str, str], title: str, abstract: str):
+    inputs = ".".join([title, abstract])
+    class_code = pipe(inputs)[0]["label"]
+    return class2name[class_code]

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 streamlit
-arxiv

 streamlit
+arxiv
+joblib