Spaces:
Sleeping
Sleeping
Sobro Inc
committed on
Commit
·
4786618
1
Parent(s):
fdeb5da
Fix permission errors and use simplified version
Browse files- Dockerfile +20 -5
- UPDATE_MCP_CONFIG.md +221 -0
- main.py +8 -13
- main_simple.py +148 -0
- push_to_hf.sh +4 -0
Dockerfile
CHANGED
@@ -1,5 +1,8 @@
|
|
1 |
FROM python:3.10-slim
|
2 |
|
|
|
|
|
|
|
3 |
WORKDIR /app
|
4 |
|
5 |
# Install system dependencies
|
@@ -14,14 +17,26 @@ COPY requirements.txt .
|
|
14 |
RUN pip install --no-cache-dir -r requirements.txt
|
15 |
|
16 |
# Download required NLTK data
|
17 |
-
RUN python -m nltk.downloader punkt stopwords
|
|
|
|
|
|
|
18 |
|
19 |
# Copy application code
|
20 |
-
COPY app/ ./app/
|
21 |
-
COPY main.py .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Expose port
|
24 |
EXPOSE 7860
|
25 |
|
26 |
-
# Run the application
|
27 |
-
CMD ["uvicorn", "
|
|
|
1 |
FROM python:3.10-slim
|
2 |
|
3 |
+
# Create app user
|
4 |
+
RUN useradd -m -u 1000 user
|
5 |
+
|
6 |
WORKDIR /app
|
7 |
|
8 |
# Install system dependencies
|
|
|
17 |
RUN pip install --no-cache-dir -r requirements.txt
|
18 |
|
19 |
# Download required NLTK data
|
20 |
+
RUN python -m nltk.downloader -d /usr/local/share/nltk_data punkt stopwords
|
21 |
+
|
22 |
+
# Create cache directories with proper permissions
|
23 |
+
RUN mkdir -p /app/.cache && chown -R user:user /app/.cache
|
24 |
|
25 |
# Copy application code
|
26 |
+
COPY --chown=user:user app/ ./app/
|
27 |
+
COPY --chown=user:user main.py .
|
28 |
+
COPY --chown=user:user main_simple.py .
|
29 |
+
|
30 |
+
# Switch to user
|
31 |
+
USER user
|
32 |
+
|
33 |
+
# Set environment variables
|
34 |
+
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
|
35 |
+
ENV HF_HOME=/app/.cache/huggingface
|
36 |
+
ENV PYTHONUNBUFFERED=1
|
37 |
|
38 |
# Expose port
|
39 |
EXPOSE 7860
|
40 |
|
41 |
+
# Run the application (using simple version first)
|
42 |
+
CMD ["uvicorn", "main_simple:app", "--host", "0.0.0.0", "--port", "7860"]
|
UPDATE_MCP_CONFIG.md
ADDED
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Обновление конфигурации MCP для SobroJuriBert
|
2 |
+
|
3 |
+
После развертывания SobroJuriBert, обнови конфигурацию MCP:
|
4 |
+
|
5 |
+
## 1. Обнови файл конфигурации
|
6 |
+
|
7 |
+
Отредактируй `/mnt/c/Users/s7/AppData/Roaming/Claude/claude_desktop_config.json`:
|
8 |
+
|
9 |
+
```json
|
10 |
+
{
|
11 |
+
"mcpServers": {
|
12 |
+
"filesystem": {
|
13 |
+
"command": "npx",
|
14 |
+
"args": [
|
15 |
+
"-y",
|
16 |
+
"@modelcontextprotocol/server-filesystem",
|
17 |
+
"C:\\Users\\s7\\Documents",
|
18 |
+
"C:\\sobro-mcp"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"memory": {
|
22 |
+
"command": "npx",
|
23 |
+
"args": [
|
24 |
+
"-y",
|
25 |
+
"@modelcontextprotocol/server-memory"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
"sobrojuribert": {
|
29 |
+
"command": "C:\\Users\\s7\\AppData\\Local\\Microsoft\\WindowsApps\\python.exe",
|
30 |
+
"args": [
|
31 |
+
"C:\\sobro-mcp\\sobrojuribert_mcp.py"
|
32 |
+
]
|
33 |
+
}
|
34 |
+
}
|
35 |
+
}
|
36 |
+
```
|
37 |
+
|
38 |
+
## 2. Создай новый MCP сервер
|
39 |
+
|
40 |
+
Создай файл `C:\sobro-mcp\sobrojuribert_mcp.py`:
|
41 |
+
|
42 |
+
```python
|
43 |
+
#!/usr/bin/env python3
|
44 |
+
"""SobroJuriBert MCP Server"""
|
45 |
+
|
46 |
+
import asyncio
|
47 |
+
from typing import Any
|
48 |
+
import aiohttp
|
49 |
+
from mcp.server.models import InitializationOptions
|
50 |
+
from mcp.server import NotificationOptions, Server
|
51 |
+
import mcp.server.stdio
|
52 |
+
import mcp.types as types
|
53 |
+
|
54 |
+
API_URL = "https://sobroinc-sobrojuribert.hf.space"
|
55 |
+
|
56 |
+
async def run_server():
    """Run the SobroJuriBert MCP server over stdio.

    Registers five tools that proxy requests to the hosted JuriBERT REST
    API at ``API_URL``. The shared aiohttp session is created lazily on
    the first tool call and — fix over the original — is closed in a
    ``finally`` block, so it no longer leaks when ``server.run()`` raises.
    """
    server = Server("sobrojuribert-mcp")

    # Lazily-created HTTP session, shared across tool calls.
    session = None

    @server.list_tools()
    async def handle_list_tools() -> list[types.Tool]:
        """Advertise the available JuriBERT tools to the MCP client."""
        return [
            types.Tool(
                name="juribert_mask_fill",
                description="Fill [MASK] tokens in French legal text",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "text": {"type": "string", "description": "Text with [MASK] tokens"},
                        "top_k": {"type": "integer", "default": 5},
                    },
                    "required": ["text"],
                },
            ),
            types.Tool(
                name="juribert_embeddings",
                description="Generate embeddings for French legal texts",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "texts": {"type": "array", "items": {"type": "string"}},
                    },
                    "required": ["texts"],
                },
            ),
            types.Tool(
                name="juribert_ner",
                description="Extract entities from French legal text",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "text": {"type": "string"},
                    },
                    "required": ["text"],
                },
            ),
            types.Tool(
                name="juribert_classify",
                description="Classify French legal documents",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "text": {"type": "string"},
                    },
                    "required": ["text"],
                },
            ),
            types.Tool(
                name="juribert_analyze_contract",
                description="Analyze French legal contracts",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "text": {"type": "string"},
                        "contract_type": {"type": "string"},
                    },
                    "required": ["text"],
                },
            ),
        ]

    @server.call_tool()
    async def handle_call_tool(name: str, arguments: dict) -> list[types.TextContent]:
        """Proxy a tool call to the matching REST endpoint and format the reply."""
        nonlocal session

        if session is None:
            session = aiohttp.ClientSession()

        try:
            endpoint_map = {
                "juribert_mask_fill": "/mask-fill",
                "juribert_embeddings": "/embeddings",
                "juribert_ner": "/ner",
                "juribert_classify": "/classify",
                "juribert_analyze_contract": "/analyze-contract",
            }

            endpoint = endpoint_map.get(name)
            if not endpoint:
                return [types.TextContent(type="text", text=f"Unknown tool: {name}")]

            async with session.post(
                f"{API_URL}{endpoint}",
                json=arguments,
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                result = await response.json()

            # Format response based on tool
            if name == "juribert_mask_fill":
                text = f"Predictions for: {result['input']}\n"
                for pred in result['predictions']:
                    text += f"- {pred['sequence']} (score: {pred['score']:.3f})\n"

            elif name == "juribert_embeddings":
                text = f"Generated {len(result['embeddings'])} embeddings "
                text += f"(dimension: {result['dimension']})"

            elif name == "juribert_ner":
                text = f"Found {len(result['entities'])} entities:\n"
                for ent in result['entities']:
                    text += f"- {ent['text']} ({ent['type']})\n"

            elif name == "juribert_classify":
                text = "Document classification:\n"
                text += f"Primary: {result['primary_category']}\n"
                text += f"Confidence: {result['confidence']:.1%}\n"

            elif name == "juribert_analyze_contract":
                text = "Contract Analysis:\n"
                text += f"Type: {result['contract_type']}\n"
                text += f"Parties: {len(result['parties'])}\n"
                text += f"Key clauses: {', '.join(result['key_clauses'])}\n"
                if result['missing_clauses']:
                    text += f"Missing: {', '.join(result['missing_clauses'])}\n"

            return [types.TextContent(type="text", text=text)]

        except Exception as e:
            return [types.TextContent(type="text", text=f"Error: {str(e)}")]

    try:
        async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
            await server.run(
                read_stream,
                write_stream,
                InitializationOptions(
                    server_name="sobrojuribert-mcp",
                    server_version="1.0.0",
                    capabilities=server.get_capabilities(
                        notification_options=NotificationOptions(),
                        experimental_capabilities={},
                    ),
                ),
            )
    finally:
        # Fix: the original only closed the session on a clean exit,
        # leaking the aiohttp connection pool if server.run() raised.
        if session:
            await session.close()
199 |
+
|
200 |
+
def main():
    """Synchronous entry point: drive the async MCP server to completion."""
    asyncio.run(run_server())


if __name__ == "__main__":
    main()
|
205 |
+
```
|
206 |
+
|
207 |
+
## 3. Перезапусти Claude Desktop
|
208 |
+
|
209 |
+
После обновления конфигурации, перезапусти Claude Desktop.
|
210 |
+
|
211 |
+
## 4. Используй новые команды
|
212 |
+
|
213 |
+
```
|
214 |
+
Используй juribert_mask_fill с текстом "Le contrat est signé entre les [MASK]"
|
215 |
+
|
216 |
+
Используй juribert_ner для извлечения сущностей из "Le Tribunal de Grande Instance de Paris"
|
217 |
+
|
218 |
+
Классифицируй документ с помощью juribert_classify
|
219 |
+
|
220 |
+
Проанализируй контракт с помощью juribert_analyze_contract
|
221 |
+
```
|
main.py
CHANGED
@@ -80,30 +80,25 @@ async def load_models():
|
|
80 |
try:
|
81 |
# Load JuriBERT base model for embeddings and mask filling
|
82 |
logger.info("Loading JuriBERT base model...")
|
83 |
-
models['juribert_base'] = AutoModel.from_pretrained('dascim/juribert-base')
|
84 |
-
tokenizers['juribert_base'] = AutoTokenizer.from_pretrained('dascim/juribert-base')
|
85 |
-
models['juribert_mlm'] = AutoModelForMaskedLM.from_pretrained('dascim/juribert-base')
|
86 |
|
87 |
# Load CamemBERT models as fallback/complement
|
88 |
logger.info("Loading CamemBERT models...")
|
89 |
models['camembert_ner'] = pipeline(
|
90 |
'ner',
|
91 |
model='Jean-Baptiste/camembert-ner-with-dates',
|
92 |
-
aggregation_strategy="simple"
|
|
|
93 |
)
|
94 |
|
95 |
-
|
96 |
-
logger.info("Loading French legal classification model...")
|
97 |
-
models['legal_classifier'] = pipeline(
|
98 |
-
'text-classification',
|
99 |
-
model='nlptown/bert-base-multilingual-uncased-sentiment' # Placeholder
|
100 |
-
)
|
101 |
-
|
102 |
-
logger.info("All models loaded successfully!")
|
103 |
|
104 |
except Exception as e:
|
105 |
logger.error(f"Error loading models: {e}")
|
106 |
-
|
|
|
107 |
|
108 |
@app.get("/")
|
109 |
async def root():
|
|
|
80 |
try:
|
81 |
# Load JuriBERT base model for embeddings and mask filling
|
82 |
logger.info("Loading JuriBERT base model...")
|
83 |
+
models['juribert_base'] = AutoModel.from_pretrained('dascim/juribert-base', cache_dir="/app/.cache/huggingface")
|
84 |
+
tokenizers['juribert_base'] = AutoTokenizer.from_pretrained('dascim/juribert-base', cache_dir="/app/.cache/huggingface")
|
85 |
+
models['juribert_mlm'] = AutoModelForMaskedLM.from_pretrained('dascim/juribert-base', cache_dir="/app/.cache/huggingface")
|
86 |
|
87 |
# Load CamemBERT models as fallback/complement
|
88 |
logger.info("Loading CamemBERT models...")
|
89 |
models['camembert_ner'] = pipeline(
|
90 |
'ner',
|
91 |
model='Jean-Baptiste/camembert-ner-with-dates',
|
92 |
+
aggregation_strategy="simple",
|
93 |
+
model_kwargs={"cache_dir": "/app/.cache/huggingface"}
|
94 |
)
|
95 |
|
96 |
+
logger.info("Models loaded successfully!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
except Exception as e:
|
99 |
logger.error(f"Error loading models: {e}")
|
100 |
+
# Don't crash completely, allow basic endpoints to work
|
101 |
+
logger.warning("Running in limited mode without all models")
|
102 |
|
103 |
@app.get("/")
|
104 |
async def root():
|
main_simple.py
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
from datetime import datetime
|
4 |
+
from typing import List, Dict, Any, Optional
|
5 |
+
from fastapi import FastAPI, HTTPException
|
6 |
+
from fastapi.middleware.cors import CORSMiddleware
|
7 |
+
from pydantic import BaseModel, Field
|
8 |
+
import torch
|
9 |
+
from transformers import AutoTokenizer, AutoModel, pipeline
|
10 |
+
import numpy as np
|
11 |
+
|
12 |
+
# Configure logging
|
13 |
+
logging.basicConfig(level=logging.INFO)
|
14 |
+
logger = logging.getLogger(__name__)
|
15 |
+
|
16 |
+
# Initialize FastAPI app
|
17 |
+
app = FastAPI(
|
18 |
+
title="SobroJuriBert API",
|
19 |
+
description="French Legal AI API powered by JuriBERT",
|
20 |
+
version="1.0.0"
|
21 |
+
)
|
22 |
+
|
23 |
+
# Add CORS middleware
|
24 |
+
app.add_middleware(
|
25 |
+
CORSMiddleware,
|
26 |
+
allow_origins=["*"],
|
27 |
+
allow_credentials=True,
|
28 |
+
allow_methods=["*"],
|
29 |
+
allow_headers=["*"],
|
30 |
+
)
|
31 |
+
|
32 |
+
# Global model storage
|
33 |
+
models = {}
|
34 |
+
tokenizers = {}
|
35 |
+
|
36 |
+
# Pydantic models
|
37 |
+
class TextRequest(BaseModel):
|
38 |
+
text: str = Field(..., description="Text to analyze")
|
39 |
+
|
40 |
+
class NERRequest(BaseModel):
|
41 |
+
text: str = Field(..., description="Legal text for entity extraction")
|
42 |
+
|
43 |
+
class ClassificationRequest(BaseModel):
|
44 |
+
text: str = Field(..., description="Legal document to classify")
|
45 |
+
|
46 |
+
@app.on_event("startup")
async def load_models():
    """Startup hook: announce readiness; heavy models are loaded lazily."""
    logger.info("Starting SobroJuriBert API...")
    logger.info("Models will be loaded on demand to save memory")
|
51 |
+
|
52 |
+
@app.get("/")
async def root():
    """Root endpoint with API information"""
    endpoint_index = {
        "ner": "/ner - Extract legal entities",
        "classify": "/classify - Classify legal documents",
        "health": "/health - Health check",
    }
    return {
        "name": "SobroJuriBert API",
        "version": "1.0.0",
        "description": "French Legal AI API for lawyers",
        "status": "operational",
        "endpoints": endpoint_index,
    }
|
66 |
+
|
67 |
+
@app.post("/ner")
async def extract_entities(request: NERRequest):
    """Extract named entities from French legal text.

    Regex-based heuristics only (DATE, ORG, COURT); the full NER model is
    loaded on demand elsewhere. Raises HTTP 500 on unexpected errors.
    """
    try:
        # Local import kept from the original: this handler is self-contained.
        import re
        entities = []

        # Dates such as 12/05/2023 or 1-1-99.
        for date in re.findall(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', request.text):
            entities.append({"text": date, "type": "DATE"})

        # Organizations: a legal-form prefix followed by the name, stopping at
        # punctuation. Fix: the original pattern `[\w\s]+` was greedy across
        # whitespace and swallowed the remainder of the text.
        for org in re.findall(r"(?:SARL|SAS|SA|EURL)\s+[^,.;\n]+", request.text):
            entities.append({"text": org.strip(), "type": "ORG"})

        # Courts: capture the full multi-word name. Fix: the original lazy
        # pattern `[\w\s]+?(?=\s|,|\.)` stopped after the first word, yielding
        # e.g. "Tribunal de" instead of "Tribunal de Grande Instance de Paris".
        for court in re.findall(r"(?:Tribunal|Cour)(?:\s+[A-Za-zÀ-ÖØ-öø-ÿ']+)*", request.text):
            entities.append({"text": court.strip(), "type": "COURT"})

        return {
            "entities": entities,
            "text": request.text,
            "message": "Basic entity extraction (full NER model loading on demand)"
        }

    except Exception as e:
        logger.error(f"NER error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
99 |
+
|
100 |
+
@app.post("/classify")
async def classify_document(request: ClassificationRequest):
    """Classify French legal documents"""
    try:
        # Keyword-count heuristic over a fixed category vocabulary.
        lowered = request.text.lower()

        keyword_map = {
            "contract": ["contrat", "accord", "convention", "parties"],
            "litigation": ["tribunal", "jugement", "litige", "procès"],
            "corporate": ["société", "sarl", "sas", "entreprise"],
            "employment": ["travail", "salarié", "employeur", "licenciement"],
        }

        # Keep only categories with at least one keyword hit.
        scores = {}
        for category, keywords in keyword_map.items():
            hits = sum(kw in lowered for kw in keywords)
            if hits:
                scores[category] = hits

        primary_category = max(scores, key=scores.get) if scores else "general"

        return {
            "primary_category": primary_category,
            "categories": [{"category": cat, "score": score} for cat, score in scores.items()],
            "confidence": 0.8 if scores else 0.5,
            "document_type": "legal_document"
        }

    except Exception as e:
        logger.error(f"Classification error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
135 |
+
|
136 |
+
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns service status with a timezone-aware UTC timestamp.
    Fix: datetime.utcnow() is deprecated (Python 3.12+) and returned a
    naive timestamp; the ISO string now carries an explicit +00:00 offset.
    """
    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timezone

    return {
        "status": "healthy",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "version": "1.0.0",
        "message": "SobroJuriBert API is running"
    }
|
145 |
+
|
146 |
+
if __name__ == "__main__":
    # Allow running the API directly: `python main_simple.py`.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
|
push_to_hf.sh
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Push the SobroJuriBert Space to Hugging Face.
# Fix: abort on the first failing command (and on unset variables /
# pipeline failures) so "Done!" is only printed after a successful push —
# the original script reported success even when `git push` failed.
set -euo pipefail

echo "Pushing SobroJuriBert to Hugging Face..."
git push -u origin main
echo "Done! Check: https://huggingface.co/spaces/Sobroinc/SobroJuriBert"
|