"""News-driven value-investing pipeline.

Reads a CSV of topics, fetches news for each topic, scores every article with
``analyze_article``, writes one markdown report per topic into ``DATA_DIR``
and converts the markdown files to HTML in ``HTML_DIR``.
"""

import os
import re
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Tuple

import pandas as pd
from dotenv import load_dotenv

from md_html import convert_single_md_to_html as convert_md_to_html
from news_analysis import fetch_deep_news, generate_value_investor_report
from fin_interpreter import analyze_article

BASE_DIR = os.path.dirname(os.path.dirname(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
HTML_DIR = os.path.join(BASE_DIR, "html")

os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(HTML_DIR, exist_ok=True)

load_dotenv()

# Look-back window used when a CSV row has no usable ``timespan_days`` value.
DEFAULT_TIMESPAN_DAYS = 7

# Characters that must never reach a filename: path separators, characters
# reserved on Windows, and control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

ProgressCallback = Optional[Callable[[str], None]]


def derive_priority(sentiment, confidence):
    """Map a sentiment label and its confidence score to a priority label.

    Args:
        sentiment: Sentiment label; only "Positive" and "Negative" can yield
            "High".
        confidence: Numeric confidence of the sentiment classification.

    Returns:
        "High" for Positive above 0.7 or Negative above 0.6, "Medium" for any
        other sentiment above 0.5, otherwise "Low".
    """
    if sentiment == "Positive" and confidence > 0.7:
        return "High"
    if sentiment == "Negative" and confidence > 0.6:
        return "High"
    if confidence > 0.5:
        return "Medium"
    return "Low"


def _clean_topic(raw: Any) -> str:
    """Return the CSV topic cell as a stripped string ("" when missing)."""
    if raw is None or pd.isna(raw):
        return ""
    return str(raw).strip()


def _clean_timespan(raw: Any, default: int = DEFAULT_TIMESPAN_DAYS) -> Any:
    """Return the CSV timespan cell, substituting ``default`` when missing."""
    if raw is None or pd.isna(raw):
        return default
    # A column containing empty cells is read as float; turn 7.0 back into 7.
    if isinstance(raw, float) and raw.is_integer():
        return int(raw)
    return raw


def _report_filename(topic: str, date_str: str) -> str:
    """Build the markdown report filename for ``topic`` on ``date_str``."""
    slug = topic.replace(" ", "_").lower()
    # Topics come from a user-supplied CSV; keep them from escaping DATA_DIR.
    slug = _UNSAFE_FILENAME_CHARS.sub("_", slug)
    return f"{slug}_{date_str}.md"


def run_value_investing_analysis(
    csv_path, progress_callback: ProgressCallback = None
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Fetch, score and report on the news for every topic in the CSV.

    For each row the news is fetched with ``fetch_deep_news``, each article is
    scored with ``analyze_article`` and a markdown report produced by
    ``generate_value_investor_report`` is written to ``DATA_DIR``. Rows with
    no topic, and topics with no news, are skipped.

    Args:
        csv_path: Path of a CSV with a ``topic`` column and an optional
            ``timespan_days`` column (defaults to 7).
        progress_callback: Optional callable receiving a status string before
            each topic is processed.

    Returns:
        A tuple ``(all_articles, company_data)``: the first is a list of dicts
        with Title/URL/Summary/Priority/Date, the second a list of dicts with
        Company/Sentiment/Confidence/Summary.
    """
    current_df = pd.read_csv(csv_path)
    # Computed once so article dates and the filename agree across midnight.
    today = datetime.now().strftime("%Y-%m-%d")

    all_articles = []
    company_data = []

    for _, row in current_df.iterrows():
        topic = _clean_topic(row.get("topic"))
        if not topic:
            # Blank/NaN topic cell: nothing to search for.
            continue
        timespan = _clean_timespan(row.get("timespan_days", DEFAULT_TIMESPAN_DAYS))

        if progress_callback:
            progress_callback(f"🔍 Processing: {topic} ({timespan} days)")

        news = fetch_deep_news(topic, timespan)
        if not news:
            continue

        for article in news:
            # ``or`` also covers keys that are present but hold None.
            summary = article.get("summary") or ""
            title = article.get("title") or "Untitled"
            url = article.get("url") or ""
            date = article.get("date") or today

            # Best effort: a failed analysis degrades to a neutral score
            # instead of aborting the whole run.
            try:
                result = analyze_article(summary)
                sentiment = result.get("sentiment", "Neutral")
                confidence = float(result.get("confidence", 0.0))
            except Exception as e:
                print(f"[FinBERT ERROR] {e}")
                sentiment, confidence = "Neutral", 0.0

            priority = derive_priority(sentiment, confidence)

            # Add to articles_df
            all_articles.append({
                "Title": title,
                "URL": url,
                "Summary": summary,
                "Priority": priority,
                "Date": date,
            })

            # Collect company-level data for insights
            company_data.append({
                "Company": topic,  # For now, use topic as company proxy
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Summary": summary,
            })

        # Save markdown report
        report_body = generate_value_investor_report(topic, news)
        filepath = os.path.join(DATA_DIR, _report_filename(topic, today))
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(report_body)

    return all_articles, company_data


def build_company_insights(company_data):
    """Aggregate per-article records into one insight row per company.

    Args:
        company_data: List of dicts with "Company", "Sentiment", "Confidence"
            and "Summary" keys.

    Returns:
        A DataFrame with columns Company, Mentions, Sentiment (most frequent
        label, "Neutral" when none), Confidence (mean rounded to 2 decimals)
        and Highlights (first two non-empty summaries joined by " | ").
        An empty DataFrame when ``company_data`` is empty.
    """
    if not company_data:
        return pd.DataFrame()

    df = pd.DataFrame(company_data)
    insights = []

    for company, group in df.groupby("Company"):
        sentiment_modes = group["Sentiment"].mode()
        dominant_sentiment = (
            sentiment_modes.iloc[0] if not sentiment_modes.empty else "Neutral"
        )
        # Missing or blank summaries would break or clutter the joined text.
        summaries = [
            text
            for text in group["Summary"].dropna().astype(str)
            if text.strip()
        ]

        insights.append({
            "Company": company,
            "Mentions": len(group),
            "Sentiment": dominant_sentiment,
            "Confidence": round(group["Confidence"].mean(), 2),
            "Highlights": " | ".join(summaries[:2]),
        })

    return pd.DataFrame(insights)


def run_pipeline(csv_path, tavily_api_key, progress_callback: ProgressCallback = None):
    """Run the analysis and convert the markdown reports to HTML.

    Args:
        csv_path: Path of the topics CSV passed to
            ``run_value_investing_analysis``.
        tavily_api_key: Key exported as ``TAVILY_API_KEY``. When falsy, the
            value already present in the environment is left untouched.
        progress_callback: Optional callable receiving status strings.

    Returns:
        A tuple ``(html_paths, articles_df, insights_df)``.
    """
    # Assigning None to os.environ raises TypeError; the key may already have
    # been provided by load_dotenv(), so only override when one is passed.
    if tavily_api_key:
        os.environ["TAVILY_API_KEY"] = tavily_api_key

    all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

    # Convert markdown to HTML.
    # Every .md file in DATA_DIR is converted, not only those written by this
    # run. Sorted because os.listdir order is arbitrary.
    html_paths = []
    for md_file in sorted(os.listdir(DATA_DIR)):
        if md_file.endswith(".md"):
            convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
            # NOTE(review): assumes the converter names its output by swapping
            # ".md" for ".html" — confirm against md_html.
            html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html")))

    articles_df = pd.DataFrame(all_articles)
    insights_df = build_company_insights(company_data)

    return html_paths, articles_df, insights_df


# ---------------------------------------------------------------------------
# Legacy implementation (disabled), kept for reference.
# ---------------------------------------------------------------------------
# import os
# import pandas as pd
# from datetime import datetime
# from dotenv import load_dotenv
# import traceback
# from md_html import convert_single_md_to_html as convert_md_to_html
# from news_analysis import fetch_deep_news, generate_value_investor_report
# from csv_utils import detect_changes
# from fin_interpreter import analyze_article
#
# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# DATA_DIR = os.path.join(BASE_DIR, "data")
# HTML_DIR = os.path.join(BASE_DIR, "html")
# CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
#
# os.makedirs(DATA_DIR, exist_ok=True)
# os.makedirs(HTML_DIR, exist_ok=True)
#
# load_dotenv()
#
# def build_metrics_box(topic, num_articles):
#     now = datetime.now().strftime("%Y-%m-%d %H:%M")
#     return f"""
# > Topic: `{topic}`
# > Articles Collected: `{num_articles}`
# > Generated: `{now}`
# >
# """
#
# def run_value_investing_analysis(csv_path, progress_callback=None):
#     """
#     Runs the analysis for all topics in the CSV.
#     Returns:
#         md_files (list of md file paths)
#         all_articles (list of article dicts)
#     """
#     current_df = pd.read_csv(csv_path)
#     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
#     if os.path.exists(prev_path):
#         previous_df = pd.read_csv(prev_path)
#         changed_df = detect_changes(current_df, previous_df)
#         if changed_df.empty:
#             if progress_callback:
#                 progress_callback("✅ No changes detected. Skipping processing.")
#             return [], []
#     else:
#         changed_df = current_df
#
#     new_md_files = []
#     all_articles = []
#
#     for _, row in changed_df.iterrows():
#         topic = row.get("topic")
#         timespan = row.get("timespan_days", 7)
#         msg = f"🔍 Processing: {topic} ({timespan} days)"
#         print(msg)
#         if progress_callback:
#             progress_callback(msg)
#
#         news = fetch_deep_news(topic, timespan)
#         if not news:
#             warning = f"⚠️ No news found for: {topic}"
#             print(warning)
#             if progress_callback:
#                 progress_callback(warning)
#             continue
#
#         # Add articles to all_articles
#         for article in news:
#             try:
#                 res = analyze_article(article.get("summary", ""))
#                 if isinstance(res, dict):
#                     sentiment = res.get("sentiment")
#                     confidence = res.get("confidence")
#                     signal = res.get("signal")
#                 else:
#                     sentiment, confidence, signal = res[0], res[1], res[2]
#             except Exception as e:
#                 sentiment, confidence, signal = "Unknown", 0.0, "None"
#                 print(f"Error analyzing article: {e}")
#
#             all_articles.append({
#                 "Title": article.get("title"),
#                 "URL": article.get("url"),
#                 "Summary": article.get("summary"),
#                 "Priority": article.get("priority", "Low"),
#                 "Date": article.get("date"),
#                 "Company": article.get("company", topic),
#                 "Sentiment": sentiment,
#                 "Confidence": confidence,
#                 "Signal": signal
#             })
#
#         # Generate report
#         report_body = generate_value_investor_report(topic, news)
#         metrics_md = build_metrics_box(topic, len(news))
#         full_md = metrics_md + report_body
#
#         filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md"
#         filepath = os.path.join(DATA_DIR, filename)
#         counter = 1
#         while os.path.exists(filepath):
#             filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}_{counter}.md"
#             filepath = os.path.join(DATA_DIR, filename)
#             counter += 1
#
#         with open(filepath, "w", encoding="utf-8") as f:
#             f.write(full_md)
#
#         new_md_files.append(filepath)
#
#     if progress_callback:
#         progress_callback(f"✅ Markdown saved to: {DATA_DIR}")
#     current_df.to_csv(prev_path, index=False)
#     return new_md_files, all_articles
#
# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
#     os.environ["TAVILY_API_KEY"] = tavily_api_key
#
#     new_md_files, all_articles = run_value_investing_analysis(csv_path, progress_callback)
#     new_html_paths = []
#     for md_path in new_md_files:
#         convert_md_to_html(md_path, HTML_DIR)
#         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
#         new_html_paths.append(html_path)
#
#     articles_df = pd.DataFrame(all_articles)
#     insights_df = build_company_insights(articles_df)
#     return new_html_paths, articles_df, insights_df
#
# def build_company_insights(articles_df):
#     if articles_df.empty:
#         return pd.DataFrame()
#     grouped = (
#         articles_df.groupby("Company")
#         .agg({
#             "Title": "count",
#             "Sentiment": lambda x: x.mode()[0] if not x.mode().empty else "Neutral",
#             "Signal": lambda x: x.mode()[0] if not x.mode().empty else "Watch"
#         })
#         .reset_index()
#         .rename(columns={"Title": "Mentions"})
#     )
#     return grouped
#
# if __name__ == "__main__":
#     md_files, _ = run_value_investing_analysis(CSV_PATH)
#     for md in md_files:
#         convert_md_to_html(md, HTML_DIR)
#     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")