Prune profile payload for options thesis

This commit is contained in:
Rushabh Gosar
2025-12-29 12:27:30 -08:00
parent 68805ed80a
commit c01a98abce
3 changed files with 273 additions and 74 deletions

View File

@@ -106,6 +106,14 @@ def extract_raw_value(value):
return value
def extract_value(value):
    """Unwrap a ``{"raw": ..., "fmt": ...}`` style mapping to a single value.

    Anything that is not a dict is handed back untouched. For a dict, the
    ``"raw"`` entry is returned whenever it is not ``None`` (so falsy values
    such as ``0`` survive); otherwise the ``"fmt"`` entry is returned, which
    is ``None`` when that key is absent as well.
    """
    if not isinstance(value, dict):
        return value
    raw = value.get("raw")
    return value.get("fmt") if raw is None else raw
def extract_fmt_value(value):
if isinstance(value, dict):
return value.get("fmt")
@@ -548,6 +556,7 @@ def build_profile_key_metrics(summary_detail, key_stats, financial_data, price_d
"dividend_yield": extract_raw_value(summary_detail.get("dividendYield")),
"ex_dividend_date": extract_raw_value(summary_detail.get("exDividendDate")),
"payout_ratio": extract_raw_value(summary_detail.get("payoutRatio")),
"implied_volatility": extract_raw_value(summary_detail.get("impliedVolatility")),
"current_price": pick_value(
extract_raw_value(price_data.get("regularMarketPrice")),
extract_raw_value(financial_data.get("currentPrice")),
@@ -565,6 +574,165 @@ def build_profile_key_metrics(summary_detail, key_stats, financial_data, price_d
}
def simplify_recommendation_trend(trend):
    """Reduce recommendation-trend entries to a fixed set of snake_case keys.

    Args:
        trend: Iterable of dict entries carrying ``period``, ``strongBuy``,
            ``buy``, ``hold``, ``sell`` and ``strongSell``. ``None`` or any
            other falsy value is treated as "no entries".

    Returns:
        A list with one dict per usable entry, in input order. Keys missing
        from an entry come back as ``None``; keys other than the six listed
        above are dropped.
    """
    return [
        {
            "period": entry.get("period"),
            "strong_buy": entry.get("strongBuy"),
            "buy": entry.get("buy"),
            "hold": entry.get("hold"),
            "sell": entry.get("sell"),
            "strong_sell": entry.get("strongSell"),
        }
        for entry in trend or []
        # Skip malformed rows (None, strings, ...) instead of raising
        # AttributeError on ``.get``; the other summarizers in this module
        # apply the same isinstance guard.
        if isinstance(entry, dict)
    ]
def simplify_upgrade_history(history, limit=20):
    """Reduce upgrade/downgrade history entries to a compact, capped list.

    Args:
        history: Iterable of dict entries carrying ``firm``, ``action``,
            ``fromGrade``, ``toGrade`` and ``epochGradeDate`` / ``gradeDate``.
            ``None`` or any other falsy value is treated as "no entries".
        limit: Maximum number of entries to keep, counted from the start of
            ``history``. A falsy value (``None`` or ``0``) disables the cap.

    Returns:
        A list of dicts with keys ``firm``, ``action``, ``from_grade``,
        ``to_grade`` and ``date``, in input order. ``date`` is taken from
        ``epochGradeDate`` when that is truthy, otherwise from ``gradeDate``.
    """
    simplified = []
    for entry in history or []:
        # Skip malformed rows (None, strings, ...) instead of raising
        # AttributeError on ``.get``; the other summarizers in this module
        # apply the same isinstance guard.
        if not isinstance(entry, dict):
            continue
        simplified.append(
            {
                "firm": entry.get("firm"),
                "action": entry.get("action"),
                "from_grade": entry.get("fromGrade"),
                "to_grade": entry.get("toGrade"),
                "date": entry.get("epochGradeDate") or entry.get("gradeDate"),
            }
        )
        # Stop as soon as the cap is reached so the tail is never processed.
        if limit and len(simplified) >= limit:
            break
    return simplified
def simplify_ratings_top(payload):
    """Reduce a ratings payload to the fields kept for each dict-valued key.

    Args:
        payload: Mapping from an arbitrary key to a rating dict. Values that
            are not dicts are ignored.

    Returns:
        A dict with the same keys as the usable entries of ``payload``. Each
        value holds ``analyst``, ``rating_current``, ``rating_sentiment``,
        ``pt_current``, ``adjusted_pt_current``, ``announcement_date``,
        ``datapoints`` and a nested ``scores`` dict (``dir``, ``mm``, ``pt``,
        ``fin_score``) whose values are passed through ``extract_value``.
        Returns ``None`` when ``payload`` is empty, is not a dict, or contains
        no dict-valued entries.
    """
    # A list or string payload has no ``.items()``; treat any non-dict
    # container the same way as a missing payload rather than raising.
    if not payload or not isinstance(payload, dict):
        return None
    simplified = {}
    for key, value in payload.items():
        if not isinstance(value, dict):
            continue
        simplified[key] = {
            "analyst": value.get("analyst"),
            "rating_current": value.get("rating_current"),
            "rating_sentiment": value.get("rating_sentiment"),
            "pt_current": value.get("pt_current"),
            "adjusted_pt_current": value.get("adjusted_pt_current"),
            "announcement_date": value.get("announcement_date"),
            "datapoints": value.get("datapoints"),
            "scores": {
                "dir": extract_value(value.get("dir")),
                "mm": extract_value(value.get("mm")),
                "pt": extract_value(value.get("pt")),
                "fin_score": extract_value(value.get("fin_score")),
            },
        }
    # Collapse an empty result to None so callers see "no ratings".
    return simplified or None
def summarize_performance(perf_data):
    """Summarize the ``performanceOverview`` entry of a performance payload.

    Two shapes of ``performanceOverview`` are handled:

    * a dict: returns ``{"as_of_date": ..., "returns": {...}}`` where each
      return figure is read from the overview and passed through
      ``extract_value``;
    * anything else (typically a list): every dict element becomes
      ``{"period", "performance", "benchmark"}`` and the result is
      ``{"periods": [...]}``, or ``{}`` when no element qualifies.

    A falsy ``perf_data`` yields ``{}``.
    """
    if not perf_data:
        return {}
    overview = perf_data.get("performanceOverview")

    if not isinstance(overview, dict):
        periods = [
            {
                "period": item.get("period"),
                "performance": extract_value(item.get("performance")),
                "benchmark": extract_value(item.get("benchmark")),
            }
            for item in overview or []
            if isinstance(item, dict)
        ]
        if periods:
            return {"periods": periods}
        return {}

    # (output key, source key) pairs; order defines the key order of "returns".
    return_fields = (
        ("five_day", "fiveDaysReturn"),
        ("one_month", "oneMonthReturn"),
        ("three_month", "threeMonthReturn"),
        ("six_month", "sixMonthReturn"),
        ("ytd", "ytdReturnPct"),
        ("one_year", "oneYearTotalReturn"),
        ("two_year", "twoYearTotalReturn"),
        ("three_year", "threeYearTotalReturn"),
        ("five_year", "fiveYearTotalReturn"),
        ("ten_year", "tenYearTotalReturn"),
        ("max", "maxReturn"),
    )
    return {
        "as_of_date": extract_value(overview.get("asOfDate")),
        "returns": {
            label: extract_value(overview.get(source))
            for label, source in return_fields
        },
    }
def summarize_earnings(earnings, calendar_events):
    """Build a compact earnings summary from two payload dicts.

    Args:
        earnings: Dict read for ``earningsChart`` and ``financialsChart``;
            a falsy value is treated as an empty dict.
        calendar_events: Dict read for its ``earnings`` entry; a falsy value
            is treated as an empty dict.

    Returns:
        A dict with the next earnings dates, the estimate flag, earnings and
        revenue estimates (average/low/high), the first four entries of the
        quarterly earnings, yearly financials and quarterly financials lists,
        and the current-quarter fields of the earnings chart. Values marked
        below are passed through ``extract_value``.
    """
    chart = (earnings or {}).get("earningsChart") or {}
    financials = (earnings or {}).get("financialsChart") or {}
    calendar = (calendar_events or {}).get("earnings") or {}

    # Each list is built in full and sliced only in the final dict.
    quarterly_rows = [
        {
            "quarter": row.get("date"),
            "actual": extract_value(row.get("actual")),
            "estimate": extract_value(row.get("estimate")),
            "surprise": extract_value(row.get("difference")),
            "surprise_percent": extract_value(row.get("surprisePct")),
        }
        for row in chart.get("quarterly") or []
    ]
    yearly_rows = [
        {
            "year": row.get("date"),
            "revenue": extract_value(row.get("revenue")),
            "earnings": extract_value(row.get("earnings")),
        }
        for row in financials.get("yearly") or []
    ]
    quarterly_financial_rows = [
        {
            "quarter": row.get("date"),
            "revenue": extract_value(row.get("revenue")),
            "earnings": extract_value(row.get("earnings")),
        }
        for row in financials.get("quarterly") or []
    ]

    upcoming_dates = [
        extract_value(item) for item in calendar.get("earningsDate") or []
    ]
    eps_estimates = {
        "average": extract_value(calendar.get("earningsAverage")),
        "low": extract_value(calendar.get("earningsLow")),
        "high": extract_value(calendar.get("earningsHigh")),
    }
    sales_estimates = {
        "average": extract_value(calendar.get("revenueAverage")),
        "low": extract_value(calendar.get("revenueLow")),
        "high": extract_value(calendar.get("revenueHigh")),
    }

    return {
        "next_earnings_dates": upcoming_dates,
        "is_earnings_date_estimate": calendar.get("isEarningsDateEstimate"),
        "earnings_estimates": eps_estimates,
        "revenue_estimates": sales_estimates,
        "quarterly_earnings": quarterly_rows[:4],
        "yearly_financials": yearly_rows[:4],
        "quarterly_financials": quarterly_financial_rows[:4],
        "current_quarter_estimate": extract_value(
            chart.get("currentQuarterEstimate")
        ),
        "current_quarter_estimate_date": chart.get("currentQuarterEstimateDate"),
        "current_calendar_quarter": chart.get("currentCalendarQuarter"),
        "current_fiscal_quarter": chart.get("currentFiscalQuarter"),
    }
def scrape_yahoo_profile(symbol):
encoded = urllib.parse.quote(symbol, safe="")
url = f"https://finance.yahoo.com/quote/{encoded}/"
@@ -573,8 +741,6 @@ def scrape_yahoo_profile(symbol):
response_html = None
rendered_html = None
payloads = {}
news = []
news_summary = None
with sync_playwright() as p:
launch_args = chromium_launch_args()
@@ -602,15 +768,7 @@ def scrape_yahoo_profile(symbol):
else:
app.logger.warning("No response body for profile page %s", symbol)
try:
page.wait_for_selector(
'[data-testid="recent-news"], [data-testid="ticker-news-summary"]',
timeout=15000,
)
except Exception as exc:
app.logger.warning("News content not detected for %s: %s", symbol, exc)
page.wait_for_timeout(2000)
page.wait_for_timeout(1000)
rendered_html = page.content()
finally:
browser.close()
@@ -625,11 +783,6 @@ def scrape_yahoo_profile(symbol):
fallback_soup = BeautifulSoup(rendered_html, "html.parser")
payloads = extract_sveltekit_payloads_from_soup(fallback_soup)
if rendered_html:
news_soup = BeautifulSoup(rendered_html, "html.parser")
news = extract_recent_news_from_soup(news_soup, limit=20)
news_summary = extract_news_summary_from_soup(news_soup)
if not payloads:
return {
"error": "No embedded payloads found on profile page",
@@ -653,7 +806,6 @@ def scrape_yahoo_profile(symbol):
quote = extract_quote_response(quote_payload)
quote_type = extract_quote_type(quote_type_payload)
summary_profile = quote_summary.get("summaryProfile", {}) if quote_summary else {}
summary_detail = quote_summary.get("summaryDetail", {}) if quote_summary else {}
key_stats = quote_summary.get("defaultKeyStatistics", {}) if quote_summary else {}
financial_data = quote_summary.get("financialData", {}) if quote_summary else {}
@@ -665,8 +817,6 @@ def scrape_yahoo_profile(symbol):
quote_summary.get("upgradeDowngradeHistory", {}) if quote_summary else {}
)
earnings = quote_summary.get("earnings", {}) if quote_summary else {}
earnings_gaap = quote_summary.get("earningsGaap", {}) if quote_summary else {}
earnings_non_gaap = quote_summary.get("earningsNonGaap", {}) if quote_summary else {}
calendar_events = quote_summary.get("calendarEvents", {}) if quote_summary else {}
equity_performance = (
quote_summary.get("equityPerformance", {}) if quote_summary else {}
@@ -681,6 +831,79 @@ def scrape_yahoo_profile(symbol):
summary_detail, key_stats, financial_data, price_data, quote
)
valuation = {
"market_cap": extract_raw_value(key_stats.get("marketCap")),
"enterprise_value": extract_raw_value(key_stats.get("enterpriseValue")),
"price_to_book": extract_raw_value(key_stats.get("priceToBook")),
"price_to_sales": extract_raw_value(key_stats.get("priceToSalesTrailing12Months")),
"trailing_pe": key_metrics.get("trailing_pe"),
"forward_pe": key_metrics.get("forward_pe"),
}
profitability = {
"profit_margins": extract_raw_value(financial_data.get("profitMargins")),
"operating_margins": extract_raw_value(financial_data.get("operatingMargins")),
"gross_margins": extract_raw_value(financial_data.get("grossMargins")),
"ebitda_margins": extract_raw_value(financial_data.get("ebitdaMargins")),
"return_on_assets": extract_raw_value(financial_data.get("returnOnAssets")),
"return_on_equity": extract_raw_value(financial_data.get("returnOnEquity")),
}
growth = {
"revenue_growth": extract_raw_value(financial_data.get("revenueGrowth")),
"earnings_growth": extract_raw_value(financial_data.get("earningsGrowth")),
"revenue_per_share": extract_raw_value(financial_data.get("revenuePerShare")),
}
financial_strength = {
"total_cash": extract_raw_value(financial_data.get("totalCash")),
"total_debt": extract_raw_value(financial_data.get("totalDebt")),
"debt_to_equity": extract_raw_value(financial_data.get("debtToEquity")),
"current_ratio": extract_raw_value(financial_data.get("currentRatio")),
"quick_ratio": extract_raw_value(financial_data.get("quickRatio")),
}
cashflow = {
"operating_cashflow": extract_raw_value(financial_data.get("operatingCashflow")),
"free_cashflow": extract_raw_value(financial_data.get("freeCashflow")),
"ebitda": extract_raw_value(financial_data.get("ebitda")),
}
ownership = {
"shares_outstanding": extract_raw_value(key_stats.get("sharesOutstanding")),
"float_shares": extract_raw_value(key_stats.get("floatShares")),
"shares_short": extract_raw_value(key_stats.get("sharesShort")),
"short_ratio": extract_raw_value(key_stats.get("shortRatio")),
"short_percent_of_float": extract_raw_value(key_stats.get("shortPercentOfFloat")),
"held_percent_insiders": extract_raw_value(key_stats.get("heldPercentInsiders")),
"held_percent_institutions": extract_raw_value(
key_stats.get("heldPercentInstitutions")
),
}
analyst = {
"recommendation": {
"key": key_metrics.get("recommendation_key"),
"mean": key_metrics.get("recommendation_mean"),
"analyst_opinion_count": key_metrics.get("analyst_opinion_count"),
"target_price_high": key_metrics.get("target_price_high"),
"target_price_low": key_metrics.get("target_price_low"),
"target_price_mean": key_metrics.get("target_price_mean"),
"target_price_median": key_metrics.get("target_price_median"),
},
"trend": simplify_recommendation_trend(recommendation_trend.get("trend")),
"upgrades_downgrades": simplify_upgrade_history(
upgrade_history.get("history"), limit=20
),
"ratings_top": simplify_ratings_top(ratings_payload),
}
earnings_summary = summarize_earnings(earnings, calendar_events)
performance_summary = {
"equity_performance": summarize_performance(equity_performance),
"unadjusted_performance": summarize_performance(performance_overview),
}
matched_symbols = []
for candidate in [
price_data.get("symbol") if price_data else None,
@@ -733,26 +956,16 @@ def scrape_yahoo_profile(symbol):
"url": url,
"fetched_at": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
"validation": validation,
"company_profile": summary_profile,
"summary_detail": summary_detail,
"default_key_statistics": key_stats,
"financial_data": financial_data,
"price": price_data,
"earnings": earnings,
"earnings_gaap": earnings_gaap,
"earnings_non_gaap": earnings_non_gaap,
"calendar_events": calendar_events,
"equity_performance": equity_performance,
"performance_overview": performance_overview,
"recommendation_trend": recommendation_trend,
"upgrade_downgrade_history": upgrade_history,
"key_metrics": key_metrics,
"quote": quote,
"quote_type": quote_type,
"recommendations_by_symbol": recs_payload,
"ratings_top": ratings_payload,
"news_summary": news_summary,
"recent_news": news,
"valuation": valuation,
"profitability": profitability,
"growth": growth,
"financial_strength": financial_strength,
"cashflow": cashflow,
"ownership": ownership,
"analyst": analyst,
"earnings": earnings_summary,
"performance": performance_summary,
"data_sources": {
"quote_summary": quote_summary_url,
"quote": quote_url,