from flask import Flask, jsonify, request
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import urllib.parse
import logging
import time
app = Flask(__name__)

# Logging: configure the root logger once at import time (timestamp, level,
# message) and make sure Flask's own logger emits INFO records as well.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
app.logger.setLevel(logging.INFO)
# Browser-like User-Agent header sent with every request issued by the page.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
)

# Selectors tried, in order, to read the current price from the page.
_PRICE_SELECTORS = (
    "fin-streamer[data-field='regularMarketPrice']",  # primary
    "span[data-testid='qsp-price']",  # fallback
)

# Table columns whose cell text is converted to a number.
_FLOAT_COLUMNS = frozenset({"Strike", "Last Price", "Bid", "Ask", "Change"})
_INT_COLUMNS = frozenset({"Volume", "Open Interest"})


def _wait_for_tables(page, attempts=30, interval_ms=1000):
    """Return the page's <table> handles, polling until at least two exist.

    Blocks until the first table appears (30 s Playwright timeout; Playwright
    raises on expiry), then re-queries up to ``attempts`` times, pausing
    ``interval_ms`` between tries. The returned list may still hold fewer than
    two handles; the caller decides how to report that.
    """
    page.wait_for_selector("table", timeout=30000)
    for _ in range(attempts):
        if len(page.query_selector_all("table")) >= 2:
            break
        # Playwright's own timer instead of time.sleep(): the Playwright docs
        # advise against time.sleep() because the library relies on
        # asynchronous operations internally.
        page.wait_for_timeout(interval_ms)
    return page.query_selector_all("table")


def _extract_price(page, symbol):
    """Best-effort read of the current price; returns a float or None."""
    last_error = None
    for selector in _PRICE_SELECTORS:
        try:
            text = page.locator(selector).inner_text()
            return float(text.replace(",", ""))
        except Exception as exc:
            # Deliberately broad: a missing/ambiguous element (Playwright
            # error) or unparsable text just means "try the next selector".
            # Unlike a bare ``except``, KeyboardInterrupt/SystemExit pass through.
            last_error = exc
    app.logger.warning("Failed to extract price for %s: %s", symbol, last_error)
    return None


def _coerce_cell(column, text):
    """Convert one cell's text into the value stored under ``column``.

    Numeric columns become float/int, or None when the text does not parse.
    For every other column the text is kept, except "-" and "" which map to
    None.
    """
    if column in _FLOAT_COLUMNS:
        caster = float
    elif column in _INT_COLUMNS:
        caster = int
    else:
        return None if text in ("-", "") else text
    try:
        return caster(text.replace(",", ""))
    except ValueError:
        return None


def _parse_table(table_html, side, symbol):
    """Parse one options table's outer HTML into a list of row dicts.

    Keys are the ``thead th`` texts. Rows whose cell count differs from the
    header count are skipped. ``side`` and ``symbol`` are used for logging
    only.
    """
    if not table_html:
        app.logger.warning("No %s table HTML for %s", side, symbol)
        return []

    soup = BeautifulSoup(table_html, "html.parser")
    headers = [th.get_text(strip=True) for th in soup.select("thead th")]

    parsed = []
    for row in soup.select("tbody tr"):
        cells = row.find_all("td")
        if len(cells) != len(headers):
            continue
        parsed.append(
            {
                column: _coerce_cell(column, cell.get_text(" ", strip=True))
                for column, cell in zip(headers, cells)
            }
        )

    app.logger.info("Parsed %d %s rows", len(parsed), side)
    return parsed


def _prune_nearest(options, price_value, limit=26):
    """Keep at most ``limit`` options whose strike is closest to the price.

    Returns ``(kept, dropped_count)``. When ``price_value`` is None nothing
    can be ranked, so ``options`` is returned untouched with a count of 0.
    Otherwise rows without a numeric "Strike" are always dropped, and
    ``dropped_count`` covers every row removed (non-numeric strikes included)
    in both the under-limit and over-limit cases. When pruning happens the
    kept rows are ordered by distance from the price, nearest first.
    """
    if price_value is None:
        return options, 0

    numeric = [o for o in options if isinstance(o.get("Strike"), (int, float))]
    if len(numeric) > limit:
        numeric = sorted(numeric, key=lambda o: abs(o["Strike"] - price_value))[:limit]
    return numeric, len(options) - len(numeric)


def _strike_range(opts):
    """Return ``[min, max]`` of the numeric strikes, or ``[None, None]``."""
    strikes = [o["Strike"] for o in opts if isinstance(o.get("Strike"), (int, float))]
    return [min(strikes), max(strikes)] if strikes else [None, None]


def scrape_yahoo_options(symbol, strike_limit=26):
    """Scrape the Yahoo Finance options page for ``symbol``.

    Loads the page in headless Chromium, takes the first two tables on the
    page as calls and puts, reads the current price, and keeps only the
    strikes nearest to that price.

    Args:
        symbol: Ticker symbol; it is percent-encoded before being put into
            the URL.
        strike_limit: Maximum number of calls (and of puts) to return when a
            current price is available. Defaults to 26.

    Returns:
        On success, a dict with keys ``stock``, ``url``, ``current_price``,
        ``calls``, ``puts``, ``calls_strike_range``, ``puts_strike_range``,
        ``total_calls``, ``total_puts``, ``pruned_calls_count`` and
        ``pruned_puts_count``. If fewer than two tables are found, a dict
        with only ``error`` and ``stock``.

    Raises:
        Whatever Playwright raises for navigation or wait failures (e.g. a
        timeout while loading the page or waiting for the first table). The
        browser is closed before the exception propagates.
    """
    encoded = urllib.parse.quote(symbol, safe="")
    url = f"https://finance.yahoo.com/quote/{encoded}/options/"
    app.logger.info("Starting scrape for symbol=%s url=%s", symbol, url)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.set_extra_http_headers({"User-Agent": _USER_AGENT})
            page.goto(url, wait_until="domcontentloaded", timeout=60000)
            app.logger.info("Page loaded (domcontentloaded) for %s", symbol)

            # Yahoo changed all class names, so we no longer depend on them:
            # we simply wait until at least TWO <table> tags appear.
            app.logger.info("Waiting for options tables...")
            tables = _wait_for_tables(page)
            if len(tables) < 2:
                app.logger.error(
                    "Only %d tables found — expected 2. HTML changed?", len(tables)
                )
                return {"error": "Could not locate options tables", "stock": symbol}

            app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))
            # NOTE(review): assumes the first table is calls and the second is
            # puts; confirm against the live page layout.
            calls_html = tables[0].evaluate("el => el.outerHTML")
            puts_html = tables[1].evaluate("el => el.outerHTML")

            price = _extract_price(page, symbol)
            app.logger.info("Current price for %s = %s", symbol, price)
        finally:
            # Always release the browser, including when navigation, waiting
            # or HTML extraction raises.
            browser.close()

    calls_full = _parse_table(calls_html, "calls", symbol)
    puts_full = _parse_table(puts_html, "puts", symbol)

    calls, pruned_calls = _prune_nearest(calls_full, price, limit=strike_limit)
    puts, pruned_puts = _prune_nearest(puts_full, price, limit=strike_limit)

    return {
        "stock": symbol,
        "url": url,
        "current_price": price,
        "calls": calls,
        "puts": puts,
        "calls_strike_range": _strike_range(calls),
        "puts_strike_range": _strike_range(puts),
        "total_calls": len(calls),
        "total_puts": len(puts),
        "pruned_calls_count": pruned_calls,
        "pruned_puts_count": pruned_puts,
    }
@app.route("/scrape_sync")
def scrape_sync():
    """Handle GET /scrape_sync: scrape options for ``?stock=`` and return JSON.

    The ``stock`` query parameter defaults to "MSFT". The scraper's result
    dict (success or error payload) is returned as the JSON response body.
    """
    # Strip surrounding whitespace and treat a blank ``?stock=`` like a missing
    # parameter; otherwise the scraper would request ".../quote//options/" or a
    # symbol padded with encoded spaces.
    symbol = request.args.get("stock", "").strip() or "MSFT"
    app.logger.info("Received /scrape_sync request for symbol=%s", symbol)
    return jsonify(scrape_yahoo_options(symbol))
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on port 9777, bound to
    # 0.0.0.0 rather than Flask's default loopback address.
    app.run(port=9777, host="0.0.0.0")