"""Flask microservice that scrapes a symbol's options chain from Yahoo Finance.

Exposes GET /scrape_sync?stock=SYMBOL, which drives a headless Chromium
instance (Playwright), extracts the calls/puts tables, parses them with
BeautifulSoup, and returns the 26 strikes nearest the current price on
each side as JSON.
"""

from flask import Flask, jsonify, request
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import urllib.parse
import logging

app = Flask(__name__)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
app.logger.setLevel(logging.INFO)


def scrape_yahoo_options(symbol):
    """Scrape the Yahoo Finance options chain for *symbol*.

    Args:
        symbol: Ticker symbol (URL-quoted before use, so e.g. "BRK.B" is safe).

    Returns:
        dict with keys: stock, url, current_price, calls, puts,
        calls_strike_range, puts_strike_range, total_calls, total_puts,
        pruned_calls_count, pruned_puts_count — or an {"error", "stock"}
        dict when no options tables could be located.

    Raises:
        playwright.sync_api.TimeoutError (and other Playwright errors) when
        navigation or the table selector times out; these propagate to the
        caller exactly as before, but the browser is now always closed.
    """
    encoded = urllib.parse.quote(symbol, safe="")
    url = f"https://finance.yahoo.com/quote/{encoded}/options/"
    app.logger.info("Starting scrape for symbol=%s url=%s", symbol, url)

    calls_html = None
    puts_html = None
    price = None

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # FIX: the original only closed the browser on the happy path, so any
        # Playwright timeout (common on Yahoo) leaked a Chromium process per
        # failed request. try/finally guarantees cleanup.
        try:
            page = browser.new_page()
            page.set_extra_http_headers({
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118 Safari/537.36"
            })

            # Avoid networkidle on Yahoo (it rarely goes “idle” because of ads/streaming)
            page.goto(url, wait_until="domcontentloaded", timeout=60000)
            app.logger.info("Page loaded (domcontentloaded) for %s", symbol)

            # Wait for the options tables
            page.wait_for_selector(
                "section[data-testid='options-list-table'] table.yf-wurt5d",
                timeout=30000
            )
            app.logger.info("Options tables located in DOM for %s", symbol)

            # Grab CALLS and PUTS tables separately (first = Calls, second = Puts)
            tables = page.evaluate("""
                () => {
                    const section = document.querySelector('section[data-testid="options-list-table"]');
                    if (!section) return { calls: null, puts: null };
                    const tbs = section.querySelectorAll('table.yf-wurt5d');
                    const getHTML = el => el ? el.outerHTML : null;
                    return {
                        calls: getHTML(tbs[0] || null),
                        puts: getHTML(tbs[1] || null)
                    };
                }
            """)
            calls_html = tables.get("calls") if tables else None
            puts_html = tables.get("puts") if tables else None

            # Current price — best-effort: a missing/renamed price element must
            # not abort the scrape, it only disables strike pruning below.
            try:
                price_text = page.locator("span[data-testid='qsp-price']").inner_text()
                price = float(price_text.replace(",", ""))
                app.logger.info("Current price for %s = %s", symbol, price)
            except Exception as e:
                app.logger.warning("Failed to get current price for %s: %s", symbol, e)
        finally:
            browser.close()

    if not calls_html and not puts_html:
        app.logger.error("Could not locate options tables for %s", symbol)
        return {"error": "Could not locate options tables", "stock": symbol}

    def parse_table(table_html, side):
        """Parse one options <table> into a list of row dicts keyed by header.

        Numeric columns are coerced to float/int (None on failure); for other
        columns a literal "-" or "" becomes None. Rows whose cell count does
        not match the header count are skipped (spanning/ad rows).
        """
        if not table_html:
            app.logger.warning("No %s table HTML present for %s", side, symbol)
            return []
        soup = BeautifulSoup(table_html, "html.parser")
        headers = [th.get_text(strip=True) for th in soup.select("thead th")]
        rows = soup.select("tbody tr")
        parsed_rows = []
        for r in rows:
            cols = r.find_all("td")
            if len(cols) != len(headers):
                continue
            data = {}
            for i, c in enumerate(cols):
                key = headers[i]
                val = c.get_text(" ", strip=True)
                if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]:
                    try:
                        val = float(val.replace(",", ""))
                    except Exception:
                        val = None
                elif key in ["Volume", "Open Interest"]:
                    try:
                        val = int(val.replace(",", ""))
                    except Exception:
                        val = None
                elif val in ["-", ""]:
                    val = None
                data[key] = val
            parsed_rows.append(data)
        app.logger.info("Parsed %d %s rows for %s", len(parsed_rows), side, symbol)
        return parsed_rows

    calls_full = parse_table(calls_html, "calls")
    puts_full = parse_table(puts_html, "puts")

    def rng(opts):
        """Return [min_strike, max_strike] over rows with numeric strikes,
        or [None, None] when there are none."""
        strikes = [r.get("Strike") for r in opts
                   if isinstance(r.get("Strike"), (int, float))]
        return [min(strikes), max(strikes)] if strikes else [None, None]

    def prune_nearest(options, price_value, limit=26, side=""):
        """Keep the *limit* rows whose strike is closest to *price_value*.

        Returns (kept_rows, pruned_count). With no price available the input
        is returned untouched; rows with non-numeric strikes are dropped
        before distance ranking.
        """
        if price_value is None:
            app.logger.info(
                "No current price for %s; skipping pruning for %s (keeping %d rows)",
                symbol, side, len(options)
            )
            return options, 0

        numeric_opts = [o for o in options
                        if isinstance(o.get("Strike"), (int, float))]

        if len(numeric_opts) <= limit:
            app.logger.info(
                "Not enough %s rows for pruning for %s: total=%d, limit=%d",
                side, symbol, len(numeric_opts), limit
            )
            return numeric_opts, 0

        sorted_opts = sorted(
            numeric_opts,
            key=lambda o: abs(o["Strike"] - price_value)
        )
        pruned_list = sorted_opts[:limit]
        pruned_count = len(options) - len(pruned_list)
        app.logger.info(
            "Pruned %s for %s: original=%d, kept=%d, pruned=%d (limit=%d)",
            side, symbol, len(options), len(pruned_list), pruned_count, limit
        )
        return pruned_list, pruned_count

    # ✅ 26 closest by strike on each side
    calls, pruned_calls_count = prune_nearest(calls_full, price, limit=26, side="calls")
    puts, pruned_puts_count = prune_nearest(puts_full, price, limit=26, side="puts")

    calls_range = rng(calls)
    puts_range = rng(puts)

    app.logger.info(
        "Final summary for %s: calls_kept=%d, puts_kept=%d, "
        "calls_strike_range=%s, puts_strike_range=%s",
        symbol, len(calls), len(puts), calls_range, puts_range
    )

    return {
        "stock": symbol,
        "url": url,
        "current_price": price,
        "calls": calls,
        "puts": puts,
        "calls_strike_range": calls_range,
        "puts_strike_range": puts_range,
        "total_calls": len(calls),
        "total_puts": len(puts),
        "pruned_calls_count": pruned_calls_count,
        "pruned_puts_count": pruned_puts_count,
    }


@app.route("/scrape_sync")
def scrape_sync():
    """GET /scrape_sync?stock=SYMBOL — scrape and return the options chain.

    Defaults to MSFT when the `stock` query parameter is absent.
    """
    symbol = request.args.get("stock", "MSFT")
    app.logger.info("Received /scrape_sync request for symbol=%s", symbol)
    data = scrape_yahoo_options(symbol)
    return jsonify(data)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=9777)