Add docker artifacts and agent context
This commit is contained in:
13
.dockerignore
Normal file
13
.dockerignore
Normal file
@@ -0,0 +1,13 @@
|
||||
.git/
|
||||
.gitignore
|
||||
__pycache__/
|
||||
*.pyc
|
||||
venv/
|
||||
.venv/
|
||||
.env
|
||||
.env.*
|
||||
.pytest_cache/
|
||||
charts/
|
||||
yahoo.html
|
||||
scraper_service(works).py
|
||||
scraper_service.working.backup.py
|
||||
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
__pycache__/
|
||||
*.pyc
|
||||
venv/
|
||||
.venv/
|
||||
.env
|
||||
.env.*
|
||||
.pytest_cache/
|
||||
424
AGENTS.md
Normal file
424
AGENTS.md
Normal file
@@ -0,0 +1,424 @@
|
||||
# AGENTS.md
|
||||
|
||||
## Context
|
||||
- This project exposes a Flask API that uses Playwright to scrape Yahoo Finance options chains.
|
||||
- Entry point: `scraper_service.py` (launched via `runner.bat` or directly with Python).
|
||||
- API route: `GET /scrape_sync` with `stock` and optional `expiration|expiry|date` parameters.
|
||||
- Expiration inputs: epoch seconds (Yahoo date param) or date strings supported by `DATE_FORMATS`.
|
||||
|
||||
## Docker
|
||||
- Build: `docker build -t <image>:latest .`
|
||||
- Run: `docker run --rm -p 9777:9777 <image>:latest`
|
||||
- The container uses the Playwright base image with bundled browsers.
|
||||
|
||||
## Line-by-line explanation of scraper_service.py
|
||||
|
||||
- Line 1: Import symbols from flask. Code: `from flask import Flask, jsonify, request`
|
||||
- Line 2: Import symbols from playwright.sync_api. Code: `from playwright.sync_api import sync_playwright`
|
||||
- Line 3: Import symbols from bs4. Code: `from bs4 import BeautifulSoup`
|
||||
- Line 4: Import symbols from datetime. Code: `from datetime import datetime, timezone`
|
||||
- Line 5: Import module urllib.parse. Code: `import urllib.parse`
|
||||
- Line 6: Import module logging. Code: `import logging`
|
||||
- Line 7: Import module re. Code: `import re`
|
||||
- Line 8: Import module time. Code: `import time`
|
||||
- Line 9: Blank line for readability. Code: `<blank>`
|
||||
- Line 10: Create the Flask application instance. Code: `app = Flask(__name__)`
|
||||
- Line 11: Blank line for readability. Code: `<blank>`
|
||||
- Line 12: Comment describing the next block. Code: `# Logging`
|
||||
- Line 13: Configure logging defaults. Code: `logging.basicConfig(`
|
||||
- Line 14: Pass the minimum log level (INFO) as a keyword argument to `logging.basicConfig`. Code: `level=logging.INFO,`
|
||||
- Line 15: Pass the log record format (timestamp, level name, message) as a keyword argument. Code: `format="%(asctime)s [%(levelname)s] %(message)s"`
|
||||
- Line 16: Close the current block or container. Code: `)`
|
||||
- Line 17: Set the Flask logger level. Code: `app.logger.setLevel(logging.INFO)`
|
||||
- Line 18: Blank line for readability. Code: `<blank>`
|
||||
- Line 19: Define accepted expiration date string formats. Code: `DATE_FORMATS = (`
|
||||
- Line 20: Accepted format: ISO-style date with dashes, e.g. `2025-01-17`. Code: `"%Y-%m-%d",`
|
||||
- Line 21: Accepted format: year/month/day with slashes, e.g. `2025/01/17`. Code: `"%Y/%m/%d",`
|
||||
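- Line 22: Accepted format: compact digits, e.g. `20250117`. NOTE(review): all-digit inputs are treated as epoch seconds by the `isdigit()` checks (Lines 102 and 187) before `parse_date` is called, so this format appears unreachable for request input — confirm. Code: `"%Y%m%d",`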
|
||||
- Line 23: Accepted format: abbreviated month name, e.g. `Jan 17, 2025`. Code: `"%b %d, %Y",`
|
||||
- Line 24: Accepted format: full month name, e.g. `January 17, 2025`. Code: `"%B %d, %Y",`
|
||||
- Line 25: Close the current block or container. Code: `)`
|
||||
- Line 26: Blank line for readability. Code: `<blank>`
|
||||
- Line 27: Blank line for readability. Code: `<blank>`
|
||||
- Line 28: Define the parse_date function. Code: `def parse_date(value):`
|
||||
- Line 29: Loop over items. Code: `for fmt in DATE_FORMATS:`
|
||||
- Line 30: Start a try block for error handling. Code: `try:`
|
||||
- Line 31: Return a value to the caller. Code: `return datetime.strptime(value, fmt).date()`
|
||||
- Line 32: Handle exceptions for the preceding try block. Code: `except ValueError:`
|
||||
- Line 33: Parsing with this format failed; move on to the next format. Code: `continue`
|
||||
- Line 34: Return a value to the caller. Code: `return None`
|
||||
- Line 35: Blank line for readability. Code: `<blank>`
|
||||
- Line 36: Blank line for readability. Code: `<blank>`
|
||||
- Line 37: Define the normalize_label function. Code: `def normalize_label(value):`
|
||||
- Line 38: Return a value to the caller. Code: `return " ".join(value.strip().split()).lower()`
|
||||
- Line 39: Blank line for readability. Code: `<blank>`
|
||||
- Line 40: Blank line for readability. Code: `<blank>`
|
||||
- Line 41: Define the format_expiration_label function. Code: `def format_expiration_label(timestamp):`
|
||||
- Line 42: Start a try block for error handling. Code: `try:`
|
||||
- Line 43: Return a value to the caller. Code: `return datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d")`
|
||||
- Line 44: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 45: Return a value to the caller. Code: `return str(timestamp)`
|
||||
- Line 46: Blank line for readability. Code: `<blank>`
|
||||
- Line 47: Blank line for readability. Code: `<blank>`
|
||||
- Line 48: Define the extract_expiration_dates_from_html function. Code: `def extract_expiration_dates_from_html(html):`
|
||||
- Line 49: Conditional branch. Code: `if not html:`
|
||||
- Line 50: Return a value to the caller. Code: `return []`
|
||||
- Line 51: Blank line for readability. Code: `<blank>`
|
||||
- Line 52: Execute the statement as written. Code: `patterns = (`
|
||||
- Line 53: Regex for the backslash-escaped JSON form (`\"expirationDates\":[...]`), capturing the array contents. Code: `r'\\"expirationDates\\":\[(.*?)\]',`
|
||||
- Line 54: Regex for the plain JSON form (`"expirationDates":[...]`), capturing the array contents. Code: `r'"expirationDates":\[(.*?)\]',`
|
||||
- Line 55: Close the current block or container. Code: `)`
|
||||
- Line 56: Execute the statement as written. Code: `match = None`
|
||||
- Line 57: Loop over items. Code: `for pattern in patterns:`
|
||||
- Line 58: Execute the statement as written. Code: `match = re.search(pattern, html, re.DOTALL)`
|
||||
- Line 59: Conditional branch. Code: `if match:`
|
||||
- Line 60: Stop at the first pattern that matches. Code: `break`
|
||||
- Line 61: Conditional branch. Code: `if not match:`
|
||||
- Line 62: Return a value to the caller. Code: `return []`
|
||||
- Line 63: Blank line for readability. Code: `<blank>`
|
||||
- Line 64: Take the captured array contents (the text between the brackets). Code: `raw = match.group(1)`
|
||||
- Line 65: Execute the statement as written. Code: `values = []`
|
||||
- Line 66: Loop over items. Code: `for part in raw.split(","):`
|
||||
- Line 67: Execute the statement as written. Code: `part = part.strip()`
|
||||
- Line 68: Conditional branch. Code: `if part.isdigit():`
|
||||
- Line 69: Start a try block for error handling. Code: `try:`
|
||||
- Line 70: Execute the statement as written. Code: `values.append(int(part))`
|
||||
- Line 71: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 72: Execute the statement as written. Code: `continue`
|
||||
- Line 73: Return a value to the caller. Code: `return values`
|
||||
- Line 74: Blank line for readability. Code: `<blank>`
|
||||
- Line 75: Blank line for readability. Code: `<blank>`
|
||||
- Line 76: Define the build_expiration_options function. Code: `def build_expiration_options(expiration_dates):`
|
||||
- Line 77: Execute the statement as written. Code: `options = []`
|
||||
- Line 78: Loop over items. Code: `for value in expiration_dates or []:`
|
||||
- Line 79: Start a try block for error handling. Code: `try:`
|
||||
- Line 80: Execute the statement as written. Code: `value_int = int(value)`
|
||||
- Line 81: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 82: Execute the statement as written. Code: `continue`
|
||||
- Line 83: Blank line for readability. Code: `<blank>`
|
||||
- Line 84: Execute the statement as written. Code: `label = format_expiration_label(value_int)`
|
||||
- Line 85: Start a try block for error handling. Code: `try:`
|
||||
- Line 86: Execute the statement as written. Code: `date_value = datetime.utcfromtimestamp(value_int).date()`
|
||||
- Line 87: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 88: Execute the statement as written. Code: `date_value = None`
|
||||
- Line 89: Blank line for readability. Code: `<blank>`
|
||||
- Line 90: Append an option record holding the epoch value, its formatted label, and its UTC date (or `None`). Code: `options.append({"value": value_int, "label": label, "date": date_value})`
|
||||
- Line 91: Return a value to the caller. Code: `return sorted(options, key=lambda x: x["value"])`
|
||||
- Line 92: Blank line for readability. Code: `<blank>`
|
||||
- Line 93: Blank line for readability. Code: `<blank>`
|
||||
- Line 94: Define the resolve_expiration function. Code: `def resolve_expiration(expiration, options):`
|
||||
- Line 95: Conditional branch. Code: `if not expiration:`
|
||||
- Line 96: Return a value to the caller. Code: `return None, None`
|
||||
- Line 97: Blank line for readability. Code: `<blank>`
|
||||
- Line 98: Execute the statement as written. Code: `raw = expiration.strip()`
|
||||
- Line 99: Conditional branch. Code: `if not raw:`
|
||||
- Line 100: Return a value to the caller. Code: `return None, None`
|
||||
- Line 101: Blank line for readability. Code: `<blank>`
|
||||
- Line 102: Conditional branch. Code: `if raw.isdigit():`
|
||||
- Line 103: Execute the statement as written. Code: `value = int(raw)`
|
||||
- Line 104: Conditional branch. Code: `if options:`
|
||||
- Line 105: Loop over items. Code: `for opt in options:`
|
||||
- Line 106: Conditional branch. Code: `if opt.get("value") == value:`
|
||||
- Line 107: Return a value to the caller. Code: `return value, opt.get("label")`
|
||||
- Line 108: Return a value to the caller. Code: `return None, None`
|
||||
- Line 109: Return a value to the caller. Code: `return value, format_expiration_label(value)`
|
||||
- Line 110: Blank line for readability. Code: `<blank>`
|
||||
- Line 111: Execute the statement as written. Code: `requested_date = parse_date(raw)`
|
||||
- Line 112: Conditional branch. Code: `if requested_date:`
|
||||
- Line 113: Loop over items. Code: `for opt in options:`
|
||||
- Line 114: Conditional branch. Code: `if opt.get("date") == requested_date:`
|
||||
- Line 115: Return a value to the caller. Code: `return opt.get("value"), opt.get("label")`
|
||||
- Line 116: Return a value to the caller. Code: `return None, None`
|
||||
- Line 117: Blank line for readability. Code: `<blank>`
|
||||
- Line 118: Execute the statement as written. Code: `normalized = normalize_label(raw)`
|
||||
- Line 119: Loop over items. Code: `for opt in options:`
|
||||
- Line 120: Conditional branch. Code: `if normalize_label(opt.get("label", "")) == normalized:`
|
||||
- Line 121: Return a value to the caller. Code: `return opt.get("value"), opt.get("label")`
|
||||
- Line 122: Blank line for readability. Code: `<blank>`
|
||||
- Line 123: Return a value to the caller. Code: `return None, None`
|
||||
- Line 124: Blank line for readability. Code: `<blank>`
|
||||
- Line 125: Blank line for readability. Code: `<blank>`
|
||||
- Line 126: Define the wait_for_tables function. Code: `def wait_for_tables(page):`
|
||||
- Line 127: Start a try block for error handling. Code: `try:`
|
||||
- Line 128: Interact with the Playwright page. Code: `page.wait_for_selector(`
|
||||
- Line 129: Execute the statement as written. Code: `"section[data-testid='options-list-table'] table",`
|
||||
- Line 130: Execute the statement as written. Code: `timeout=30000,`
|
||||
- Line 131: Close the current block or container. Code: `)`
|
||||
- Line 132: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 133: Interact with the Playwright page. Code: `page.wait_for_selector("table", timeout=30000)`
|
||||
- Line 134: Blank line for readability. Code: `<blank>`
|
||||
- Line 135: Poll for the tables up to 30 times, with a one-second sleep per iteration (Line 144). Code: `for _ in range(30): # 30 * 1s = 30 seconds`
|
||||
- Line 136: Collect option tables from the page. Code: `tables = page.query_selector_all(`
|
||||
- Line 137: Execute the statement as written. Code: `"section[data-testid='options-list-table'] table"`
|
||||
- Line 138: Close the current block or container. Code: `)`
|
||||
- Line 139: Conditional branch. Code: `if len(tables) >= 2:`
|
||||
- Line 140: Return a value to the caller. Code: `return tables`
|
||||
- Line 141: Collect option tables from the page. Code: `tables = page.query_selector_all("table")`
|
||||
- Line 142: Conditional branch. Code: `if len(tables) >= 2:`
|
||||
- Line 143: Return a value to the caller. Code: `return tables`
|
||||
- Line 144: Wait one second before polling again. Code: `time.sleep(1)`
|
||||
- Line 145: Return a value to the caller. Code: `return []`
|
||||
- Line 146: Blank line for readability. Code: `<blank>`
|
||||
- Line 147: Blank line for readability. Code: `<blank>`
|
||||
- Line 148: Define the scrape_yahoo_options function. Code: `def scrape_yahoo_options(symbol, expiration=None):`
|
||||
- Line 149: URL-encode the stock symbol. Code: `encoded = urllib.parse.quote(symbol, safe="")`
|
||||
- Line 150: Build the base Yahoo Finance options URL. Code: `base_url = f"https://finance.yahoo.com/quote/{encoded}/options/"`
|
||||
- Line 151: Normalize the expiration input string. Code: `requested_expiration = expiration.strip() if expiration else None`
|
||||
- Line 152: Conditional branch. Code: `if not requested_expiration:`
|
||||
- Line 153: Treat an empty or whitespace-only expiration as not provided. Code: `requested_expiration = None`
|
||||
- Line 154: Set the URL to load. Code: `url = base_url`
|
||||
- Line 155: Blank line for readability. Code: `<blank>`
|
||||
- Line 156: Emit or configure a log message. Code: `app.logger.info(`
|
||||
- Line 157: Execute the statement as written. Code: `"Starting scrape for symbol=%s expiration=%s url=%s",`
|
||||
- Line 158: Execute the statement as written. Code: `symbol,`
|
||||
- Line 159: Execute the statement as written. Code: `requested_expiration,`
|
||||
- Line 160: Execute the statement as written. Code: `base_url,`
|
||||
- Line 161: Close the current block or container. Code: `)`
|
||||
- Line 162: Blank line for readability. Code: `<blank>`
|
||||
- Line 163: Reserve storage for options table HTML. Code: `calls_html = None`
|
||||
- Line 164: Reserve storage for options table HTML. Code: `puts_html = None`
|
||||
- Line 165: Initialize or assign the current price. Code: `price = None`
|
||||
- Line 166: Track the resolved expiration metadata. Code: `selected_expiration_value = None`
|
||||
- Line 167: Track the resolved expiration metadata. Code: `selected_expiration_label = None`
|
||||
- Line 168: Prepare or update the list of available expirations. Code: `expiration_options = []`
|
||||
- Line 169: Track the resolved expiration epoch timestamp. Code: `target_date = None`
|
||||
- Line 170: Track whether a base-page lookup is needed. Code: `fallback_to_base = False`
|
||||
- Line 171: Blank line for readability. Code: `<blank>`
|
||||
- Line 172: Enter a context manager block. Code: `with sync_playwright() as p:`
|
||||
- Line 173: Launch a Playwright browser instance. Code: `browser = p.chromium.launch(headless=True)`
|
||||
- Line 174: Create a new Playwright page. Code: `page = browser.new_page()`
|
||||
- Line 175: Interact with the Playwright page. Code: `page.set_extra_http_headers(`
|
||||
- Line 176: Execute the statement as written. Code: `{`
|
||||
- Line 177: Execute the statement as written. Code: `"User-Agent": (`
|
||||
- Line 178: Execute the statement as written. Code: `"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "`
|
||||
- Line 179: Execute the statement as written. Code: `"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"`
|
||||
- Line 180: Close the current block or container. Code: `)`
|
||||
- Line 181: Close the current block or container. Code: `}`
|
||||
- Line 182: Close the current block or container. Code: `)`
|
||||
- Line 183: Interact with the Playwright page. Code: `page.set_default_timeout(60000)`
|
||||
- Line 184: Blank line for readability. Code: `<blank>`
|
||||
- Line 185: Start a try block for error handling. Code: `try:`
|
||||
- Line 186: Conditional branch. Code: `if requested_expiration:`
|
||||
- Line 187: Conditional branch. Code: `if requested_expiration.isdigit():`
|
||||
- Line 188: Track the resolved expiration epoch timestamp. Code: `target_date = int(requested_expiration)`
|
||||
- Line 189: Track the resolved expiration metadata. Code: `selected_expiration_value = target_date`
|
||||
- Line 190: Track the resolved expiration metadata. Code: `selected_expiration_label = format_expiration_label(target_date)`
|
||||
- Line 191: Fallback branch. Code: `else:`
|
||||
- Line 192: Execute the statement as written. Code: `parsed_date = parse_date(requested_expiration)`
|
||||
- Line 193: Conditional branch. Code: `if parsed_date:`
|
||||
- Line 194: Track the resolved expiration epoch timestamp. Code: `target_date = int(`
|
||||
- Line 195: Execute the statement as written. Code: `datetime(`
|
||||
- Line 196: Execute the statement as written. Code: `parsed_date.year,`
|
||||
- Line 197: Execute the statement as written. Code: `parsed_date.month,`
|
||||
- Line 198: Execute the statement as written. Code: `parsed_date.day,`
|
||||
- Line 199: Execute the statement as written. Code: `tzinfo=timezone.utc,`
|
||||
- Line 200: Execute the statement as written. Code: `).timestamp()`
|
||||
- Line 201: Close the current block or container. Code: `)`
|
||||
- Line 202: Track the resolved expiration metadata. Code: `selected_expiration_value = target_date`
|
||||
- Line 203: Track the resolved expiration metadata. Code: `selected_expiration_label = format_expiration_label(target_date)`
|
||||
- Line 204: Fallback branch. Code: `else:`
|
||||
- Line 205: Track whether a base-page lookup is needed. Code: `fallback_to_base = True`
|
||||
- Line 206: Blank line for readability. Code: `<blank>`
|
||||
- Line 207: Conditional branch. Code: `if target_date:`
|
||||
- Line 208: Set the URL to load. Code: `url = f"{base_url}?date={target_date}"`
|
||||
- Line 209: Blank line for readability. Code: `<blank>`
|
||||
- Line 210: Navigate the Playwright page to the target URL. Code: `page.goto(url, wait_until="domcontentloaded", timeout=60000)`
|
||||
- Line 211: Emit or configure a log message. Code: `app.logger.info("Page loaded (domcontentloaded) for %s", symbol)`
|
||||
- Line 212: Blank line for readability. Code: `<blank>`
|
||||
- Line 213: Capture the page HTML content. Code: `html = page.content()`
|
||||
- Line 214: Extract expiration date timestamps from the HTML. Code: `expiration_dates = extract_expiration_dates_from_html(html)`
|
||||
- Line 215: Prepare or update the list of available expirations. Code: `expiration_options = build_expiration_options(expiration_dates)`
|
||||
- Line 216: Blank line for readability. Code: `<blank>`
|
||||
- Line 217: Conditional branch. Code: `if fallback_to_base:`
|
||||
- Line 218: Execute the statement as written. Code: `resolved_value, resolved_label = resolve_expiration(`
|
||||
- Line 219: Execute the statement as written. Code: `requested_expiration, expiration_options`
|
||||
- Line 220: Close the current block or container. Code: `)`
|
||||
- Line 221: Conditional branch. Code: `if resolved_value is None:`
|
||||
- Line 222: Return a value to the caller. Code: `return {`
|
||||
- Line 223: Execute the statement as written. Code: `"error": "Requested expiration not available",`
|
||||
- Line 224: Execute the statement as written. Code: `"stock": symbol,`
|
||||
- Line 225: Execute the statement as written. Code: `"requested_expiration": requested_expiration,`
|
||||
- Line 226: Execute the statement as written. Code: `"available_expirations": [`
|
||||
- Line 227: Execute the statement as written. Code: `{"label": opt.get("label"), "value": opt.get("value")}`
|
||||
- Line 228: Loop over items. Code: `for opt in expiration_options`
|
||||
- Line 229: Close the current block or container. Code: `],`
|
||||
- Line 230: Close the current block or container. Code: `}`
|
||||
- Line 231: Blank line for readability. Code: `<blank>`
|
||||
- Line 232: Track the resolved expiration epoch timestamp. Code: `target_date = resolved_value`
|
||||
- Line 233: Track the resolved expiration metadata. Code: `selected_expiration_value = resolved_value`
|
||||
- Line 234: Track the resolved expiration metadata. Code: `selected_expiration_label = resolved_label or format_expiration_label(`
|
||||
- Line 235: Execute the statement as written. Code: `resolved_value`
|
||||
- Line 236: Close the current block or container. Code: `)`
|
||||
- Line 237: Set the URL to load. Code: `url = f"{base_url}?date={resolved_value}"`
|
||||
- Line 238: Navigate the Playwright page to the target URL. Code: `page.goto(url, wait_until="domcontentloaded", timeout=60000)`
|
||||
- Line 239: Emit or configure a log message. Code: `app.logger.info("Page loaded (domcontentloaded) for %s", symbol)`
|
||||
- Line 240: Blank line for readability. Code: `<blank>`
|
||||
- Line 241: Capture the page HTML content. Code: `html = page.content()`
|
||||
- Line 242: Extract expiration date timestamps from the HTML. Code: `expiration_dates = extract_expiration_dates_from_html(html)`
|
||||
- Line 243: Prepare or update the list of available expirations. Code: `expiration_options = build_expiration_options(expiration_dates)`
|
||||
- Line 244: Blank line for readability. Code: `<blank>`
|
||||
- Line 245: Conditional branch. Code: `if target_date and expiration_options:`
|
||||
- Line 246: Execute the statement as written. Code: `matched = None`
|
||||
- Line 247: Loop over items. Code: `for opt in expiration_options:`
|
||||
- Line 248: Conditional branch. Code: `if opt.get("value") == target_date:`
|
||||
- Line 249: Execute the statement as written. Code: `matched = opt`
|
||||
- Line 250: Execute the statement as written. Code: `break`
|
||||
- Line 251: Conditional branch. Code: `if not matched:`
|
||||
- Line 252: Return a value to the caller. Code: `return {`
|
||||
- Line 253: Execute the statement as written. Code: `"error": "Requested expiration not available",`
|
||||
- Line 254: Execute the statement as written. Code: `"stock": symbol,`
|
||||
- Line 255: Execute the statement as written. Code: `"requested_expiration": requested_expiration,`
|
||||
- Line 256: Execute the statement as written. Code: `"available_expirations": [`
|
||||
- Line 257: Execute the statement as written. Code: `{"label": opt.get("label"), "value": opt.get("value")}`
|
||||
- Line 258: Loop over items. Code: `for opt in expiration_options`
|
||||
- Line 259: Close the current block or container. Code: `],`
|
||||
- Line 260: Close the current block or container. Code: `}`
|
||||
- Line 261: Track the resolved expiration metadata. Code: `selected_expiration_label = matched.get("label")`
|
||||
- Line 262: Alternative conditional branch. Code: `elif expiration_options and not target_date:`
|
||||
- Line 263: Track the resolved expiration metadata. Code: `selected_expiration_value = expiration_options[0].get("value")`
|
||||
- Line 264: Track the resolved expiration metadata. Code: `selected_expiration_label = expiration_options[0].get("label")`
|
||||
- Line 265: Blank line for readability. Code: `<blank>`
|
||||
- Line 266: Emit or configure a log message. Code: `app.logger.info("Waiting for options tables...")`
|
||||
- Line 267: Blank line for readability. Code: `<blank>`
|
||||
- Line 268: Collect option tables from the page. Code: `tables = wait_for_tables(page)`
|
||||
- Line 269: Conditional branch. Code: `if len(tables) < 2:`
|
||||
- Line 270: Emit or configure a log message. Code: `app.logger.error(`
|
||||
- Line 271: Execute the statement as written. Code: `"Only %d tables found; expected 2. HTML may have changed.",`
|
||||
- Line 272: Execute the statement as written. Code: `len(tables),`
|
||||
- Line 273: Close the current block or container. Code: `)`
|
||||
- Line 274: Return a value to the caller. Code: `return {"error": "Could not locate options tables", "stock": symbol}`
|
||||
- Line 275: Blank line for readability. Code: `<blank>`
|
||||
- Line 276: Emit or configure a log message. Code: `app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))`
|
||||
- Line 277: Blank line for readability. Code: `<blank>`
|
||||
- Line 278: Capture the outer HTML of the first table, which is later parsed as calls. Code: `calls_html = tables[0].evaluate("el => el.outerHTML")`
|
||||
- Line 279: Capture the outer HTML of the second table, which is later parsed as puts. Code: `puts_html = tables[1].evaluate("el => el.outerHTML")`
|
||||
- Line 280: Blank line for readability. Code: `<blank>`
|
||||
- Line 281: Comment describing the next block. Code: `# --- Extract current price ---`
|
||||
- Line 282: Start a try block for error handling. Code: `try:`
|
||||
- Line 283: Comment describing the next block. Code: `# Primary selector`
|
||||
- Line 284: Read the current price text from the page. Code: `price_text = page.locator(`
|
||||
- Line 285: Execute the statement as written. Code: `"fin-streamer[data-field='regularMarketPrice']"`
|
||||
- Line 286: Execute the statement as written. Code: `).inner_text()`
|
||||
- Line 287: Initialize or assign the current price. Code: `price = float(price_text.replace(",", ""))`
|
||||
- Line 288: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 289: Start a try block for error handling. Code: `try:`
|
||||
- Line 290: Comment describing the next block. Code: `# Fallback`
|
||||
- Line 291: Read the current price text from the page. Code: `price_text = page.locator("span[data-testid='qsp-price']").inner_text()`
|
||||
- Line 292: Initialize or assign the current price. Code: `price = float(price_text.replace(",", ""))`
|
||||
- Line 293: Handle exceptions for the preceding try block. Code: `except Exception as e:`
|
||||
- Line 294: Emit or configure a log message. Code: `app.logger.warning("Failed to extract price for %s: %s", symbol, e)`
|
||||
- Line 295: Blank line for readability. Code: `<blank>`
|
||||
- Line 296: Emit or configure a log message. Code: `app.logger.info("Current price for %s = %s", symbol, price)`
|
||||
- Line 297: Begin the cleanup block, which runs whether the try body finished, returned early, or raised. Code: `finally:`
|
||||
- Line 298: Close the Playwright browser. Code: `browser.close()`
|
||||
- Line 299: Blank line for readability. Code: `<blank>`
|
||||
- Line 300: Comment describing the next block. Code: `# ----------------------------------------------------------------------`
|
||||
- Line 301: Comment describing the next block. Code: `# Parsing Table HTML`
|
||||
- Line 302: Comment describing the next block. Code: `# ----------------------------------------------------------------------`
|
||||
- Line 303: Define the parse_table function. Code: `def parse_table(table_html, side):`
|
||||
- Line 304: Conditional branch. Code: `if not table_html:`
|
||||
- Line 305: Emit or configure a log message. Code: `app.logger.warning("No %s table HTML for %s", side, symbol)`
|
||||
- Line 306: Return a value to the caller. Code: `return []`
|
||||
- Line 307: Blank line for readability. Code: `<blank>`
|
||||
- Line 308: Execute the statement as written. Code: `soup = BeautifulSoup(table_html, "html.parser")`
|
||||
- Line 309: Blank line for readability. Code: `<blank>`
|
||||
- Line 310: Extract header labels from the table. Code: `headers = [th.get_text(strip=True) for th in soup.select("thead th")]`
|
||||
- Line 311: Collect table rows for parsing. Code: `rows = soup.select("tbody tr")`
|
||||
- Line 312: Blank line for readability. Code: `<blank>`
|
||||
- Line 313: Initialize the parsed rows list. Code: `parsed = []`
|
||||
- Line 314: Loop over items. Code: `for r in rows:`
|
||||
- Line 315: Collect table cells for the current row. Code: `tds = r.find_all("td")`
|
||||
- Line 316: Conditional branch. Code: `if len(tds) != len(headers):`
|
||||
- Line 317: Execute the statement as written. Code: `continue`
|
||||
- Line 318: Blank line for readability. Code: `<blank>`
|
||||
- Line 319: Initialize a row dictionary. Code: `item = {}`
|
||||
- Line 320: Loop over items. Code: `for i, c in enumerate(tds):`
|
||||
- Line 321: Read the header name for the current column. Code: `key = headers[i]`
|
||||
- Line 322: Read or convert the cell value. Code: `val = c.get_text(" ", strip=True)`
|
||||
- Line 323: Blank line for readability. Code: `<blank>`
|
||||
- Line 324: Comment describing the next block. Code: `# Convert numeric fields`
|
||||
- Line 325: Conditional branch. Code: `if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]:`
|
||||
- Line 326: Start a try block for error handling. Code: `try:`
|
||||
- Line 327: Read or convert the cell value. Code: `val = float(val.replace(",", ""))`
|
||||
- Line 328: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 329: Read or convert the cell value. Code: `val = None`
|
||||
- Line 330: Alternative conditional branch. Code: `elif key in ["Volume", "Open Interest"]:`
|
||||
- Line 331: Start a try block for error handling. Code: `try:`
|
||||
- Line 332: Read or convert the cell value. Code: `val = int(val.replace(",", ""))`
|
||||
- Line 333: Handle exceptions for the preceding try block. Code: `except Exception:`
|
||||
- Line 334: Read or convert the cell value. Code: `val = None`
|
||||
- Line 335: Alternative conditional branch. Code: `elif val in ["-", ""]:`
|
||||
- Line 336: Read or convert the cell value. Code: `val = None`
|
||||
- Line 337: Blank line for readability. Code: `<blank>`
|
||||
- Line 338: Store the converted cell value under its column header name. Code: `item[key] = val`
|
||||
- Line 339: Blank line for readability. Code: `<blank>`
|
||||
- Line 340: Add the completed row dictionary to the parsed rows. Code: `parsed.append(item)`
|
||||
- Line 341: Blank line for readability. Code: `<blank>`
|
||||
- Line 342: Emit or configure a log message. Code: `app.logger.info("Parsed %d %s rows", len(parsed), side)`
|
||||
- Line 343: Return a value to the caller. Code: `return parsed`
|
||||
- Line 344: Blank line for readability. Code: `<blank>`
|
||||
- Line 345: Parse the full calls and puts tables. Code: `calls_full = parse_table(calls_html, "calls")`
|
||||
- Line 346: Parse the full calls and puts tables. Code: `puts_full = parse_table(puts_html, "puts")`
|
||||
- Line 347: Blank line for readability. Code: `<blank>`
|
||||
- Line 348: Comment describing the next block. Code: `# ----------------------------------------------------------------------`
|
||||
- Line 349: Comment describing the next block. Code: `# Pruning logic`
|
||||
- Line 350: Comment describing the next block. Code: `# ----------------------------------------------------------------------`
|
||||
- Line 351: Define the prune_nearest function. Code: `def prune_nearest(options, price_value, limit=26, side=""):`
|
||||
- Line 352: Conditional branch. Code: `if price_value is None:`
|
||||
- Line 353: Return a value to the caller. Code: `return options, 0`
|
||||
- Line 354: Blank line for readability. Code: `<blank>`
|
||||
- Line 355: Filter options to numeric strike entries. Code: `numeric = [o for o in options if isinstance(o.get("Strike"), (int, float))]`
|
||||
- Line 356: Blank line for readability. Code: `<blank>`
|
||||
- Line 357: Conditional branch. Code: `if len(numeric) <= limit:`
|
||||
- Line 358: Return a value to the caller. Code: `return numeric, 0`
|
||||
- Line 359: Blank line for readability. Code: `<blank>`
|
||||
- Line 360: Sort options by distance to current price. Code: `sorted_opts = sorted(numeric, key=lambda x: abs(x["Strike"] - price_value))`
|
||||
- Line 361: Keep the closest strike entries. Code: `pruned = sorted_opts[:limit]`
|
||||
- Line 362: Compute how many rows were pruned. Code: `pruned_count = len(options) - len(pruned)`
|
||||
- Line 363: Return a value to the caller. Code: `return pruned, pruned_count`
|
||||
- Line 364: Blank line for readability. Code: `<blank>`
|
||||
- Line 365: Apply pruning to calls. Code: `calls, pruned_calls = prune_nearest(calls_full, price, side="calls")`
|
||||
- Line 366: Apply pruning to puts. Code: `puts, pruned_puts = prune_nearest(puts_full, price, side="puts")`
|
||||
- Line 367: Blank line for readability. Code: `<blank>`
|
||||
- Line 368: Define the strike_range function. Code: `def strike_range(opts):`
|
||||
- Line 369: Collect strike prices from the option list. Code: `strikes = [o["Strike"] for o in opts if isinstance(o.get("Strike"), (int, float))]`
|
||||
- Line 370: Return the `[min, max]` strike pair, or `[None, None]` when there are no numeric strikes. Code: `return [min(strikes), max(strikes)] if strikes else [None, None]`
|
||||
- Line 371: Blank line for readability. Code: `<blank>`
|
||||
- Line 372: Return a value to the caller. Code: `return {`
|
||||
- Line 373: Execute the statement as written. Code: `"stock": symbol,`
|
||||
- Line 374: Execute the statement as written. Code: `"url": url,`
|
||||
- Line 375: Execute the statement as written. Code: `"requested_expiration": requested_expiration,`
|
||||
- Line 376: Execute the statement as written. Code: `"selected_expiration": {`
|
||||
- Line 377: Execute the statement as written. Code: `"value": selected_expiration_value,`
|
||||
- Line 378: Execute the statement as written. Code: `"label": selected_expiration_label,`
|
||||
- Line 379: Close the current block or container. Code: `},`
|
||||
- Line 380: Execute the statement as written. Code: `"current_price": price,`
|
||||
- Line 381: Execute the statement as written. Code: `"calls": calls,`
|
||||
- Line 382: Execute the statement as written. Code: `"puts": puts,`
|
||||
- Line 383: Execute the statement as written. Code: `"calls_strike_range": strike_range(calls),`
|
||||
- Line 384: Execute the statement as written. Code: `"puts_strike_range": strike_range(puts),`
|
||||
- Line 385: Execute the statement as written. Code: `"total_calls": len(calls),`
|
||||
- Line 386: Execute the statement as written. Code: `"total_puts": len(puts),`
|
||||
- Line 387: Execute the statement as written. Code: `"pruned_calls_count": pruned_calls,`
|
||||
- Line 388: Execute the statement as written. Code: `"pruned_puts_count": pruned_puts,`
|
||||
- Line 389: Close the current block or container. Code: `}`
|
||||
- Line 390: Blank line for readability. Code: `<blank>`
|
||||
- Line 391: Blank line for readability. Code: `<blank>`
|
||||
- Line 392: Attach the route decorator to the handler. Code: `@app.route("/scrape_sync")`
|
||||
- Line 393: Define the scrape_sync function. Code: `def scrape_sync():`
|
||||
- Line 394: Read the stock symbol parameter. Code: `symbol = request.args.get("stock", "MSFT")`
|
||||
- Line 395: Read the expiration parameters from the request. Code: `expiration = (`
|
||||
- Line 396: Prefer the `expiration` query parameter when it is present and non-empty. Code: `request.args.get("expiration")`
|
||||
- Line 397: Otherwise fall back to the `expiry` query parameter. Code: `or request.args.get("expiry")`
|
||||
- Line 398: Otherwise fall back to the `date` query parameter. Code: `or request.args.get("date")`
|
||||
- Line 399: Close the current block or container. Code: `)`
|
||||
- Line 400: Emit or configure a log message. Code: `app.logger.info(`
|
||||
- Line 401: Execute the statement as written. Code: `"Received /scrape_sync request for symbol=%s expiration=%s",`
|
||||
- Line 402: Execute the statement as written. Code: `symbol,`
|
||||
- Line 403: Execute the statement as written. Code: `expiration,`
|
||||
- Line 404: Close the current block or container. Code: `)`
|
||||
- Line 405: Return a value to the caller. Code: `return jsonify(scrape_yahoo_options(symbol, expiration))`
|
||||
- Line 406: Blank line for readability. Code: `<blank>`
|
||||
- Line 407: Blank line for readability. Code: `<blank>`
|
||||
- Line 408: Only start the server when the file is executed directly rather than imported. Code: `if __name__ == "__main__":`
|
||||
- Line 409: Run the Flask development server. Code: `app.run(host="0.0.0.0", port=9777)`
|
||||
13
Dockerfile
Normal file
13
Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
# Playwright base image with bundled browsers, so no separate browser
# installation step is run here.
FROM mcr.microsoft.com/playwright/python:v1.50.0-jammy

WORKDIR /app

# Disable Python output buffering so log lines are written immediately.
ENV PYTHONUNBUFFERED=1

# Install dependencies before copying the source so this layer stays cached
# when only scraper_service.py changes.
RUN python -m pip install --no-cache-dir flask beautifulsoup4

COPY scraper_service.py /app/scraper_service.py

# Port the Flask app binds to in scraper_service.py.
EXPOSE 9777

CMD ["python", "scraper_service.py"]
|
||||
@@ -1,8 +1,10 @@
|
||||
from flask import Flask, jsonify, request
|
||||
from playwright.sync_api import sync_playwright
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime, timezone
|
||||
import urllib.parse
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
app = Flask(__name__)
|
||||
@@ -14,68 +16,286 @@ logging.basicConfig(
|
||||
)
|
||||
app.logger.setLevel(logging.INFO)
|
||||
|
||||
DATE_FORMATS = (
|
||||
"%Y-%m-%d",
|
||||
"%Y/%m/%d",
|
||||
"%Y%m%d",
|
||||
"%b %d, %Y",
|
||||
"%B %d, %Y",
|
||||
)
|
||||
|
||||
def scrape_yahoo_options(symbol):
|
||||
|
||||
def parse_date(value):
    """Parse ``value`` against each pattern in ``DATE_FORMATS``, in order.

    Returns the ``date`` produced by the first pattern that accepts the
    string, or ``None`` when no pattern matches.
    """
    parsed = None
    for pattern in DATE_FORMATS:
        try:
            parsed = datetime.strptime(value, pattern).date()
        except ValueError:
            # This pattern does not fit the input; try the next one.
            continue
        break
    return parsed
|
||||
|
||||
|
||||
def normalize_label(value):
    """Return ``value`` lower-cased, trimmed, with whitespace runs collapsed.

    Used to compare expiration labels without regard to case or spacing.
    """
    # str.split() with no arguments already drops leading/trailing
    # whitespace and treats any run of whitespace as one separator.
    words = value.split()
    collapsed = " ".join(words)
    return collapsed.lower()
|
||||
|
||||
|
||||
def format_expiration_label(timestamp):
    """Format an epoch-seconds timestamp as a ``YYYY-MM-DD`` UTC date label.

    Args:
        timestamp: Seconds since the Unix epoch.

    Returns:
        The UTC calendar date as ``YYYY-MM-DD``. If the value cannot be
        converted (wrong type or out of the platform's supported range),
        ``str(timestamp)`` is returned instead so callers always get a label.
    """
    try:
        # Timezone-aware replacement for datetime.utcfromtimestamp(), which
        # is deprecated since Python 3.12; the formatted date is the same.
        moment = datetime.fromtimestamp(timestamp, tz=timezone.utc)
        return moment.strftime("%Y-%m-%d")
    except (TypeError, ValueError, OverflowError, OSError):
        # Best-effort: fall back to the raw value rather than failing.
        return str(timestamp)
|
||||
|
||||
|
||||
def extract_expiration_dates_from_html(html):
    """Extract the ``expirationDates`` integer array embedded in page HTML.

    The array is looked up first in its backslash-escaped JSON form and
    then in its plain JSON form; the first form found anywhere in ``html``
    wins. Only entries made up purely of digits are kept.

    Returns:
        A list of ints in the order they appear, or an empty list when
        ``html`` is empty or no array is found.
    """
    if not html:
        return []

    found = None
    for regex in (
        r'\\"expirationDates\\":\[(.*?)\]',
        r'"expirationDates":\[(.*?)\]',
    ):
        found = re.search(regex, html, re.DOTALL)
        if found is not None:
            break
    if found is None:
        return []

    timestamps = []
    for token in found.group(1).split(","):
        token = token.strip()
        if not token.isdigit():
            continue
        try:
            timestamps.append(int(token))
        except Exception:
            # isdigit() accepts some characters int() rejects; skip those.
            continue
    return timestamps
|
||||
|
||||
|
||||
def build_expiration_options(expiration_dates):
    """Turn raw expiration timestamps into option records sorted by value.

    Args:
        expiration_dates: Iterable of epoch-second values (ints or values
            ``int()`` can convert); ``None`` is treated as empty.

    Returns:
        A list of dicts with keys ``value`` (int epoch seconds), ``label``
        (from ``format_expiration_label``) and ``date`` (UTC ``date`` or
        ``None`` when the timestamp is out of range), sorted ascending by
        ``value``. Entries that cannot be converted to int are skipped.
    """
    options = []
    for value in expiration_dates or []:
        try:
            value_int = int(value)
        except (TypeError, ValueError, OverflowError):
            # Not a usable timestamp; ignore this entry.
            continue

        label = format_expiration_label(value_int)
        try:
            # Timezone-aware replacement for the deprecated
            # datetime.utcfromtimestamp(); yields the same UTC date.
            date_value = datetime.fromtimestamp(value_int, tz=timezone.utc).date()
        except (ValueError, OverflowError, OSError):
            date_value = None

        options.append({"value": value_int, "label": label, "date": date_value})
    return sorted(options, key=lambda x: x["value"])
|
||||
|
||||
|
||||
def resolve_expiration(expiration, options):
    """Match a user-supplied expiration against the available options.

    The input is tried, in order, as:
      1. epoch seconds (all-digit string) matched on an option's ``value``;
      2. a date string accepted by ``parse_date`` matched on ``date``;
      3. free text matched on the normalized ``label``.

    Args:
        expiration: Raw expiration string from the request (may be empty
            or ``None``).
        options: Option dicts as produced by ``build_expiration_options``;
            ``None`` is treated as an empty list.

    Returns:
        ``(value, label)`` of the matched option, or ``(None, None)`` when
        nothing matches. For an all-digit input with no options to check
        against, the number itself is returned with a formatted label.
    """
    if not expiration:
        return None, None

    raw = expiration.strip()
    if not raw:
        return None, None

    options = options or []

    # isdecimal() (rather than isdigit()) only accepts characters that
    # int() can actually convert, so the conversion below cannot raise.
    if raw.isdecimal():
        value = int(raw)
        if not options:
            return value, format_expiration_label(value)
        for opt in options:
            if opt.get("value") == value:
                return value, opt.get("label")
        # No epoch match: fall through so compact dates such as
        # "20250117" (the "%Y%m%d" format) still get a chance below.

    requested_date = parse_date(raw)
    if requested_date:
        for opt in options:
            if opt.get("date") == requested_date:
                return opt.get("value"), opt.get("label")
        return None, None

    normalized = normalize_label(raw)
    for opt in options:
        # "or ''" guards against an option whose label is present but None.
        if normalize_label(opt.get("label") or "") == normalized:
            return opt.get("value"), opt.get("label")

    return None, None
|
||||
|
||||
|
||||
def wait_for_tables(page):
    """Wait until the page shows at least two tables and return them.

    First waits (up to 30 s) for a table inside the options-list section,
    falling back to any ``<table>`` if that selector never appears. Then
    polls up to 30 times, about one second apart, checking the section
    selector first and the bare ``table`` selector second.

    Args:
        page: Playwright sync ``Page`` to inspect.

    Returns:
        The list of element handles from the first selector that yields
        two or more tables, or an empty list if none does within the
        polling window.

    Raises:
        Whatever ``page.wait_for_selector`` raises if no table at all
        appears during the fallback wait.
    """
    scoped_selector = "section[data-testid='options-list-table'] table"
    try:
        page.wait_for_selector(scoped_selector, timeout=30000)
    except Exception:
        page.wait_for_selector("table", timeout=30000)

    attempts = 30  # 30 attempts, ~1 s apart
    for attempt in range(attempts):
        for selector in (scoped_selector, "table"):
            tables = page.query_selector_all(selector)
            if len(tables) >= 2:
                return tables
        if attempt < attempts - 1:
            # Use Playwright's own wait instead of time.sleep(): the
            # Playwright docs advise this for the sync API. No wait after
            # the last attempt.
            page.wait_for_timeout(1000)
    return []
|
||||
|
||||
|
||||
def scrape_yahoo_options(symbol, expiration=None):
|
||||
encoded = urllib.parse.quote(symbol, safe="")
|
||||
url = f"https://finance.yahoo.com/quote/{encoded}/options/"
|
||||
base_url = f"https://finance.yahoo.com/quote/{encoded}/options/"
|
||||
requested_expiration = expiration.strip() if expiration else None
|
||||
if not requested_expiration:
|
||||
requested_expiration = None
|
||||
url = base_url
|
||||
|
||||
app.logger.info("Starting scrape for symbol=%s url=%s", symbol, url)
|
||||
app.logger.info(
|
||||
"Starting scrape for symbol=%s expiration=%s url=%s",
|
||||
symbol,
|
||||
requested_expiration,
|
||||
base_url,
|
||||
)
|
||||
|
||||
calls_html = None
|
||||
puts_html = None
|
||||
price = None
|
||||
selected_expiration_value = None
|
||||
selected_expiration_label = None
|
||||
expiration_options = []
|
||||
target_date = None
|
||||
fallback_to_base = False
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser = p.chromium.launch(headless=True)
|
||||
page = browser.new_page()
|
||||
page.set_extra_http_headers({
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
|
||||
)
|
||||
})
|
||||
page.set_extra_http_headers(
|
||||
{
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
|
||||
)
|
||||
}
|
||||
)
|
||||
page.set_default_timeout(60000)
|
||||
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
||||
app.logger.info("Page loaded (domcontentloaded) for %s", symbol)
|
||||
|
||||
# --- FIXED: Yahoo changed all classnames. We no longer depend on them. ---
|
||||
# We simply wait until at least TWO <table> tags appear.
|
||||
app.logger.info("Waiting for options tables...")
|
||||
|
||||
# Wait for any table to exist
|
||||
page.wait_for_selector("table", timeout=30000)
|
||||
|
||||
# Repeatedly check until 2 tables appear
|
||||
for _ in range(30): # 30 × 1s = 30 seconds
|
||||
tables = page.query_selector_all("table")
|
||||
if len(tables) >= 2:
|
||||
break
|
||||
time.sleep(1)
|
||||
|
||||
tables = page.query_selector_all("table")
|
||||
if len(tables) < 2:
|
||||
app.logger.error("Only %d tables found — expected 2. HTML changed?", len(tables))
|
||||
browser.close()
|
||||
return {"error": "Could not locate options tables", "stock": symbol}
|
||||
|
||||
app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))
|
||||
|
||||
calls_html = tables[0].evaluate("el => el.outerHTML")
|
||||
puts_html = tables[1].evaluate("el => el.outerHTML")
|
||||
|
||||
# --- Extract current price ---
|
||||
price = None
|
||||
try:
|
||||
# Primary selector
|
||||
price_text = page.locator("fin-streamer[data-field='regularMarketPrice']").inner_text()
|
||||
price = float(price_text.replace(",", ""))
|
||||
except:
|
||||
if requested_expiration:
|
||||
if requested_expiration.isdigit():
|
||||
target_date = int(requested_expiration)
|
||||
selected_expiration_value = target_date
|
||||
selected_expiration_label = format_expiration_label(target_date)
|
||||
else:
|
||||
parsed_date = parse_date(requested_expiration)
|
||||
if parsed_date:
|
||||
target_date = int(
|
||||
datetime(
|
||||
parsed_date.year,
|
||||
parsed_date.month,
|
||||
parsed_date.day,
|
||||
tzinfo=timezone.utc,
|
||||
).timestamp()
|
||||
)
|
||||
selected_expiration_value = target_date
|
||||
selected_expiration_label = format_expiration_label(target_date)
|
||||
else:
|
||||
fallback_to_base = True
|
||||
|
||||
if target_date:
|
||||
url = f"{base_url}?date={target_date}"
|
||||
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
||||
app.logger.info("Page loaded (domcontentloaded) for %s", symbol)
|
||||
|
||||
html = page.content()
|
||||
expiration_dates = extract_expiration_dates_from_html(html)
|
||||
expiration_options = build_expiration_options(expiration_dates)
|
||||
|
||||
if fallback_to_base:
|
||||
resolved_value, resolved_label = resolve_expiration(
|
||||
requested_expiration, expiration_options
|
||||
)
|
||||
if resolved_value is None:
|
||||
return {
|
||||
"error": "Requested expiration not available",
|
||||
"stock": symbol,
|
||||
"requested_expiration": requested_expiration,
|
||||
"available_expirations": [
|
||||
{"label": opt.get("label"), "value": opt.get("value")}
|
||||
for opt in expiration_options
|
||||
],
|
||||
}
|
||||
|
||||
target_date = resolved_value
|
||||
selected_expiration_value = resolved_value
|
||||
selected_expiration_label = resolved_label or format_expiration_label(
|
||||
resolved_value
|
||||
)
|
||||
url = f"{base_url}?date={resolved_value}"
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
||||
app.logger.info("Page loaded (domcontentloaded) for %s", symbol)
|
||||
|
||||
html = page.content()
|
||||
expiration_dates = extract_expiration_dates_from_html(html)
|
||||
expiration_options = build_expiration_options(expiration_dates)
|
||||
|
||||
if target_date and expiration_options:
|
||||
matched = None
|
||||
for opt in expiration_options:
|
||||
if opt.get("value") == target_date:
|
||||
matched = opt
|
||||
break
|
||||
if not matched:
|
||||
return {
|
||||
"error": "Requested expiration not available",
|
||||
"stock": symbol,
|
||||
"requested_expiration": requested_expiration,
|
||||
"available_expirations": [
|
||||
{"label": opt.get("label"), "value": opt.get("value")}
|
||||
for opt in expiration_options
|
||||
],
|
||||
}
|
||||
selected_expiration_label = matched.get("label")
|
||||
elif expiration_options and not target_date:
|
||||
selected_expiration_value = expiration_options[0].get("value")
|
||||
selected_expiration_label = expiration_options[0].get("label")
|
||||
|
||||
app.logger.info("Waiting for options tables...")
|
||||
|
||||
tables = wait_for_tables(page)
|
||||
if len(tables) < 2:
|
||||
app.logger.error(
|
||||
"Only %d tables found; expected 2. HTML may have changed.",
|
||||
len(tables),
|
||||
)
|
||||
return {"error": "Could not locate options tables", "stock": symbol}
|
||||
|
||||
app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))
|
||||
|
||||
calls_html = tables[0].evaluate("el => el.outerHTML")
|
||||
puts_html = tables[1].evaluate("el => el.outerHTML")
|
||||
|
||||
# --- Extract current price ---
|
||||
try:
|
||||
# Fallback
|
||||
price_text = page.locator("span[data-testid='qsp-price']").inner_text()
|
||||
# Primary selector
|
||||
price_text = page.locator(
|
||||
"fin-streamer[data-field='regularMarketPrice']"
|
||||
).inner_text()
|
||||
price = float(price_text.replace(",", ""))
|
||||
except Exception as e:
|
||||
app.logger.warning("Failed to extract price for %s: %s", symbol, e)
|
||||
except Exception:
|
||||
try:
|
||||
# Fallback
|
||||
price_text = page.locator("span[data-testid='qsp-price']").inner_text()
|
||||
price = float(price_text.replace(",", ""))
|
||||
except Exception as e:
|
||||
app.logger.warning("Failed to extract price for %s: %s", symbol, e)
|
||||
|
||||
app.logger.info("Current price for %s = %s", symbol, price)
|
||||
|
||||
browser.close()
|
||||
app.logger.info("Current price for %s = %s", symbol, price)
|
||||
finally:
|
||||
browser.close()
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Parsing Table HTML
|
||||
@@ -105,12 +325,12 @@ def scrape_yahoo_options(symbol):
|
||||
if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]:
|
||||
try:
|
||||
val = float(val.replace(",", ""))
|
||||
except:
|
||||
except Exception:
|
||||
val = None
|
||||
elif key in ["Volume", "Open Interest"]:
|
||||
try:
|
||||
val = int(val.replace(",", ""))
|
||||
except:
|
||||
except Exception:
|
||||
val = None
|
||||
elif val in ["-", ""]:
|
||||
val = None
|
||||
@@ -152,6 +372,11 @@ def scrape_yahoo_options(symbol):
|
||||
return {
|
||||
"stock": symbol,
|
||||
"url": url,
|
||||
"requested_expiration": requested_expiration,
|
||||
"selected_expiration": {
|
||||
"value": selected_expiration_value,
|
||||
"label": selected_expiration_label,
|
||||
},
|
||||
"current_price": price,
|
||||
"calls": calls,
|
||||
"puts": puts,
|
||||
@@ -167,8 +392,17 @@ def scrape_yahoo_options(symbol):
|
||||
@app.route("/scrape_sync")
|
||||
def scrape_sync():
|
||||
symbol = request.args.get("stock", "MSFT")
|
||||
app.logger.info("Received /scrape_sync request for symbol=%s", symbol)
|
||||
return jsonify(scrape_yahoo_options(symbol))
|
||||
expiration = (
|
||||
request.args.get("expiration")
|
||||
or request.args.get("expiry")
|
||||
or request.args.get("date")
|
||||
)
|
||||
app.logger.info(
|
||||
"Received /scrape_sync request for symbol=%s expiration=%s",
|
||||
symbol,
|
||||
expiration,
|
||||
)
|
||||
return jsonify(scrape_yahoo_options(symbol, expiration))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user