From 67b8fad4233c9e4cfb4748f93049fb720bc668a5 Mon Sep 17 00:00:00 2001 From: Rushabh Gosar Date: Sun, 28 Dec 2025 00:15:29 -0800 Subject: [PATCH] Fix expiration-specific options parsing --- AGENTS.md | 900 +++++++++++++++++++++++++++------------------ scraper_service.py | 318 +++++++++++++--- 2 files changed, 805 insertions(+), 413 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 9c5449a..a446182 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,302 +19,302 @@ - Line 4: Import symbols from datetime. Code: `from datetime import datetime, timezone` - Line 5: Import module urllib.parse. Code: `import urllib.parse` - Line 6: Import module logging. Code: `import logging` -- Line 7: Import module re. Code: `import re` -- Line 8: Import module time. Code: `import time` -- Line 9: Blank line for readability. Code: `` -- Line 10: Create the Flask application instance. Code: `app = Flask(__name__)` -- Line 11: Blank line for readability. Code: `` -- Line 12: Comment describing the next block. Code: `# Logging` -- Line 13: Configure logging defaults. Code: `logging.basicConfig(` -- Line 14: Execute the statement as written. Code: `level=logging.INFO,` -- Line 15: Execute the statement as written. Code: `format="%(asctime)s [%(levelname)s] %(message)s"` -- Line 16: Close the current block or container. Code: `)` -- Line 17: Set the Flask logger level. Code: `app.logger.setLevel(logging.INFO)` -- Line 18: Blank line for readability. Code: `` -- Line 19: Define accepted expiration date string formats. Code: `DATE_FORMATS = (` -- Line 20: Execute the statement as written. Code: `"%Y-%m-%d",` -- Line 21: Execute the statement as written. Code: `"%Y/%m/%d",` -- Line 22: Execute the statement as written. Code: `"%Y%m%d",` -- Line 23: Execute the statement as written. Code: `"%b %d, %Y",` -- Line 24: Execute the statement as written. Code: `"%B %d, %Y",` -- Line 25: Close the current block or container. Code: `)` -- Line 26: Blank line for readability. Code: `` +- Line 7: Import module json. Code: `import json` +- Line 8: Import module re. Code: `import re` +- Line 9: Import module time. Code: `import time` +- Line 10: Blank line for readability. Code: `` +- Line 11: Create the Flask application instance. Code: `app = Flask(__name__)` +- Line 12: Blank line for readability. Code: `` +- Line 13: Comment describing the next block. Code: `# Logging` +- Line 14: Configure logging defaults. Code: `logging.basicConfig(` +- Line 15: Execute the statement as written. Code: `level=logging.INFO,` +- Line 16: Execute the statement as written. Code: `format="%(asctime)s [%(levelname)s] %(message)s"` +- Line 17: Close the current block or container. Code: `)` +- Line 18: Set the Flask logger level. Code: `app.logger.setLevel(logging.INFO)` +- Line 19: Blank line for readability. Code: `` +- Line 20: Define accepted expiration date string formats. Code: `DATE_FORMATS = (` +- Line 21: Execute the statement as written. Code: `"%Y-%m-%d",` +- Line 22: Execute the statement as written. Code: `"%Y/%m/%d",` +- Line 23: Execute the statement as written. Code: `"%Y%m%d",` +- Line 24: Execute the statement as written. Code: `"%b %d, %Y",` +- Line 25: Execute the statement as written. Code: `"%B %d, %Y",` +- Line 26: Close the current block or container. Code: `)` - Line 27: Blank line for readability. Code: `` -- Line 28: Define the parse_date function. Code: `def parse_date(value):` -- Line 29: Loop over items. Code: `for fmt in DATE_FORMATS:` -- Line 30: Start a try block for error handling. Code: `try:` -- Line 31: Return a value to the caller. Code: `return datetime.strptime(value, fmt).date()` -- Line 32: Handle exceptions for the preceding try block. Code: `except ValueError:` -- Line 33: Execute the statement as written. Code: `continue` -- Line 34: Return a value to the caller. Code: `return None` -- Line 35: Blank line for readability. Code: `` +- Line 28: Blank line for readability. Code: `` +- Line 29: Define the parse_date function. Code: `def parse_date(value):` +- Line 30: Loop over items. Code: `for fmt in DATE_FORMATS:` +- Line 31: Start a try block for error handling. Code: `try:` +- Line 32: Return a value to the caller. Code: `return datetime.strptime(value, fmt).date()` +- Line 33: Handle exceptions for the preceding try block. Code: `except ValueError:` +- Line 34: Execute the statement as written. Code: `continue` +- Line 35: Return a value to the caller. Code: `return None` - Line 36: Blank line for readability. Code: `` -- Line 37: Define the normalize_label function. Code: `def normalize_label(value):` -- Line 38: Return a value to the caller. Code: `return " ".join(value.strip().split()).lower()` -- Line 39: Blank line for readability. Code: `` +- Line 37: Blank line for readability. Code: `` +- Line 38: Define the normalize_label function. Code: `def normalize_label(value):` +- Line 39: Return a value to the caller. Code: `return " ".join(value.strip().split()).lower()` - Line 40: Blank line for readability. Code: `` -- Line 41: Define the format_expiration_label function. Code: `def format_expiration_label(timestamp):` -- Line 42: Start a try block for error handling. Code: `try:` -- Line 43: Return a value to the caller. Code: `return datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d")` -- Line 44: Handle exceptions for the preceding try block. Code: `except Exception:` -- Line 45: Return a value to the caller. Code: `return str(timestamp)` -- Line 46: Blank line for readability. Code: `` +- Line 41: Blank line for readability. Code: `` +- Line 42: Define the format_expiration_label function. Code: `def format_expiration_label(timestamp):` +- Line 43: Start a try block for error handling. Code: `try:` +- Line 44: Return a value to the caller. Code: `return datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d")` +- Line 45: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 46: Return a value to the caller. Code: `return str(timestamp)` - Line 47: Blank line for readability. Code: `` -- Line 48: Define the extract_expiration_dates_from_html function. Code: `def extract_expiration_dates_from_html(html):` -- Line 49: Conditional branch. Code: `if not html:` -- Line 50: Return a value to the caller. Code: `return []` -- Line 51: Blank line for readability. Code: `` -- Line 52: Execute the statement as written. Code: `patterns = (` -- Line 53: Execute the statement as written. Code: `r'\\"expirationDates\\":\[(.*?)\]',` -- Line 54: Execute the statement as written. Code: `r'"expirationDates":\[(.*?)\]',` -- Line 55: Close the current block or container. Code: `)` -- Line 56: Execute the statement as written. Code: `match = None` -- Line 57: Loop over items. Code: `for pattern in patterns:` -- Line 58: Execute the statement as written. Code: `match = re.search(pattern, html, re.DOTALL)` -- Line 59: Conditional branch. Code: `if match:` -- Line 60: Execute the statement as written. Code: `break` -- Line 61: Conditional branch. Code: `if not match:` -- Line 62: Return a value to the caller. Code: `return []` +- Line 48: Blank line for readability. Code: `` +- Line 49: Define the format_percent function. Code: `def format_percent(value):` +- Line 50: Conditional branch. Code: `if value is None:` +- Line 51: Return a value to the caller. Code: `return None` +- Line 52: Start a try block for error handling. Code: `try:` +- Line 53: Return a value to the caller. Code: `return f"{value * 100:.2f}%"` +- Line 54: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 55: Return a value to the caller. Code: `return None` +- Line 56: Blank line for readability. Code: `` +- Line 57: Blank line for readability. Code: `` +- Line 58: Define the extract_raw_value function. Code: `def extract_raw_value(value):` +- Line 59: Conditional branch. Code: `if isinstance(value, dict):` +- Line 60: Return a value to the caller. Code: `return value.get("raw")` +- Line 61: Return a value to the caller. Code: `return value` +- Line 62: Blank line for readability. Code: `` - Line 63: Blank line for readability. Code: `` -- Line 64: Execute the statement as written. Code: `raw = match.group(1)` -- Line 65: Execute the statement as written. Code: `values = []` -- Line 66: Loop over items. Code: `for part in raw.split(","):` -- Line 67: Execute the statement as written. Code: `part = part.strip()` -- Line 68: Conditional branch. Code: `if part.isdigit():` -- Line 69: Start a try block for error handling. Code: `try:` -- Line 70: Execute the statement as written. Code: `values.append(int(part))` -- Line 71: Handle exceptions for the preceding try block. Code: `except Exception:` -- Line 72: Execute the statement as written. Code: `continue` -- Line 73: Return a value to the caller. Code: `return values` -- Line 74: Blank line for readability. Code: `` +- Line 64: Define the extract_fmt_value function. Code: `def extract_fmt_value(value):` +- Line 65: Conditional branch. Code: `if isinstance(value, dict):` +- Line 66: Return a value to the caller. Code: `return value.get("fmt")` +- Line 67: Return a value to the caller. Code: `return None` +- Line 68: Blank line for readability. Code: `` +- Line 69: Blank line for readability. Code: `` +- Line 70: Define the format_percent_value function. Code: `def format_percent_value(value):` +- Line 71: Execute the statement as written. Code: `fmt = extract_fmt_value(value)` +- Line 72: Conditional branch. Code: `if fmt is not None:` +- Line 73: Return a value to the caller. Code: `return fmt` +- Line 74: Return a value to the caller. Code: `return format_percent(extract_raw_value(value))` - Line 75: Blank line for readability. Code: `` -- Line 76: Define the build_expiration_options function. Code: `def build_expiration_options(expiration_dates):` -- Line 77: Execute the statement as written. Code: `options = []` -- Line 78: Loop over items. Code: `for value in expiration_dates or []:` -- Line 79: Start a try block for error handling. Code: `try:` -- Line 80: Execute the statement as written. Code: `value_int = int(value)` -- Line 81: Handle exceptions for the preceding try block. Code: `except Exception:` -- Line 82: Execute the statement as written. Code: `continue` -- Line 83: Blank line for readability. Code: `` -- Line 84: Execute the statement as written. Code: `label = format_expiration_label(value_int)` -- Line 85: Start a try block for error handling. Code: `try:` -- Line 86: Execute the statement as written. Code: `date_value = datetime.utcfromtimestamp(value_int).date()` -- Line 87: Handle exceptions for the preceding try block. Code: `except Exception:` -- Line 88: Execute the statement as written. Code: `date_value = None` -- Line 89: Blank line for readability. Code: `` -- Line 90: Execute the statement as written. Code: `options.append({"value": value_int, "label": label, "date": date_value})` -- Line 91: Return a value to the caller. Code: `return sorted(options, key=lambda x: x["value"])` -- Line 92: Blank line for readability. Code: `` -- Line 93: Blank line for readability. Code: `` -- Line 94: Define the resolve_expiration function. Code: `def resolve_expiration(expiration, options):` -- Line 95: Conditional branch. Code: `if not expiration:` -- Line 96: Return a value to the caller. Code: `return None, None` -- Line 97: Blank line for readability. Code: `` -- Line 98: Execute the statement as written. Code: `raw = expiration.strip()` -- Line 99: Conditional branch. Code: `if not raw:` -- Line 100: Return a value to the caller. Code: `return None, None` -- Line 101: Blank line for readability. Code: `` -- Line 102: Conditional branch. Code: `if raw.isdigit():` -- Line 103: Execute the statement as written. Code: `value = int(raw)` -- Line 104: Conditional branch. Code: `if options:` -- Line 105: Loop over items. Code: `for opt in options:` -- Line 106: Conditional branch. Code: `if opt.get("value") == value:` -- Line 107: Return a value to the caller. Code: `return value, opt.get("label")` -- Line 108: Return a value to the caller. Code: `return None, None` -- Line 109: Return a value to the caller. Code: `return value, format_expiration_label(value)` -- Line 110: Blank line for readability. Code: `` -- Line 111: Execute the statement as written. Code: `requested_date = parse_date(raw)` -- Line 112: Conditional branch. Code: `if requested_date:` -- Line 113: Loop over items. Code: `for opt in options:` -- Line 114: Conditional branch. Code: `if opt.get("date") == requested_date:` -- Line 115: Return a value to the caller. Code: `return opt.get("value"), opt.get("label")` -- Line 116: Return a value to the caller. Code: `return None, None` -- Line 117: Blank line for readability. Code: `` -- Line 118: Execute the statement as written. Code: `normalized = normalize_label(raw)` -- Line 119: Loop over items. Code: `for opt in options:` -- Line 120: Conditional branch. Code: `if normalize_label(opt.get("label", "")) == normalized:` -- Line 121: Return a value to the caller. Code: `return opt.get("value"), opt.get("label")` -- Line 122: Blank line for readability. Code: `` -- Line 123: Return a value to the caller. Code: `return None, None` -- Line 124: Blank line for readability. Code: `` -- Line 125: Blank line for readability. Code: `` -- Line 126: Define the wait_for_tables function. Code: `def wait_for_tables(page):` -- Line 127: Start a try block for error handling. Code: `try:` -- Line 128: Interact with the Playwright page. Code: `page.wait_for_selector(` -- Line 129: Execute the statement as written. Code: `"section[data-testid='options-list-table'] table",` -- Line 130: Execute the statement as written. Code: `timeout=30000,` -- Line 131: Close the current block or container. Code: `)` -- Line 132: Handle exceptions for the preceding try block. Code: `except Exception:` -- Line 133: Interact with the Playwright page. Code: `page.wait_for_selector("table", timeout=30000)` -- Line 134: Blank line for readability. Code: `` -- Line 135: Loop over items. Code: `for _ in range(30): # 30 * 1s = 30 seconds` -- Line 136: Collect option tables from the page. Code: `tables = page.query_selector_all(` -- Line 137: Execute the statement as written. Code: `"section[data-testid='options-list-table'] table"` -- Line 138: Close the current block or container. Code: `)` -- Line 139: Conditional branch. Code: `if len(tables) >= 2:` -- Line 140: Return a value to the caller. Code: `return tables` -- Line 141: Collect option tables from the page. Code: `tables = page.query_selector_all("table")` -- Line 142: Conditional branch. Code: `if len(tables) >= 2:` -- Line 143: Return a value to the caller. Code: `return tables` -- Line 144: Execute the statement as written. Code: `time.sleep(1)` -- Line 145: Return a value to the caller. Code: `return []` +- Line 76: Blank line for readability. Code: `` +- Line 77: Define the format_last_trade_date function. Code: `def format_last_trade_date(timestamp):` +- Line 78: Execute the statement as written. Code: `timestamp = extract_raw_value(timestamp)` +- Line 79: Conditional branch. Code: `if not timestamp:` +- Line 80: Return a value to the caller. Code: `return None` +- Line 81: Start a try block for error handling. Code: `try:` +- Line 82: Return a value to the caller. Code: `return datetime.fromtimestamp(timestamp).strftime("%m/%d/%Y %I:%M %p") + " EST"` +- Line 83: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 84: Return a value to the caller. Code: `return None` +- Line 85: Blank line for readability. Code: `` +- Line 86: Blank line for readability. Code: `` +- Line 87: Define the extract_option_chain_from_html function. Code: `def extract_option_chain_from_html(html):` +- Line 88: Conditional branch. Code: `if not html:` +- Line 89: Return a value to the caller. Code: `return None` +- Line 90: Blank line for readability. Code: `` +- Line 91: Execute the statement as written. Code: `token = "\"body\":\""` +- Line 92: Execute the statement as written. Code: `start = 0` +- Line 93: Execute the statement as written. Code: `while True:` +- Line 94: Execute the statement as written. Code: `idx = html.find(token, start)` +- Line 95: Conditional branch. Code: `if idx == -1:` +- Line 96: Execute the statement as written. Code: `break` +- Line 97: Execute the statement as written. Code: `i = idx + len(token)` +- Line 98: Execute the statement as written. Code: `escaped = False` +- Line 99: Execute the statement as written. Code: `raw_chars = []` +- Line 100: Execute the statement as written. Code: `while i < len(html):` +- Line 101: Execute the statement as written. Code: `ch = html[i]` +- Line 102: Conditional branch. Code: `if escaped:` +- Line 103: Execute the statement as written. Code: `raw_chars.append(ch)` +- Line 104: Execute the statement as written. Code: `escaped = False` +- Line 105: Fallback branch. Code: `else:` +- Line 106: Conditional branch. Code: `if ch == "\\":` +- Line 107: Execute the statement as written. Code: `raw_chars.append(ch)` +- Line 108: Execute the statement as written. Code: `escaped = True` +- Line 109: Alternative conditional branch. Code: `elif ch == "\"":` +- Line 110: Execute the statement as written. Code: `break` +- Line 111: Fallback branch. Code: `else:` +- Line 112: Execute the statement as written. Code: `raw_chars.append(ch)` +- Line 113: Execute the statement as written. Code: `i += 1` +- Line 114: Execute the statement as written. Code: `raw = "".join(raw_chars)` +- Line 115: Start a try block for error handling. Code: `try:` +- Line 116: Execute the statement as written. Code: `body_text = json.loads(f"\"{raw}\"")` +- Line 117: Handle exceptions for the preceding try block. Code: `except json.JSONDecodeError:` +- Line 118: Execute the statement as written. Code: `start = idx + len(token)` +- Line 119: Execute the statement as written. Code: `continue` +- Line 120: Conditional branch. Code: `if "optionChain" not in body_text:` +- Line 121: Execute the statement as written. Code: `start = idx + len(token)` +- Line 122: Execute the statement as written. Code: `continue` +- Line 123: Start a try block for error handling. Code: `try:` +- Line 124: Execute the statement as written. Code: `payload = json.loads(body_text)` +- Line 125: Handle exceptions for the preceding try block. Code: `except json.JSONDecodeError:` +- Line 126: Execute the statement as written. Code: `start = idx + len(token)` +- Line 127: Execute the statement as written. Code: `continue` +- Line 128: Execute the statement as written. Code: `option_chain = payload.get("optionChain")` +- Line 129: Conditional branch. Code: `if option_chain and option_chain.get("result"):` +- Line 130: Return a value to the caller. Code: `return option_chain` +- Line 131: Blank line for readability. Code: `` +- Line 132: Execute the statement as written. Code: `start = idx + len(token)` +- Line 133: Blank line for readability. Code: `` +- Line 134: Return a value to the caller. Code: `return None` +- Line 135: Blank line for readability. Code: `` +- Line 136: Blank line for readability. Code: `` +- Line 137: Define the extract_expiration_dates_from_chain function. Code: `def extract_expiration_dates_from_chain(chain):` +- Line 138: Conditional branch. Code: `if not chain:` +- Line 139: Return a value to the caller. Code: `return []` +- Line 140: Blank line for readability. Code: `` +- Line 141: Execute the statement as written. Code: `result = chain.get("result", [])` +- Line 142: Conditional branch. Code: `if not result:` +- Line 143: Return a value to the caller. Code: `return []` +- Line 144: Return a value to the caller. Code: `return result[0].get("expirationDates", []) or []` +- Line 145: Blank line for readability. Code: `` - Line 146: Blank line for readability. Code: `` -- Line 147: Blank line for readability. Code: `` -- Line 148: Define the scrape_yahoo_options function. Code: `def scrape_yahoo_options(symbol, expiration=None):` -- Line 149: URL-encode the stock symbol. Code: `encoded = urllib.parse.quote(symbol, safe="")` -- Line 150: Build the base Yahoo Finance options URL. Code: `base_url = f"https://finance.yahoo.com/quote/{encoded}/options/"` -- Line 151: Normalize the expiration input string. Code: `requested_expiration = expiration.strip() if expiration else None` -- Line 152: Conditional branch. Code: `if not requested_expiration:` -- Line 153: Normalize the expiration input string. Code: `requested_expiration = None` -- Line 154: Set the URL to load. Code: `url = base_url` -- Line 155: Blank line for readability. Code: `` -- Line 156: Emit or configure a log message. Code: `app.logger.info(` -- Line 157: Execute the statement as written. Code: `"Starting scrape for symbol=%s expiration=%s url=%s",` -- Line 158: Execute the statement as written. Code: `symbol,` -- Line 159: Execute the statement as written. Code: `requested_expiration,` -- Line 160: Execute the statement as written. Code: `base_url,` -- Line 161: Close the current block or container. Code: `)` -- Line 162: Blank line for readability. Code: `` -- Line 163: Reserve storage for options table HTML. Code: `calls_html = None` -- Line 164: Reserve storage for options table HTML. Code: `puts_html = None` -- Line 165: Initialize or assign the current price. Code: `price = None` -- Line 166: Track the resolved expiration metadata. Code: `selected_expiration_value = None` -- Line 167: Track the resolved expiration metadata. Code: `selected_expiration_label = None` -- Line 168: Prepare or update the list of available expirations. Code: `expiration_options = []` -- Line 169: Track the resolved expiration epoch timestamp. Code: `target_date = None` -- Line 170: Track whether a base-page lookup is needed. Code: `fallback_to_base = False` +- Line 147: Define the normalize_chain_rows function. Code: `def normalize_chain_rows(rows):` +- Line 148: Execute the statement as written. Code: `normalized = []` +- Line 149: Loop over items. Code: `for row in rows or []:` +- Line 150: Execute the statement as written. Code: `normalized.append(` +- Line 151: Execute the statement as written. Code: `{` +- Line 152: Execute the statement as written. Code: `"Contract Name": row.get("contractSymbol"),` +- Line 153: Execute the statement as written. Code: `"Last Trade Date (EST)": format_last_trade_date(` +- Line 154: Execute the statement as written. Code: `row.get("lastTradeDate")` +- Line 155: Close the current block or container. Code: `),` +- Line 156: Execute the statement as written. Code: `"Strike": extract_raw_value(row.get("strike")),` +- Line 157: Execute the statement as written. Code: `"Last Price": extract_raw_value(row.get("lastPrice")),` +- Line 158: Execute the statement as written. Code: `"Bid": extract_raw_value(row.get("bid")),` +- Line 159: Execute the statement as written. Code: `"Ask": extract_raw_value(row.get("ask")),` +- Line 160: Execute the statement as written. Code: `"Change": extract_raw_value(row.get("change")),` +- Line 161: Execute the statement as written. Code: `"% Change": format_percent_value(row.get("percentChange")),` +- Line 162: Execute the statement as written. Code: `"Volume": extract_raw_value(row.get("volume")),` +- Line 163: Execute the statement as written. Code: `"Open Interest": extract_raw_value(row.get("openInterest")),` +- Line 164: Execute the statement as written. Code: `"Implied Volatility": format_percent_value(` +- Line 165: Execute the statement as written. Code: `row.get("impliedVolatility")` +- Line 166: Close the current block or container. Code: `),` +- Line 167: Close the current block or container. Code: `}` +- Line 168: Close the current block or container. Code: `)` +- Line 169: Return a value to the caller. Code: `return normalized` +- Line 170: Blank line for readability. Code: `` - Line 171: Blank line for readability. Code: `` -- Line 172: Enter a context manager block. Code: `with sync_playwright() as p:` -- Line 173: Launch a Playwright browser instance. Code: `browser = p.chromium.launch(headless=True)` -- Line 174: Create a new Playwright page. Code: `page = browser.new_page()` -- Line 175: Interact with the Playwright page. Code: `page.set_extra_http_headers(` -- Line 176: Execute the statement as written. Code: `{` -- Line 177: Execute the statement as written. Code: `"User-Agent": (` -- Line 178: Execute the statement as written. Code: `"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "` -- Line 179: Execute the statement as written. Code: `"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"` -- Line 180: Close the current block or container. Code: `)` -- Line 181: Close the current block or container. Code: `}` -- Line 182: Close the current block or container. Code: `)` -- Line 183: Interact with the Playwright page. Code: `page.set_default_timeout(60000)` +- Line 172: Define the build_rows_from_chain function. Code: `def build_rows_from_chain(chain):` +- Line 173: Execute the statement as written. Code: `result = chain.get("result", []) if chain else []` +- Line 174: Conditional branch. Code: `if not result:` +- Line 175: Return a value to the caller. Code: `return [], []` +- Line 176: Execute the statement as written. Code: `options = result[0].get("options", [])` +- Line 177: Conditional branch. Code: `if not options:` +- Line 178: Return a value to the caller. Code: `return [], []` +- Line 179: Execute the statement as written. Code: `option = options[0]` +- Line 180: Return a value to the caller. Code: `return (` +- Line 181: Execute the statement as written. Code: `normalize_chain_rows(option.get("calls")),` +- Line 182: Execute the statement as written. Code: `normalize_chain_rows(option.get("puts")),` +- Line 183: Close the current block or container. Code: `)` - Line 184: Blank line for readability. Code: `` -- Line 185: Start a try block for error handling. Code: `try:` -- Line 186: Conditional branch. Code: `if requested_expiration:` -- Line 187: Conditional branch. Code: `if requested_expiration.isdigit():` -- Line 188: Track the resolved expiration epoch timestamp. Code: `target_date = int(requested_expiration)` -- Line 189: Track the resolved expiration metadata. Code: `selected_expiration_value = target_date` -- Line 190: Track the resolved expiration metadata. Code: `selected_expiration_label = format_expiration_label(target_date)` -- Line 191: Fallback branch. Code: `else:` -- Line 192: Execute the statement as written. Code: `parsed_date = parse_date(requested_expiration)` -- Line 193: Conditional branch. Code: `if parsed_date:` -- Line 194: Track the resolved expiration epoch timestamp. Code: `target_date = int(` -- Line 195: Execute the statement as written. Code: `datetime(` -- Line 196: Execute the statement as written. Code: `parsed_date.year,` -- Line 197: Execute the statement as written. Code: `parsed_date.month,` -- Line 198: Execute the statement as written. Code: `parsed_date.day,` -- Line 199: Execute the statement as written. Code: `tzinfo=timezone.utc,` -- Line 200: Execute the statement as written. Code: `).timestamp()` -- Line 201: Close the current block or container. Code: `)` -- Line 202: Track the resolved expiration metadata. Code: `selected_expiration_value = target_date` -- Line 203: Track the resolved expiration metadata. Code: `selected_expiration_label = format_expiration_label(target_date)` -- Line 204: Fallback branch. Code: `else:` -- Line 205: Track whether a base-page lookup is needed. Code: `fallback_to_base = True` -- Line 206: Blank line for readability. Code: `` -- Line 207: Conditional branch. Code: `if target_date:` -- Line 208: Set the URL to load. Code: `url = f"{base_url}?date={target_date}"` -- Line 209: Blank line for readability. Code: `` -- Line 210: Navigate the Playwright page to the target URL. Code: `page.goto(url, wait_until="domcontentloaded", timeout=60000)` -- Line 211: Emit or configure a log message. Code: `app.logger.info("Page loaded (domcontentloaded) for %s", symbol)` -- Line 212: Blank line for readability. Code: `` -- Line 213: Capture the page HTML content. Code: `html = page.content()` -- Line 214: Extract expiration date timestamps from the HTML. Code: `expiration_dates = extract_expiration_dates_from_html(html)` -- Line 215: Prepare or update the list of available expirations. Code: `expiration_options = build_expiration_options(expiration_dates)` -- Line 216: Blank line for readability. Code: `` -- Line 217: Conditional branch. Code: `if fallback_to_base:` -- Line 218: Execute the statement as written. Code: `resolved_value, resolved_label = resolve_expiration(` -- Line 219: Execute the statement as written. Code: `requested_expiration, expiration_options` -- Line 220: Close the current block or container. Code: `)` -- Line 221: Conditional branch. Code: `if resolved_value is None:` -- Line 222: Return a value to the caller. Code: `return {` -- Line 223: Execute the statement as written. Code: `"error": "Requested expiration not available",` -- Line 224: Execute the statement as written. Code: `"stock": symbol,` -- Line 225: Execute the statement as written. Code: `"requested_expiration": requested_expiration,` -- Line 226: Execute the statement as written. Code: `"available_expirations": [` -- Line 227: Execute the statement as written. Code: `{"label": opt.get("label"), "value": opt.get("value")}` -- Line 228: Loop over items. Code: `for opt in expiration_options` -- Line 229: Close the current block or container. Code: `],` -- Line 230: Close the current block or container. Code: `}` -- Line 231: Blank line for readability. Code: `` -- Line 232: Track the resolved expiration epoch timestamp. Code: `target_date = resolved_value` -- Line 233: Track the resolved expiration metadata. Code: `selected_expiration_value = resolved_value` -- Line 234: Track the resolved expiration metadata. Code: `selected_expiration_label = resolved_label or format_expiration_label(` -- Line 235: Execute the statement as written. Code: `resolved_value` -- Line 236: Close the current block or container. Code: `)` -- Line 237: Set the URL to load. Code: `url = f"{base_url}?date={resolved_value}"` -- Line 238: Navigate the Playwright page to the target URL. Code: `page.goto(url, wait_until="domcontentloaded", timeout=60000)` -- Line 239: Emit or configure a log message. Code: `app.logger.info("Page loaded (domcontentloaded) for %s", symbol)` -- Line 240: Blank line for readability. Code: `` -- Line 241: Capture the page HTML content. Code: `html = page.content()` -- Line 242: Extract expiration date timestamps from the HTML. Code: `expiration_dates = extract_expiration_dates_from_html(html)` -- Line 243: Prepare or update the list of available expirations. Code: `expiration_options = build_expiration_options(expiration_dates)` -- Line 244: Blank line for readability. Code: `` -- Line 245: Conditional branch. Code: `if target_date and expiration_options:` -- Line 246: Execute the statement as written. Code: `matched = None` -- Line 247: Loop over items. Code: `for opt in expiration_options:` -- Line 248: Conditional branch. Code: `if opt.get("value") == target_date:` -- Line 249: Execute the statement as written. Code: `matched = opt` -- Line 250: Execute the statement as written. Code: `break` -- Line 251: Conditional branch. Code: `if not matched:` -- Line 252: Return a value to the caller. Code: `return {` -- Line 253: Execute the statement as written. Code: `"error": "Requested expiration not available",` -- Line 254: Execute the statement as written. Code: `"stock": symbol,` -- Line 255: Execute the statement as written. Code: `"requested_expiration": requested_expiration,` -- Line 256: Execute the statement as written. Code: `"available_expirations": [` -- Line 257: Execute the statement as written. Code: `{"label": opt.get("label"), "value": opt.get("value")}` -- Line 258: Loop over items. Code: `for opt in expiration_options` -- Line 259: Close the current block or container. Code: `],` -- Line 260: Close the current block or container. Code: `}` -- Line 261: Track the resolved expiration metadata. Code: `selected_expiration_label = matched.get("label")` -- Line 262: Alternative conditional branch. Code: `elif expiration_options and not target_date:` -- Line 263: Track the resolved expiration metadata. Code: `selected_expiration_value = expiration_options[0].get("value")` -- Line 264: Track the resolved expiration metadata. Code: `selected_expiration_label = expiration_options[0].get("label")` -- Line 265: Blank line for readability. Code: `` -- Line 266: Emit or configure a log message. Code: `app.logger.info("Waiting for options tables...")` -- Line 267: Blank line for readability. Code: `` -- Line 268: Collect option tables from the page. Code: `tables = wait_for_tables(page)` -- Line 269: Conditional branch. Code: `if len(tables) < 2:` -- Line 270: Emit or configure a log message. Code: `app.logger.error(` -- Line 271: Execute the statement as written. Code: `"Only %d tables found; expected 2. HTML may have changed.",` -- Line 272: Execute the statement as written. Code: `len(tables),` -- Line 273: Close the current block or container. Code: `)` -- Line 274: Return a value to the caller. Code: `return {"error": "Could not locate options tables", "stock": symbol}` -- Line 275: Blank line for readability. Code: `` -- Line 276: Emit or configure a log message. Code: `app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))` -- Line 277: Blank line for readability. Code: `` -- Line 278: Reserve storage for options table HTML. Code: `calls_html = tables[0].evaluate("el => el.outerHTML")` -- Line 279: Reserve storage for options table HTML. Code: `puts_html = tables[1].evaluate("el => el.outerHTML")` -- Line 280: Blank line for readability. Code: `` -- Line 281: Comment describing the next block. Code: `# --- Extract current price ---` -- Line 282: Start a try block for error handling. Code: `try:` -- Line 283: Comment describing the next block. Code: `# Primary selector` -- Line 284: Read the current price text from the page. Code: `price_text = page.locator(` -- Line 285: Execute the statement as written. Code: `"fin-streamer[data-field='regularMarketPrice']"` -- Line 286: Execute the statement as written. Code: `).inner_text()` -- Line 287: Initialize or assign the current price. Code: `price = float(price_text.replace(",", ""))` -- Line 288: Handle exceptions for the preceding try block. Code: `except Exception:` -- Line 289: Start a try block for error handling. Code: `try:` -- Line 290: Comment describing the next block. Code: `# Fallback` -- Line 291: Read the current price text from the page. Code: `price_text = page.locator("span[data-testid='qsp-price']").inner_text()` -- Line 292: Initialize or assign the current price. Code: `price = float(price_text.replace(",", ""))` -- Line 293: Handle exceptions for the preceding try block. Code: `except Exception as e:` -- Line 294: Emit or configure a log message. Code: `app.logger.warning("Failed to extract price for %s: %s", symbol, e)` -- Line 295: Blank line for readability. Code: `` -- Line 296: Emit or configure a log message. Code: `app.logger.info("Current price for %s = %s", symbol, price)` -- Line 297: Execute the statement as written. Code: `finally:` -- Line 298: Execute the statement as written. Code: `browser.close()` -- Line 299: Blank line for readability. Code: `` -- Line 300: Comment describing the next block. Code: `# ----------------------------------------------------------------------` -- Line 301: Comment describing the next block. Code: `# Parsing Table HTML` -- Line 302: Comment describing the next block. Code: `# ----------------------------------------------------------------------` +- Line 185: Blank line for readability. Code: `` +- Line 186: Define the extract_contract_expiry_code function. Code: `def extract_contract_expiry_code(contract_name):` +- Line 187: Conditional branch. Code: `if not contract_name:` +- Line 188: Return a value to the caller. Code: `return None` +- Line 189: Execute the statement as written. Code: `match = re.search(r"(\d{6})", contract_name)` +- Line 190: Return a value to the caller. Code: `return match.group(1) if match else None` +- Line 191: Blank line for readability. Code: `` +- Line 192: Blank line for readability. Code: `` +- Line 193: Define the expected_expiry_code function. Code: `def expected_expiry_code(timestamp):` +- Line 194: Conditional branch. Code: `if not timestamp:` +- Line 195: Return a value to the caller. Code: `return None` +- Line 196: Start a try block for error handling. Code: `try:` +- Line 197: Return a value to the caller. Code: `return datetime.utcfromtimestamp(timestamp).strftime("%y%m%d")` +- Line 198: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 199: Return a value to the caller. Code: `return None` +- Line 200: Blank line for readability. Code: `` +- Line 201: Blank line for readability. Code: `` +- Line 202: Define the extract_expiration_dates_from_html function. Code: `def extract_expiration_dates_from_html(html):` +- Line 203: Conditional branch. Code: `if not html:` +- Line 204: Return a value to the caller. Code: `return []` +- Line 205: Blank line for readability. Code: `` +- Line 206: Execute the statement as written. Code: `patterns = (` +- Line 207: Execute the statement as written. Code: `r'\\"expirationDates\\":\[(.*?)\]',` +- Line 208: Execute the statement as written. Code: `r'"expirationDates":\[(.*?)\]',` +- Line 209: Close the current block or container. Code: `)` +- Line 210: Execute the statement as written. Code: `match = None` +- Line 211: Loop over items. Code: `for pattern in patterns:` +- Line 212: Execute the statement as written. Code: `match = re.search(pattern, html, re.DOTALL)` +- Line 213: Conditional branch. Code: `if match:` +- Line 214: Execute the statement as written. Code: `break` +- Line 215: Conditional branch. Code: `if not match:` +- Line 216: Return a value to the caller. Code: `return []` +- Line 217: Blank line for readability. Code: `` +- Line 218: Execute the statement as written. Code: `raw = match.group(1)` +- Line 219: Execute the statement as written. Code: `values = []` +- Line 220: Loop over items. Code: `for part in raw.split(","):` +- Line 221: Execute the statement as written. Code: `part = part.strip()` +- Line 222: Conditional branch. Code: `if part.isdigit():` +- Line 223: Start a try block for error handling. Code: `try:` +- Line 224: Execute the statement as written. Code: `values.append(int(part))` +- Line 225: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 226: Execute the statement as written. Code: `continue` +- Line 227: Return a value to the caller. Code: `return values` +- Line 228: Blank line for readability. Code: `` +- Line 229: Blank line for readability. Code: `` +- Line 230: Define the build_expiration_options function. Code: `def build_expiration_options(expiration_dates):` +- Line 231: Execute the statement as written. Code: `options = []` +- Line 232: Loop over items. Code: `for value in expiration_dates or []:` +- Line 233: Start a try block for error handling. Code: `try:` +- Line 234: Execute the statement as written. Code: `value_int = int(value)` +- Line 235: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 236: Execute the statement as written. Code: `continue` +- Line 237: Blank line for readability. Code: `` +- Line 238: Execute the statement as written. Code: `label = format_expiration_label(value_int)` +- Line 239: Start a try block for error handling. Code: `try:` +- Line 240: Execute the statement as written. Code: `date_value = datetime.utcfromtimestamp(value_int).date()` +- Line 241: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 242: Execute the statement as written. Code: `date_value = None` +- Line 243: Blank line for readability. Code: `` +- Line 244: Execute the statement as written. Code: `options.append({"value": value_int, "label": label, "date": date_value})` +- Line 245: Return a value to the caller. Code: `return sorted(options, key=lambda x: x["value"])` +- Line 246: Blank line for readability. Code: `` +- Line 247: Blank line for readability. Code: `` +- Line 248: Define the resolve_expiration function. Code: `def resolve_expiration(expiration, options):` +- Line 249: Conditional branch. Code: `if not expiration:` +- Line 250: Return a value to the caller. Code: `return None, None` +- Line 251: Blank line for readability. Code: `` +- Line 252: Execute the statement as written. Code: `raw = expiration.strip()` +- Line 253: Conditional branch. Code: `if not raw:` +- Line 254: Return a value to the caller. Code: `return None, None` +- Line 255: Blank line for readability. Code: `` +- Line 256: Conditional branch. Code: `if raw.isdigit():` +- Line 257: Execute the statement as written. Code: `value = int(raw)` +- Line 258: Conditional branch. Code: `if options:` +- Line 259: Loop over items. Code: `for opt in options:` +- Line 260: Conditional branch. Code: `if opt.get("value") == value:` +- Line 261: Return a value to the caller. Code: `return value, opt.get("label")` +- Line 262: Return a value to the caller. Code: `return None, None` +- Line 263: Return a value to the caller. Code: `return value, format_expiration_label(value)` +- Line 264: Blank line for readability. Code: `` +- Line 265: Execute the statement as written. Code: `requested_date = parse_date(raw)` +- Line 266: Conditional branch. Code: `if requested_date:` +- Line 267: Loop over items. Code: `for opt in options:` +- Line 268: Conditional branch. Code: `if opt.get("date") == requested_date:` +- Line 269: Return a value to the caller. Code: `return opt.get("value"), opt.get("label")` +- Line 270: Return a value to the caller. Code: `return None, None` +- Line 271: Blank line for readability. Code: `` +- Line 272: Execute the statement as written. Code: `normalized = normalize_label(raw)` +- Line 273: Loop over items. Code: `for opt in options:` +- Line 274: Conditional branch. Code: `if normalize_label(opt.get("label", "")) == normalized:` +- Line 275: Return a value to the caller. Code: `return opt.get("value"), opt.get("label")` +- Line 276: Blank line for readability. Code: `` +- Line 277: Return a value to the caller. Code: `return None, None` +- Line 278: Blank line for readability. Code: `` +- Line 279: Blank line for readability. Code: `` +- Line 280: Define the wait_for_tables function. Code: `def wait_for_tables(page):` +- Line 281: Start a try block for error handling. Code: `try:` +- Line 282: Interact with the Playwright page. Code: `page.wait_for_selector(` +- Line 283: Execute the statement as written. Code: `"section[data-testid='options-list-table'] table",` +- Line 284: Execute the statement as written. Code: `timeout=30000,` +- Line 285: Close the current block or container. Code: `)` +- Line 286: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 287: Interact with the Playwright page. Code: `page.wait_for_selector("table", timeout=30000)` +- Line 288: Blank line for readability. Code: `` +- Line 289: Loop over items. Code: `for _ in range(30): # 30 * 1s = 30 seconds` +- Line 290: Collect option tables from the page. Code: `tables = page.query_selector_all(` +- Line 291: Execute the statement as written. Code: `"section[data-testid='options-list-table'] table"` +- Line 292: Close the current block or container. Code: `)` +- Line 293: Conditional branch. Code: `if len(tables) >= 2:` +- Line 294: Return a value to the caller. Code: `return tables` +- Line 295: Collect option tables from the page. Code: `tables = page.query_selector_all("table")` +- Line 296: Conditional branch. Code: `if len(tables) >= 2:` +- Line 297: Return a value to the caller. Code: `return tables` +- Line 298: Execute the statement as written. Code: `time.sleep(1)` +- Line 299: Return a value to the caller. Code: `return []` +- Line 300: Blank line for readability. Code: `` +- Line 301: Blank line for readability. Code: `` +- Line 302: Define the scrape_yahoo_options function. Code: `def scrape_yahoo_options(symbol, expiration=None):` - Line 303: Define the parse_table function. Code: `def parse_table(table_html, side):` - Line 304: Conditional branch. Code: `if not table_html:` - Line 305: Emit or configure a log message. Code: `app.logger.warning("No %s table HTML for %s", side, symbol)` @@ -357,68 +357,264 @@ - Line 342: Emit or configure a log message. Code: `app.logger.info("Parsed %d %s rows", len(parsed), side)` - Line 343: Return a value to the caller. Code: `return parsed` - Line 344: Blank line for readability. Code: `` -- Line 345: Parse the full calls and puts tables. Code: `calls_full = parse_table(calls_html, "calls")` -- Line 346: Parse the full calls and puts tables. Code: `puts_full = parse_table(puts_html, "puts")` -- Line 347: Blank line for readability. Code: `` -- Line 348: Comment describing the next block. Code: `# ----------------------------------------------------------------------` -- Line 349: Comment describing the next block. Code: `# Pruning logic` -- Line 350: Comment describing the next block. Code: `# ----------------------------------------------------------------------` -- Line 351: Define the prune_nearest function. Code: `def prune_nearest(options, price_value, limit=26, side=""):` -- Line 352: Conditional branch. Code: `if price_value is None:` -- Line 353: Return a value to the caller. Code: `return options, 0` -- Line 354: Blank line for readability. Code: `` -- Line 355: Filter options to numeric strike entries. Code: `numeric = [o for o in options if isinstance(o.get("Strike"), (int, float))]` -- Line 356: Blank line for readability. Code: `` -- Line 357: Conditional branch. Code: `if len(numeric) <= limit:` -- Line 358: Return a value to the caller. Code: `return numeric, 0` -- Line 359: Blank line for readability. Code: `` -- Line 360: Sort options by distance to current price. Code: `sorted_opts = sorted(numeric, key=lambda x: abs(x["Strike"] - price_value))` -- Line 361: Keep the closest strike entries. Code: `pruned = sorted_opts[:limit]` -- Line 362: Compute how many rows were pruned. Code: `pruned_count = len(options) - len(pruned)` -- Line 363: Return a value to the caller. Code: `return pruned, pruned_count` -- Line 364: Blank line for readability. Code: `` -- Line 365: Apply pruning to calls. Code: `calls, pruned_calls = prune_nearest(calls_full, price, side="calls")` -- Line 366: Apply pruning to puts. Code: `puts, pruned_puts = prune_nearest(puts_full, price, side="puts")` -- Line 367: Blank line for readability. Code: `` -- Line 368: Define the strike_range function. Code: `def strike_range(opts):` -- Line 369: Collect strike prices from the option list. Code: `strikes = [o["Strike"] for o in opts if isinstance(o.get("Strike"), (int, float))]` -- Line 370: Return a value to the caller. Code: `return [min(strikes), max(strikes)] if strikes else [None, None]` -- Line 371: Blank line for readability. Code: `` -- Line 372: Return a value to the caller. Code: `return {` -- Line 373: Execute the statement as written. Code: `"stock": symbol,` -- Line 374: Execute the statement as written. Code: `"url": url,` -- Line 375: Execute the statement as written. Code: `"requested_expiration": requested_expiration,` -- Line 376: Execute the statement as written. Code: `"selected_expiration": {` -- Line 377: Execute the statement as written. Code: `"value": selected_expiration_value,` -- Line 378: Execute the statement as written. Code: `"label": selected_expiration_label,` -- Line 379: Close the current block or container. Code: `},` -- Line 380: Execute the statement as written. Code: `"current_price": price,` -- Line 381: Execute the statement as written. Code: `"calls": calls,` -- Line 382: Execute the statement as written. Code: `"puts": puts,` -- Line 383: Execute the statement as written. Code: `"calls_strike_range": strike_range(calls),` -- Line 384: Execute the statement as written. Code: `"puts_strike_range": strike_range(puts),` -- Line 385: Execute the statement as written. Code: `"total_calls": len(calls),` -- Line 386: Execute the statement as written. Code: `"total_puts": len(puts),` -- Line 387: Execute the statement as written. Code: `"pruned_calls_count": pruned_calls,` -- Line 388: Execute the statement as written. Code: `"pruned_puts_count": pruned_puts,` -- Line 389: Close the current block or container. Code: `}` -- Line 390: Blank line for readability. Code: `` -- Line 391: Blank line for readability. Code: `` -- Line 392: Attach the route decorator to the handler. Code: `@app.route("/scrape_sync")` -- Line 393: Define the scrape_sync function. Code: `def scrape_sync():` -- Line 394: Read the stock symbol parameter. Code: `symbol = request.args.get("stock", "MSFT")` -- Line 395: Read the expiration parameters from the request. Code: `expiration = (` -- Line 396: Execute the statement as written. Code: `request.args.get("expiration")` -- Line 397: Execute the statement as written. Code: `or request.args.get("expiry")` -- Line 398: Execute the statement as written. Code: `or request.args.get("date")` -- Line 399: Close the current block or container. Code: `)` -- Line 400: Emit or configure a log message. Code: `app.logger.info(` -- Line 401: Execute the statement as written. Code: `"Received /scrape_sync request for symbol=%s expiration=%s",` -- Line 402: Execute the statement as written. Code: `symbol,` -- Line 403: Execute the statement as written. Code: `expiration,` -- Line 404: Close the current block or container. Code: `)` -- Line 405: Return a value to the caller. Code: `return jsonify(scrape_yahoo_options(symbol, expiration))` -- Line 406: Blank line for readability. Code: `` -- Line 407: Blank line for readability. Code: `` -- Line 408: Conditional branch. Code: `if __name__ == "__main__":` -- Line 409: Run the Flask development server. Code: `app.run(host="0.0.0.0", port=9777)` +- Line 345: Define the read_option_chain function. Code: `def read_option_chain(page):` +- Line 346: Capture the page HTML content. Code: `html = page.content()` +- Line 347: Execute the statement as written. Code: `option_chain = extract_option_chain_from_html(html)` +- Line 348: Conditional branch. Code: `if option_chain:` +- Line 349: Extract expiration date timestamps from the HTML. Code: `expiration_dates = extract_expiration_dates_from_chain(option_chain)` +- Line 350: Fallback branch. Code: `else:` +- Line 351: Extract expiration date timestamps from the HTML. Code: `expiration_dates = extract_expiration_dates_from_html(html)` +- Line 352: Return a value to the caller. Code: `return option_chain, expiration_dates` +- Line 353: Blank line for readability. Code: `` +- Line 354: Define the has_expected_expiry function. Code: `def has_expected_expiry(options, expected_code):` +- Line 355: Conditional branch. Code: `if not expected_code:` +- Line 356: Return a value to the caller. Code: `return False` +- Line 357: Loop over items. Code: `for row in options or []:` +- Line 358: Execute the statement as written. Code: `name = row.get("Contract Name")` +- Line 359: Conditional branch. Code: `if extract_contract_expiry_code(name) == expected_code:` +- Line 360: Return a value to the caller. Code: `return True` +- Line 361: Return a value to the caller. Code: `return False` +- Line 362: Blank line for readability. Code: `` +- Line 363: URL-encode the stock symbol. Code: `encoded = urllib.parse.quote(symbol, safe="")` +- Line 364: Build the base Yahoo Finance options URL. Code: `base_url = f"https://finance.yahoo.com/quote/{encoded}/options/"` +- Line 365: Normalize the expiration input string. Code: `requested_expiration = expiration.strip() if expiration else None` +- Line 366: Conditional branch. Code: `if not requested_expiration:` +- Line 367: Normalize the expiration input string. Code: `requested_expiration = None` +- Line 368: Set the URL to load. Code: `url = base_url` +- Line 369: Blank line for readability. Code: `` +- Line 370: Emit or configure a log message. Code: `app.logger.info(` +- Line 371: Execute the statement as written. Code: `"Starting scrape for symbol=%s expiration=%s url=%s",` +- Line 372: Execute the statement as written. Code: `symbol,` +- Line 373: Execute the statement as written. Code: `requested_expiration,` +- Line 374: Execute the statement as written. Code: `base_url,` +- Line 375: Close the current block or container. Code: `)` +- Line 376: Blank line for readability. Code: `` +- Line 377: Reserve storage for options table HTML. Code: `calls_html = None` +- Line 378: Reserve storage for options table HTML. Code: `puts_html = None` +- Line 379: Parse the full calls and puts tables. Code: `calls_full = []` +- Line 380: Parse the full calls and puts tables. Code: `puts_full = []` +- Line 381: Initialize or assign the current price. Code: `price = None` +- Line 382: Track the resolved expiration metadata. Code: `selected_expiration_value = None` +- Line 383: Track the resolved expiration metadata. Code: `selected_expiration_label = None` +- Line 384: Prepare or update the list of available expirations. Code: `expiration_options = []` +- Line 385: Track the resolved expiration epoch timestamp. Code: `target_date = None` +- Line 386: Track whether a base-page lookup is needed. Code: `fallback_to_base = False` +- Line 387: Blank line for readability. Code: `` +- Line 388: Enter a context manager block. Code: `with sync_playwright() as p:` +- Line 389: Launch a Playwright browser instance. Code: `browser = p.chromium.launch(headless=True)` +- Line 390: Create a new Playwright page. Code: `page = browser.new_page()` +- Line 391: Interact with the Playwright page. Code: `page.set_extra_http_headers(` +- Line 392: Execute the statement as written. Code: `{` +- Line 393: Execute the statement as written. Code: `"User-Agent": (` +- Line 394: Execute the statement as written. Code: `"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "` +- Line 395: Execute the statement as written. Code: `"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"` +- Line 396: Close the current block or container. Code: `)` +- Line 397: Close the current block or container. Code: `}` +- Line 398: Close the current block or container. Code: `)` +- Line 399: Interact with the Playwright page. Code: `page.set_default_timeout(60000)` +- Line 400: Blank line for readability. Code: `` +- Line 401: Start a try block for error handling. Code: `try:` +- Line 402: Conditional branch. Code: `if requested_expiration:` +- Line 403: Conditional branch. Code: `if requested_expiration.isdigit():` +- Line 404: Track the resolved expiration epoch timestamp. Code: `target_date = int(requested_expiration)` +- Line 405: Track the resolved expiration metadata. Code: `selected_expiration_value = target_date` +- Line 406: Track the resolved expiration metadata. Code: `selected_expiration_label = format_expiration_label(target_date)` +- Line 407: Fallback branch. Code: `else:` +- Line 408: Execute the statement as written. Code: `parsed_date = parse_date(requested_expiration)` +- Line 409: Conditional branch. Code: `if parsed_date:` +- Line 410: Track the resolved expiration epoch timestamp. Code: `target_date = int(` +- Line 411: Execute the statement as written. Code: `datetime(` +- Line 412: Execute the statement as written. Code: `parsed_date.year,` +- Line 413: Execute the statement as written. Code: `parsed_date.month,` +- Line 414: Execute the statement as written. Code: `parsed_date.day,` +- Line 415: Execute the statement as written. Code: `tzinfo=timezone.utc,` +- Line 416: Execute the statement as written. Code: `).timestamp()` +- Line 417: Close the current block or container. Code: `)` +- Line 418: Track the resolved expiration metadata. Code: `selected_expiration_value = target_date` +- Line 419: Track the resolved expiration metadata. Code: `selected_expiration_label = format_expiration_label(target_date)` +- Line 420: Fallback branch. Code: `else:` +- Line 421: Track whether a base-page lookup is needed. Code: `fallback_to_base = True` +- Line 422: Blank line for readability. Code: `` +- Line 423: Conditional branch. Code: `if target_date:` +- Line 424: Set the URL to load. Code: `url = f"{base_url}?date={target_date}"` +- Line 425: Blank line for readability. Code: `` +- Line 426: Navigate the Playwright page to the target URL. Code: `page.goto(url, wait_until="domcontentloaded", timeout=60000)` +- Line 427: Emit or configure a log message. Code: `app.logger.info("Page loaded (domcontentloaded) for %s", symbol)` +- Line 428: Blank line for readability. Code: `` +- Line 429: Execute the statement as written. Code: `option_chain, expiration_dates = read_option_chain(page)` +- Line 430: Emit or configure a log message. Code: `app.logger.info("Option chain found: %s", bool(option_chain))` +- Line 431: Prepare or update the list of available expirations. Code: `expiration_options = build_expiration_options(expiration_dates)` +- Line 432: Blank line for readability. Code: `` +- Line 433: Conditional branch. Code: `if fallback_to_base:` +- Line 434: Execute the statement as written. Code: `resolved_value, resolved_label = resolve_expiration(` +- Line 435: Execute the statement as written. Code: `requested_expiration, expiration_options` +- Line 436: Close the current block or container. Code: `)` +- Line 437: Conditional branch. Code: `if resolved_value is None:` +- Line 438: Return a value to the caller. Code: `return {` +- Line 439: Execute the statement as written. Code: `"error": "Requested expiration not available",` +- Line 440: Execute the statement as written. Code: `"stock": symbol,` +- Line 441: Execute the statement as written. Code: `"requested_expiration": requested_expiration,` +- Line 442: Execute the statement as written. Code: `"available_expirations": [` +- Line 443: Execute the statement as written. Code: `{"label": opt.get("label"), "value": opt.get("value")}` +- Line 444: Loop over items. Code: `for opt in expiration_options` +- Line 445: Close the current block or container. Code: `],` +- Line 446: Close the current block or container. Code: `}` +- Line 447: Blank line for readability. Code: `` +- Line 448: Track the resolved expiration epoch timestamp. Code: `target_date = resolved_value` +- Line 449: Track the resolved expiration metadata. Code: `selected_expiration_value = resolved_value` +- Line 450: Track the resolved expiration metadata. Code: `selected_expiration_label = resolved_label or format_expiration_label(` +- Line 451: Execute the statement as written. Code: `resolved_value` +- Line 452: Close the current block or container. Code: `)` +- Line 453: Set the URL to load. Code: `url = f"{base_url}?date={resolved_value}"` +- Line 454: Navigate the Playwright page to the target URL. Code: `page.goto(url, wait_until="domcontentloaded", timeout=60000)` +- Line 455: Emit or configure a log message. Code: `app.logger.info("Page loaded (domcontentloaded) for %s", symbol)` +- Line 456: Blank line for readability. Code: `` +- Line 457: Execute the statement as written. Code: `option_chain, expiration_dates = read_option_chain(page)` +- Line 458: Prepare or update the list of available expirations. Code: `expiration_options = build_expiration_options(expiration_dates)` +- Line 459: Blank line for readability. Code: `` +- Line 460: Conditional branch. Code: `if target_date and expiration_options:` +- Line 461: Execute the statement as written. Code: `matched = None` +- Line 462: Loop over items. Code: `for opt in expiration_options:` +- Line 463: Conditional branch. Code: `if opt.get("value") == target_date:` +- Line 464: Execute the statement as written. Code: `matched = opt` +- Line 465: Execute the statement as written. Code: `break` +- Line 466: Conditional branch. Code: `if not matched:` +- Line 467: Return a value to the caller. Code: `return {` +- Line 468: Execute the statement as written. Code: `"error": "Requested expiration not available",` +- Line 469: Execute the statement as written. Code: `"stock": symbol,` +- Line 470: Execute the statement as written. Code: `"requested_expiration": requested_expiration,` +- Line 471: Execute the statement as written. Code: `"available_expirations": [` +- Line 472: Execute the statement as written. Code: `{"label": opt.get("label"), "value": opt.get("value")}` +- Line 473: Loop over items. Code: `for opt in expiration_options` +- Line 474: Close the current block or container. Code: `],` +- Line 475: Close the current block or container. Code: `}` +- Line 476: Track the resolved expiration metadata. Code: `selected_expiration_value = matched.get("value")` +- Line 477: Track the resolved expiration metadata. Code: `selected_expiration_label = matched.get("label")` +- Line 478: Alternative conditional branch. Code: `elif expiration_options and not target_date:` +- Line 479: Track the resolved expiration metadata. Code: `selected_expiration_value = expiration_options[0].get("value")` +- Line 480: Track the resolved expiration metadata. Code: `selected_expiration_label = expiration_options[0].get("label")` +- Line 481: Blank line for readability. Code: `` +- Line 482: Execute the statement as written. Code: `calls_full, puts_full = build_rows_from_chain(option_chain)` +- Line 483: Emit or configure a log message. Code: `app.logger.info(` +- Line 484: Execute the statement as written. Code: `"Option chain rows: calls=%d puts=%d",` +- Line 485: Execute the statement as written. Code: `len(calls_full),` +- Line 486: Execute the statement as written. Code: `len(puts_full),` +- Line 487: Close the current block or container. Code: `)` +- Line 488: Blank line for readability. Code: `` +- Line 489: Conditional branch. Code: `if not calls_full and not puts_full:` +- Line 490: Emit or configure a log message. Code: `app.logger.info("Waiting for options tables...")` +- Line 491: Blank line for readability. Code: `` +- Line 492: Collect option tables from the page. Code: `tables = wait_for_tables(page)` +- Line 493: Conditional branch. Code: `if len(tables) < 2:` +- Line 494: Emit or configure a log message. Code: `app.logger.error(` +- Line 495: Execute the statement as written. Code: `"Only %d tables found; expected 2. HTML may have changed.",` +- Line 496: Execute the statement as written. Code: `len(tables),` +- Line 497: Close the current block or container. Code: `)` +- Line 498: Return a value to the caller. Code: `return {"error": "Could not locate options tables", "stock": symbol}` +- Line 499: Blank line for readability. Code: `` +- Line 500: Emit or configure a log message. Code: `app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))` +- Line 501: Blank line for readability. Code: `` +- Line 502: Reserve storage for options table HTML. Code: `calls_html = tables[0].evaluate("el => el.outerHTML")` +- Line 503: Reserve storage for options table HTML. Code: `puts_html = tables[1].evaluate("el => el.outerHTML")` +- Line 504: Blank line for readability. Code: `` +- Line 505: Comment describing the next block. Code: `# --- Extract current price ---` +- Line 506: Start a try block for error handling. Code: `try:` +- Line 507: Comment describing the next block. Code: `# Primary selector` +- Line 508: Read the current price text from the page. Code: `price_text = page.locator(` +- Line 509: Execute the statement as written. Code: `"fin-streamer[data-field='regularMarketPrice']"` +- Line 510: Execute the statement as written. Code: `).inner_text()` +- Line 511: Initialize or assign the current price. Code: `price = float(price_text.replace(",", ""))` +- Line 512: Handle exceptions for the preceding try block. Code: `except Exception:` +- Line 513: Start a try block for error handling. Code: `try:` +- Line 514: Comment describing the next block. Code: `# Fallback` +- Line 515: Read the current price text from the page. Code: `price_text = page.locator("span[data-testid='qsp-price']").inner_text()` +- Line 516: Initialize or assign the current price. Code: `price = float(price_text.replace(",", ""))` +- Line 517: Handle exceptions for the preceding try block. Code: `except Exception as e:` +- Line 518: Emit or configure a log message. Code: `app.logger.warning("Failed to extract price for %s: %s", symbol, e)` +- Line 519: Blank line for readability. Code: `` +- Line 520: Emit or configure a log message. Code: `app.logger.info("Current price for %s = %s", symbol, price)` +- Line 521: Execute the statement as written. Code: `finally:` +- Line 522: Execute the statement as written. Code: `browser.close()` +- Line 523: Blank line for readability. Code: `` +- Line 524: Conditional branch. Code: `if not calls_full and not puts_full and calls_html and puts_html:` +- Line 525: Parse the full calls and puts tables. Code: `calls_full = parse_table(calls_html, "calls")` +- Line 526: Parse the full calls and puts tables. Code: `puts_full = parse_table(puts_html, "puts")` +- Line 527: Blank line for readability. Code: `` +- Line 528: Execute the statement as written. Code: `expected_code = expected_expiry_code(target_date)` +- Line 529: Conditional branch. Code: `if expected_code:` +- Line 530: Conditional branch. Code: `if not has_expected_expiry(calls_full, expected_code) and not has_expected_expiry(` +- Line 531: Execute the statement as written. Code: `puts_full, expected_code` +- Line 532: Close the current block or container. Code: `):` +- Line 533: Return a value to the caller. Code: `return {` +- Line 534: Execute the statement as written. Code: `"error": "Options chain does not match requested expiration",` +- Line 535: Execute the statement as written. Code: `"stock": symbol,` +- Line 536: Execute the statement as written. Code: `"requested_expiration": requested_expiration,` +- Line 537: Execute the statement as written. Code: `"expected_expiration_code": expected_code,` +- Line 538: Execute the statement as written. Code: `"selected_expiration": {` +- Line 539: Execute the statement as written. Code: `"value": selected_expiration_value,` +- Line 540: Execute the statement as written. Code: `"label": selected_expiration_label,` +- Line 541: Close the current block or container. Code: `},` +- Line 542: Close the current block or container. Code: `}` +- Line 543: Blank line for readability. Code: `` +- Line 544: Comment describing the next block. Code: `# ----------------------------------------------------------------------` +- Line 545: Comment describing the next block. Code: `# Pruning logic` +- Line 546: Comment describing the next block. Code: `# ----------------------------------------------------------------------` +- Line 547: Define the prune_nearest function. Code: `def prune_nearest(options, price_value, limit=26, side=""):` +- Line 548: Conditional branch. Code: `if price_value is None:` +- Line 549: Return a value to the caller. Code: `return options, 0` +- Line 550: Blank line for readability. Code: `` +- Line 551: Filter options to numeric strike entries. Code: `numeric = [o for o in options if isinstance(o.get("Strike"), (int, float))]` +- Line 552: Blank line for readability. Code: `` +- Line 553: Conditional branch. Code: `if len(numeric) <= limit:` +- Line 554: Return a value to the caller. Code: `return numeric, 0` +- Line 555: Blank line for readability. Code: `` +- Line 556: Sort options by distance to current price. Code: `sorted_opts = sorted(numeric, key=lambda x: abs(x["Strike"] - price_value))` +- Line 557: Keep the closest strike entries. Code: `pruned = sorted_opts[:limit]` +- Line 558: Compute how many rows were pruned. Code: `pruned_count = len(options) - len(pruned)` +- Line 559: Return a value to the caller. Code: `return pruned, pruned_count` +- Line 560: Blank line for readability. Code: `` +- Line 561: Apply pruning to calls. Code: `calls, pruned_calls = prune_nearest(calls_full, price, side="calls")` +- Line 562: Apply pruning to puts. Code: `puts, pruned_puts = prune_nearest(puts_full, price, side="puts")` +- Line 563: Blank line for readability. Code: `` +- Line 564: Define the strike_range function. Code: `def strike_range(opts):` +- Line 565: Collect strike prices from the option list. Code: `strikes = [o["Strike"] for o in opts if isinstance(o.get("Strike"), (int, float))]` +- Line 566: Return a value to the caller. Code: `return [min(strikes), max(strikes)] if strikes else [None, None]` +- Line 567: Blank line for readability. Code: `` +- Line 568: Return a value to the caller. Code: `return {` +- Line 569: Execute the statement as written. Code: `"stock": symbol,` +- Line 570: Execute the statement as written. Code: `"url": url,` +- Line 571: Execute the statement as written. Code: `"requested_expiration": requested_expiration,` +- Line 572: Execute the statement as written. Code: `"selected_expiration": {` +- Line 573: Execute the statement as written. Code: `"value": selected_expiration_value,` +- Line 574: Execute the statement as written. Code: `"label": selected_expiration_label,` +- Line 575: Close the current block or container. Code: `},` +- Line 576: Execute the statement as written. Code: `"current_price": price,` +- Line 577: Execute the statement as written. Code: `"calls": calls,` +- Line 578: Execute the statement as written. Code: `"puts": puts,` +- Line 579: Execute the statement as written. Code: `"calls_strike_range": strike_range(calls),` +- Line 580: Execute the statement as written. Code: `"puts_strike_range": strike_range(puts),` +- Line 581: Execute the statement as written. Code: `"total_calls": len(calls),` +- Line 582: Execute the statement as written. Code: `"total_puts": len(puts),` +- Line 583: Execute the statement as written. Code: `"pruned_calls_count": pruned_calls,` +- Line 584: Execute the statement as written. Code: `"pruned_puts_count": pruned_puts,` +- Line 585: Close the current block or container. Code: `}` +- Line 586: Blank line for readability. Code: `` +- Line 587: Blank line for readability. Code: `` +- Line 588: Attach the route decorator to the handler. Code: `@app.route("/scrape_sync")` +- Line 589: Define the scrape_sync function. Code: `def scrape_sync():` +- Line 590: Read the stock symbol parameter. Code: `symbol = request.args.get("stock", "MSFT")` +- Line 591: Read the expiration parameters from the request. Code: `expiration = (` +- Line 592: Execute the statement as written. Code: `request.args.get("expiration")` +- Line 593: Execute the statement as written. Code: `or request.args.get("expiry")` +- Line 594: Execute the statement as written. Code: `or request.args.get("date")` +- Line 595: Close the current block or container. Code: `)` +- Line 596: Emit or configure a log message. Code: `app.logger.info(` +- Line 597: Execute the statement as written. Code: `"Received /scrape_sync request for symbol=%s expiration=%s",` +- Line 598: Execute the statement as written. Code: `symbol,` +- Line 599: Execute the statement as written. Code: `expiration,` +- Line 600: Close the current block or container. Code: `)` +- Line 601: Return a value to the caller. Code: `return jsonify(scrape_yahoo_options(symbol, expiration))` +- Line 602: Blank line for readability. Code: `` +- Line 603: Blank line for readability. Code: `` +- Line 604: Conditional branch. Code: `if __name__ == "__main__":` +- Line 605: Run the Flask development server. Code: `app.run(host="0.0.0.0", port=9777)` diff --git a/scraper_service.py b/scraper_service.py index 3786cd0..22a91c8 100644 --- a/scraper_service.py +++ b/scraper_service.py @@ -4,6 +4,7 @@ from bs4 import BeautifulSoup from datetime import datetime, timezone import urllib.parse import logging +import json import re import time @@ -45,6 +46,159 @@ def format_expiration_label(timestamp): return str(timestamp) +def format_percent(value): + if value is None: + return None + try: + return f"{value * 100:.2f}%" + except Exception: + return None + + +def extract_raw_value(value): + if isinstance(value, dict): + return value.get("raw") + return value + + +def extract_fmt_value(value): + if isinstance(value, dict): + return value.get("fmt") + return None + + +def format_percent_value(value): + fmt = extract_fmt_value(value) + if fmt is not None: + return fmt + return format_percent(extract_raw_value(value)) + + +def format_last_trade_date(timestamp): + timestamp = extract_raw_value(timestamp) + if not timestamp: + return None + try: + return datetime.fromtimestamp(timestamp).strftime("%m/%d/%Y %I:%M %p") + " EST" + except Exception: + return None + + +def extract_option_chain_from_html(html): + if not html: + return None + + token = "\"body\":\"" + start = 0 + while True: + idx = html.find(token, start) + if idx == -1: + break + i = idx + len(token) + escaped = False + raw_chars = [] + while i < len(html): + ch = html[i] + if escaped: + raw_chars.append(ch) + escaped = False + else: + if ch == "\\": + raw_chars.append(ch) + escaped = True + elif ch == "\"": + break + else: + raw_chars.append(ch) + i += 1 + raw = "".join(raw_chars) + try: + body_text = json.loads(f"\"{raw}\"") + except json.JSONDecodeError: + start = idx + len(token) + continue + if "optionChain" not in body_text: + start = idx + len(token) + continue + try: + payload = json.loads(body_text) + except json.JSONDecodeError: + start = idx + len(token) + continue + option_chain = payload.get("optionChain") + if option_chain and option_chain.get("result"): + return option_chain + + start = idx + len(token) + + return None + + +def extract_expiration_dates_from_chain(chain): + if not chain: + return [] + + result = chain.get("result", []) + if not result: + return [] + return result[0].get("expirationDates", []) or [] + + +def normalize_chain_rows(rows): + normalized = [] + for row in rows or []: + normalized.append( + { + "Contract Name": row.get("contractSymbol"), + "Last Trade Date (EST)": format_last_trade_date( + row.get("lastTradeDate") + ), + "Strike": extract_raw_value(row.get("strike")), + "Last Price": extract_raw_value(row.get("lastPrice")), + "Bid": extract_raw_value(row.get("bid")), + "Ask": extract_raw_value(row.get("ask")), + "Change": extract_raw_value(row.get("change")), + "% Change": format_percent_value(row.get("percentChange")), + "Volume": extract_raw_value(row.get("volume")), + "Open Interest": extract_raw_value(row.get("openInterest")), + "Implied Volatility": format_percent_value( + row.get("impliedVolatility") + ), + } + ) + return normalized + + +def build_rows_from_chain(chain): + result = chain.get("result", []) if chain else [] + if not result: + return [], [] + options = result[0].get("options", []) + if not options: + return [], [] + option = options[0] + return ( + normalize_chain_rows(option.get("calls")), + normalize_chain_rows(option.get("puts")), + ) + + +def extract_contract_expiry_code(contract_name): + if not contract_name: + return None + match = re.search(r"(\d{6})", contract_name) + return match.group(1) if match else None + + +def expected_expiry_code(timestamp): + if not timestamp: + return None + try: + return datetime.utcfromtimestamp(timestamp).strftime("%y%m%d") + except Exception: + return None + + def extract_expiration_dates_from_html(html): if not html: return [] @@ -146,6 +300,66 @@ def wait_for_tables(page): def scrape_yahoo_options(symbol, expiration=None): + def parse_table(table_html, side): + if not table_html: + app.logger.warning("No %s table HTML for %s", side, symbol) + return [] + + soup = BeautifulSoup(table_html, "html.parser") + + headers = [th.get_text(strip=True) for th in soup.select("thead th")] + rows = soup.select("tbody tr") + + parsed = [] + for r in rows: + tds = r.find_all("td") + if len(tds) != len(headers): + continue + + item = {} + for i, c in enumerate(tds): + key = headers[i] + val = c.get_text(" ", strip=True) + + # Convert numeric fields + if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]: + try: + val = float(val.replace(",", "")) + except Exception: + val = None + elif key in ["Volume", "Open Interest"]: + try: + val = int(val.replace(",", "")) + except Exception: + val = None + elif val in ["-", ""]: + val = None + + item[key] = val + + parsed.append(item) + + app.logger.info("Parsed %d %s rows", len(parsed), side) + return parsed + + def read_option_chain(page): + html = page.content() + option_chain = extract_option_chain_from_html(html) + if option_chain: + expiration_dates = extract_expiration_dates_from_chain(option_chain) + else: + expiration_dates = extract_expiration_dates_from_html(html) + return option_chain, expiration_dates + + def has_expected_expiry(options, expected_code): + if not expected_code: + return False + for row in options or []: + name = row.get("Contract Name") + if extract_contract_expiry_code(name) == expected_code: + return True + return False + encoded = urllib.parse.quote(symbol, safe="") base_url = f"https://finance.yahoo.com/quote/{encoded}/options/" requested_expiration = expiration.strip() if expiration else None @@ -162,6 +376,8 @@ def scrape_yahoo_options(symbol, expiration=None): calls_html = None puts_html = None + calls_full = [] + puts_full = [] price = None selected_expiration_value = None selected_expiration_label = None @@ -210,8 +426,8 @@ def scrape_yahoo_options(symbol, expiration=None): page.goto(url, wait_until="domcontentloaded", timeout=60000) app.logger.info("Page loaded (domcontentloaded) for %s", symbol) - html = page.content() - expiration_dates = extract_expiration_dates_from_html(html) + option_chain, expiration_dates = read_option_chain(page) + app.logger.info("Option chain found: %s", bool(option_chain)) expiration_options = build_expiration_options(expiration_dates) if fallback_to_base: @@ -238,8 +454,7 @@ def scrape_yahoo_options(symbol, expiration=None): page.goto(url, wait_until="domcontentloaded", timeout=60000) app.logger.info("Page loaded (domcontentloaded) for %s", symbol) - html = page.content() - expiration_dates = extract_expiration_dates_from_html(html) + option_chain, expiration_dates = read_option_chain(page) expiration_options = build_expiration_options(expiration_dates) if target_date and expiration_options: @@ -258,25 +473,34 @@ def scrape_yahoo_options(symbol, expiration=None): for opt in expiration_options ], } + selected_expiration_value = matched.get("value") selected_expiration_label = matched.get("label") elif expiration_options and not target_date: selected_expiration_value = expiration_options[0].get("value") selected_expiration_label = expiration_options[0].get("label") - app.logger.info("Waiting for options tables...") + calls_full, puts_full = build_rows_from_chain(option_chain) + app.logger.info( + "Option chain rows: calls=%d puts=%d", + len(calls_full), + len(puts_full), + ) - tables = wait_for_tables(page) - if len(tables) < 2: - app.logger.error( - "Only %d tables found; expected 2. HTML may have changed.", - len(tables), - ) - return {"error": "Could not locate options tables", "stock": symbol} + if not calls_full and not puts_full: + app.logger.info("Waiting for options tables...") - app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables)) + tables = wait_for_tables(page) + if len(tables) < 2: + app.logger.error( + "Only %d tables found; expected 2. HTML may have changed.", + len(tables), + ) + return {"error": "Could not locate options tables", "stock": symbol} - calls_html = tables[0].evaluate("el => el.outerHTML") - puts_html = tables[1].evaluate("el => el.outerHTML") + app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables)) + + calls_html = tables[0].evaluate("el => el.outerHTML") + puts_html = tables[1].evaluate("el => el.outerHTML") # --- Extract current price --- try: @@ -297,53 +521,25 @@ def scrape_yahoo_options(symbol, expiration=None): finally: browser.close() - # ---------------------------------------------------------------------- - # Parsing Table HTML - # ---------------------------------------------------------------------- - def parse_table(table_html, side): - if not table_html: - app.logger.warning("No %s table HTML for %s", side, symbol) - return [] + if not calls_full and not puts_full and calls_html and puts_html: + calls_full = parse_table(calls_html, "calls") + puts_full = parse_table(puts_html, "puts") - soup = BeautifulSoup(table_html, "html.parser") - - headers = [th.get_text(strip=True) for th in soup.select("thead th")] - rows = soup.select("tbody tr") - - parsed = [] - for r in rows: - tds = r.find_all("td") - if len(tds) != len(headers): - continue - - item = {} - for i, c in enumerate(tds): - key = headers[i] - val = c.get_text(" ", strip=True) - - # Convert numeric fields - if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]: - try: - val = float(val.replace(",", "")) - except Exception: - val = None - elif key in ["Volume", "Open Interest"]: - try: - val = int(val.replace(",", "")) - except Exception: - val = None - elif val in ["-", ""]: - val = None - - item[key] = val - - parsed.append(item) - - app.logger.info("Parsed %d %s rows", len(parsed), side) - return parsed - - calls_full = parse_table(calls_html, "calls") - puts_full = parse_table(puts_html, "puts") + expected_code = expected_expiry_code(target_date) + if expected_code: + if not has_expected_expiry(calls_full, expected_code) and not has_expected_expiry( + puts_full, expected_code + ): + return { + "error": "Options chain does not match requested expiration", + "stock": symbol, + "requested_expiration": requested_expiration, + "expected_expiration_code": expected_code, + "selected_expiration": { + "value": selected_expiration_value, + "label": selected_expiration_label, + }, + } # ---------------------------------------------------------------------- # Pruning logic