diff --git a/runner.bat b/runner.bat
index d43b5f6..4f6dc4f 100644
--- a/runner.bat
+++ b/runner.bat
@@ -1,24 +1,48 @@
@echo off
-setlocal
+setlocal ENABLEDELAYEDEXPANSION
-:: Set the project folder to this script's directory
+:: Set project directory to script's location
set "PROJECT_DIR=%~dp0"
cd /d "%PROJECT_DIR%"
-:: Check if venv folder exists; if not, create venv and install requirements
+echo -----------------------------------------
+echo Checking virtual environment...
+echo -----------------------------------------
+
+:: Create venv if missing
if not exist "venv\Scripts\python.exe" (
echo Creating virtual environment...
python -m venv venv
- call venv\Scripts\activate.bat
- echo Installing required packages...
- pip install --upgrade pip
- pip install flask selenium webdriver-manager beautifulsoup4
-) else (
- call venv\Scripts\activate.bat
)
-:: Run the Flask server with logs redirected to server.log
-echo Starting Flask server, logs will be written to server.log
+:: Activate venv
+call "venv\Scripts\activate.bat"
+
+echo -----------------------------------------
+echo Upgrading pip...
+echo -----------------------------------------
+python -m pip install --upgrade pip
+
+echo -----------------------------------------
+echo Installing Python prerequisites...
+echo -----------------------------------------
+
+pip install flask selenium webdriver-manager beautifulsoup4 playwright
+
+echo -----------------------------------------
+echo Installing Playwright browser binaries...
+echo -----------------------------------------
+playwright install || (
+    echo ERROR: Playwright install failed. Trying chromium only...
+ playwright install chromium
+)
+
+echo -----------------------------------------
+echo All dependencies installed successfully!
+echo Launching server...
+echo -----------------------------------------
+
+:: Start Flask server with persistent window
start "" cmd /k "venv\Scripts\python.exe scraper_service.py"
endlocal
diff --git a/scraper_service.py b/scraper_service.py
index 5062540..9b8fb61 100644
--- a/scraper_service.py
+++ b/scraper_service.py
@@ -3,10 +3,11 @@ from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import urllib.parse
import logging
+import time
app = Flask(__name__)
-# Configure logging
+# Logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s"
@@ -24,144 +25,129 @@ def scrape_yahoo_options(symbol):
browser = p.chromium.launch(headless=True)
page = browser.new_page()
page.set_extra_http_headers({
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
- "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118 Safari/537.36"
+ "User-Agent": (
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
+ )
})
- # Avoid networkidle on Yahoo (it rarely goes “idle” because of ads/streaming)
page.goto(url, wait_until="domcontentloaded", timeout=60000)
app.logger.info("Page loaded (domcontentloaded) for %s", symbol)
- # Wait for the options tables
- page.wait_for_selector(
- "section[data-testid='options-list-table'] table.yf-wurt5d",
- timeout=30000
- )
- app.logger.info("Options tables located in DOM for %s", symbol)
+ # --- FIXED: Yahoo changed all classnames. We no longer depend on them. ---
+            # We simply wait until at least TWO <table> tags appear.
+ app.logger.info("Waiting for options tables...")
- # Grab CALLS and PUTS tables separately (first = Calls, second = Puts)
- tables = page.evaluate("""
- () => {
- const section = document.querySelector('section[data-testid="options-list-table"]');
- if (!section) return { calls: null, puts: null };
+ # Wait for any table to exist
+ page.wait_for_selector("table", timeout=30000)
- const tbs = section.querySelectorAll('table.yf-wurt5d');
- const getHTML = el => el ? el.outerHTML : null;
+ # Repeatedly check until 2 tables appear
+ for _ in range(30): # 30 × 1s = 30 seconds
+ tables = page.query_selector_all("table")
+ if len(tables) >= 2:
+ break
+ time.sleep(1)
- return {
- calls: getHTML(tbs[0] || null),
- puts: getHTML(tbs[1] || null)
- };
- }
- """)
+ tables = page.query_selector_all("table")
+ if len(tables) < 2:
+ app.logger.error("Only %d tables found — expected 2. HTML changed?", len(tables))
+ browser.close()
+ return {"error": "Could not locate options tables", "stock": symbol}
- calls_html = tables.get("calls") if tables else None
- puts_html = tables.get("puts") if tables else None
+ app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))
- # Current price
+ calls_html = tables[0].evaluate("el => el.outerHTML")
+ puts_html = tables[1].evaluate("el => el.outerHTML")
+
+ # --- Extract current price ---
price = None
try:
- price_text = page.locator("span[data-testid='qsp-price']").inner_text()
+ # Primary selector
+ price_text = page.locator("fin-streamer[data-field='regularMarketPrice']").inner_text()
price = float(price_text.replace(",", ""))
- app.logger.info("Current price for %s = %s", symbol, price)
- except Exception as e:
- app.logger.warning("Failed to get current price for %s: %s", symbol, e)
+        except Exception:
+ try:
+ # Fallback
+ price_text = page.locator("span[data-testid='qsp-price']").inner_text()
+ price = float(price_text.replace(",", ""))
+ except Exception as e:
+ app.logger.warning("Failed to extract price for %s: %s", symbol, e)
+
+ app.logger.info("Current price for %s = %s", symbol, price)
browser.close()
- if not calls_html and not puts_html:
- app.logger.error("Could not locate options tables for %s", symbol)
- return {"error": "Could not locate options tables", "stock": symbol}
-
+ # ----------------------------------------------------------------------
+ # Parsing Table HTML
+ # ----------------------------------------------------------------------
def parse_table(table_html, side):
if not table_html:
- app.logger.warning("No %s table HTML present for %s", side, symbol)
+ app.logger.warning("No %s table HTML for %s", side, symbol)
return []
soup = BeautifulSoup(table_html, "html.parser")
+
headers = [th.get_text(strip=True) for th in soup.select("thead th")]
rows = soup.select("tbody tr")
- parsed_rows = []
+ parsed = []
for r in rows:
- cols = r.find_all("td")
- if len(cols) != len(headers):
+ tds = r.find_all("td")
+ if len(tds) != len(headers):
continue
- data = {}
- for i, c in enumerate(cols):
+ item = {}
+ for i, c in enumerate(tds):
key = headers[i]
val = c.get_text(" ", strip=True)
+ # Convert numeric fields
if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]:
try:
val = float(val.replace(",", ""))
- except Exception:
+                        except Exception:
val = None
elif key in ["Volume", "Open Interest"]:
try:
val = int(val.replace(",", ""))
- except Exception:
+                        except Exception:
val = None
elif val in ["-", ""]:
val = None
- data[key] = val
+ item[key] = val
- parsed_rows.append(data)
+ parsed.append(item)
- app.logger.info("Parsed %d %s rows for %s", len(parsed_rows), side, symbol)
- return parsed_rows
+ app.logger.info("Parsed %d %s rows", len(parsed), side)
+ return parsed
calls_full = parse_table(calls_html, "calls")
puts_full = parse_table(puts_html, "puts")
- def rng(opts):
- strikes = [r.get("Strike") for r in opts
- if isinstance(r.get("Strike"), (int, float))]
- return [min(strikes), max(strikes)] if strikes else [None, None]
-
+ # ----------------------------------------------------------------------
+ # Pruning logic
+ # ----------------------------------------------------------------------
def prune_nearest(options, price_value, limit=26, side=""):
if price_value is None:
- app.logger.info(
- "No current price for %s; skipping pruning for %s (keeping %d rows)",
- symbol, side, len(options)
- )
return options, 0
- numeric_opts = [o for o in options if isinstance(o.get("Strike"), (int, float))]
- if len(numeric_opts) <= limit:
- app.logger.info(
- "Not enough %s rows for pruning for %s: total=%d, limit=%d",
- side, symbol, len(numeric_opts), limit
- )
- return numeric_opts, 0
+ numeric = [o for o in options if isinstance(o.get("Strike"), (int, float))]
- sorted_opts = sorted(
- numeric_opts,
- key=lambda o: abs(o["Strike"] - price_value)
- )
- pruned_list = sorted_opts[:limit]
- pruned_count = len(options) - len(pruned_list)
+ if len(numeric) <= limit:
+ return numeric, 0
- app.logger.info(
- "Pruned %s for %s: original=%d, kept=%d, pruned=%d (limit=%d)",
- side, symbol, len(options), len(pruned_list), pruned_count, limit
- )
- return pruned_list, pruned_count
+ sorted_opts = sorted(numeric, key=lambda x: abs(x["Strike"] - price_value))
+ pruned = sorted_opts[:limit]
+ pruned_count = len(options) - len(pruned)
+ return pruned, pruned_count
- # ✅ 26 closest by strike on each side
- calls, pruned_calls_count = prune_nearest(calls_full, price, limit=26, side="calls")
- puts, pruned_puts_count = prune_nearest(puts_full, price, limit=26, side="puts")
+ calls, pruned_calls = prune_nearest(calls_full, price, side="calls")
+ puts, pruned_puts = prune_nearest(puts_full, price, side="puts")
- calls_range = rng(calls)
- puts_range = rng(puts)
-
- app.logger.info(
- "Final summary for %s: calls_kept=%d, puts_kept=%d, "
- "calls_strike_range=%s, puts_strike_range=%s",
- symbol, len(calls), len(puts), calls_range, puts_range
- )
+ def strike_range(opts):
+ strikes = [o["Strike"] for o in opts if isinstance(o.get("Strike"), (int, float))]
+ return [min(strikes), max(strikes)] if strikes else [None, None]
return {
"stock": symbol,
@@ -169,20 +155,21 @@ def scrape_yahoo_options(symbol):
"current_price": price,
"calls": calls,
"puts": puts,
- "calls_strike_range": calls_range,
- "puts_strike_range": puts_range,
+ "calls_strike_range": strike_range(calls),
+ "puts_strike_range": strike_range(puts),
"total_calls": len(calls),
"total_puts": len(puts),
- "pruned_calls_count": pruned_calls_count,
- "pruned_puts_count": pruned_puts_count,
+ "pruned_calls_count": pruned_calls,
+ "pruned_puts_count": pruned_puts,
}
+
@app.route("/scrape_sync")
def scrape_sync():
symbol = request.args.get("stock", "MSFT")
app.logger.info("Received /scrape_sync request for symbol=%s", symbol)
- data = scrape_yahoo_options(symbol)
- return jsonify(data)
+ return jsonify(scrape_yahoo_options(symbol))
+
if __name__ == "__main__":
app.run(host="0.0.0.0", port=9777)