Fix the options table not loading; also fix the runner to install Playwright
This commit is contained in:
46
runner.bat
46
runner.bat
@@ -1,24 +1,48 @@
|
|||||||
@echo off
|
@echo off
|
||||||
setlocal
|
setlocal ENABLEDELAYEDEXPANSION
|
||||||
|
|
||||||
:: Set the project folder to this script's directory
|
:: Set project directory to script's location
|
||||||
set "PROJECT_DIR=%~dp0"
|
set "PROJECT_DIR=%~dp0"
|
||||||
cd /d "%PROJECT_DIR%"
|
cd /d "%PROJECT_DIR%"
|
||||||
|
|
||||||
:: Check if venv folder exists; if not, create venv and install requirements
|
echo -----------------------------------------
|
||||||
|
echo Checking virtual environment...
|
||||||
|
echo -----------------------------------------
|
||||||
|
|
||||||
|
:: Create venv if missing
|
||||||
if not exist "venv\Scripts\python.exe" (
|
if not exist "venv\Scripts\python.exe" (
|
||||||
echo Creating virtual environment...
|
echo Creating virtual environment...
|
||||||
python -m venv venv
|
python -m venv venv
|
||||||
call venv\Scripts\activate.bat
|
|
||||||
echo Installing required packages...
|
|
||||||
pip install --upgrade pip
|
|
||||||
pip install flask selenium webdriver-manager beautifulsoup4
|
|
||||||
) else (
|
|
||||||
call venv\Scripts\activate.bat
|
|
||||||
)
|
)
|
||||||
|
|
||||||
:: Run the Flask server with logs redirected to server.log
|
:: Activate venv
|
||||||
echo Starting Flask server, logs will be written to server.log
|
call "venv\Scripts\activate.bat"
|
||||||
|
|
||||||
|
echo -----------------------------------------
|
||||||
|
echo Upgrading pip...
|
||||||
|
echo -----------------------------------------
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
|
||||||
|
echo -----------------------------------------
|
||||||
|
echo Installing Python prerequisites...
|
||||||
|
echo -----------------------------------------
|
||||||
|
|
||||||
|
pip install flask selenium webdriver-manager beautifulsoup4 playwright
|
||||||
|
|
||||||
|
echo -----------------------------------------
|
||||||
|
echo Installing Playwright browser binaries...
|
||||||
|
echo -----------------------------------------
|
||||||
|
playwright install || (
|
||||||
|
echo ERROR: Playwright install failed! Trying chromium only...
|
||||||
|
playwright install chromium
|
||||||
|
)
|
||||||
|
|
||||||
|
echo -----------------------------------------
|
||||||
|
echo All dependencies installed successfully!
|
||||||
|
echo Launching server...
|
||||||
|
echo -----------------------------------------
|
||||||
|
|
||||||
|
:: Start Flask server with persistent window
|
||||||
start "" cmd /k "venv\Scripts\python.exe scraper_service.py"
|
start "" cmd /k "venv\Scripts\python.exe scraper_service.py"
|
||||||
|
|
||||||
endlocal
|
endlocal
|
||||||
|
|||||||
@@ -3,10 +3,11 @@ from playwright.sync_api import sync_playwright
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
# Configure logging
|
# Logging
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
format="%(asctime)s [%(levelname)s] %(message)s"
|
format="%(asctime)s [%(levelname)s] %(message)s"
|
||||||
@@ -24,144 +25,129 @@ def scrape_yahoo_options(symbol):
|
|||||||
browser = p.chromium.launch(headless=True)
|
browser = p.chromium.launch(headless=True)
|
||||||
page = browser.new_page()
|
page = browser.new_page()
|
||||||
page.set_extra_http_headers({
|
page.set_extra_http_headers({
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
"User-Agent": (
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118 Safari/537.36"
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Avoid networkidle on Yahoo (it rarely goes “idle” because of ads/streaming)
|
|
||||||
page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
||||||
app.logger.info("Page loaded (domcontentloaded) for %s", symbol)
|
app.logger.info("Page loaded (domcontentloaded) for %s", symbol)
|
||||||
|
|
||||||
# Wait for the options tables
|
# --- FIXED: Yahoo changed all of its class names, so we no longer depend on them. ---
|
||||||
page.wait_for_selector(
|
# We simply wait until at least TWO <table> tags appear.
|
||||||
"section[data-testid='options-list-table'] table.yf-wurt5d",
|
app.logger.info("Waiting for options tables...")
|
||||||
timeout=30000
|
|
||||||
)
|
|
||||||
app.logger.info("Options tables located in DOM for %s", symbol)
|
|
||||||
|
|
||||||
# Grab CALLS and PUTS tables separately (first = Calls, second = Puts)
|
# Wait for any table to exist
|
||||||
tables = page.evaluate("""
|
page.wait_for_selector("table", timeout=30000)
|
||||||
() => {
|
|
||||||
const section = document.querySelector('section[data-testid="options-list-table"]');
|
|
||||||
if (!section) return { calls: null, puts: null };
|
|
||||||
|
|
||||||
const tbs = section.querySelectorAll('table.yf-wurt5d');
|
# Repeatedly check until 2 tables appear
|
||||||
const getHTML = el => el ? el.outerHTML : null;
|
for _ in range(30): # 30 × 1s = 30 seconds
|
||||||
|
tables = page.query_selector_all("table")
|
||||||
|
if len(tables) >= 2:
|
||||||
|
break
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
return {
|
tables = page.query_selector_all("table")
|
||||||
calls: getHTML(tbs[0] || null),
|
if len(tables) < 2:
|
||||||
puts: getHTML(tbs[1] || null)
|
app.logger.error("Only %d tables found — expected 2. HTML changed?", len(tables))
|
||||||
};
|
browser.close()
|
||||||
}
|
return {"error": "Could not locate options tables", "stock": symbol}
|
||||||
""")
|
|
||||||
|
|
||||||
calls_html = tables.get("calls") if tables else None
|
app.logger.info("Found %d tables. Extracting Calls & Puts.", len(tables))
|
||||||
puts_html = tables.get("puts") if tables else None
|
|
||||||
|
|
||||||
# Current price
|
calls_html = tables[0].evaluate("el => el.outerHTML")
|
||||||
|
puts_html = tables[1].evaluate("el => el.outerHTML")
|
||||||
|
|
||||||
|
# --- Extract current price ---
|
||||||
price = None
|
price = None
|
||||||
try:
|
try:
|
||||||
price_text = page.locator("span[data-testid='qsp-price']").inner_text()
|
# Primary selector
|
||||||
|
price_text = page.locator("fin-streamer[data-field='regularMarketPrice']").inner_text()
|
||||||
price = float(price_text.replace(",", ""))
|
price = float(price_text.replace(",", ""))
|
||||||
app.logger.info("Current price for %s = %s", symbol, price)
|
except:
|
||||||
except Exception as e:
|
try:
|
||||||
app.logger.warning("Failed to get current price for %s: %s", symbol, e)
|
# Fallback
|
||||||
|
price_text = page.locator("span[data-testid='qsp-price']").inner_text()
|
||||||
|
price = float(price_text.replace(",", ""))
|
||||||
|
except Exception as e:
|
||||||
|
app.logger.warning("Failed to extract price for %s: %s", symbol, e)
|
||||||
|
|
||||||
|
app.logger.info("Current price for %s = %s", symbol, price)
|
||||||
|
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|
||||||
if not calls_html and not puts_html:
|
# ----------------------------------------------------------------------
|
||||||
app.logger.error("Could not locate options tables for %s", symbol)
|
# Parsing Table HTML
|
||||||
return {"error": "Could not locate options tables", "stock": symbol}
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
def parse_table(table_html, side):
|
def parse_table(table_html, side):
|
||||||
if not table_html:
|
if not table_html:
|
||||||
app.logger.warning("No %s table HTML present for %s", side, symbol)
|
app.logger.warning("No %s table HTML for %s", side, symbol)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
soup = BeautifulSoup(table_html, "html.parser")
|
soup = BeautifulSoup(table_html, "html.parser")
|
||||||
|
|
||||||
headers = [th.get_text(strip=True) for th in soup.select("thead th")]
|
headers = [th.get_text(strip=True) for th in soup.select("thead th")]
|
||||||
rows = soup.select("tbody tr")
|
rows = soup.select("tbody tr")
|
||||||
|
|
||||||
parsed_rows = []
|
parsed = []
|
||||||
for r in rows:
|
for r in rows:
|
||||||
cols = r.find_all("td")
|
tds = r.find_all("td")
|
||||||
if len(cols) != len(headers):
|
if len(tds) != len(headers):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
data = {}
|
item = {}
|
||||||
for i, c in enumerate(cols):
|
for i, c in enumerate(tds):
|
||||||
key = headers[i]
|
key = headers[i]
|
||||||
val = c.get_text(" ", strip=True)
|
val = c.get_text(" ", strip=True)
|
||||||
|
|
||||||
|
# Convert numeric fields
|
||||||
if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]:
|
if key in ["Strike", "Last Price", "Bid", "Ask", "Change"]:
|
||||||
try:
|
try:
|
||||||
val = float(val.replace(",", ""))
|
val = float(val.replace(",", ""))
|
||||||
except Exception:
|
except:
|
||||||
val = None
|
val = None
|
||||||
elif key in ["Volume", "Open Interest"]:
|
elif key in ["Volume", "Open Interest"]:
|
||||||
try:
|
try:
|
||||||
val = int(val.replace(",", ""))
|
val = int(val.replace(",", ""))
|
||||||
except Exception:
|
except:
|
||||||
val = None
|
val = None
|
||||||
elif val in ["-", ""]:
|
elif val in ["-", ""]:
|
||||||
val = None
|
val = None
|
||||||
|
|
||||||
data[key] = val
|
item[key] = val
|
||||||
|
|
||||||
parsed_rows.append(data)
|
parsed.append(item)
|
||||||
|
|
||||||
app.logger.info("Parsed %d %s rows for %s", len(parsed_rows), side, symbol)
|
app.logger.info("Parsed %d %s rows", len(parsed), side)
|
||||||
return parsed_rows
|
return parsed
|
||||||
|
|
||||||
calls_full = parse_table(calls_html, "calls")
|
calls_full = parse_table(calls_html, "calls")
|
||||||
puts_full = parse_table(puts_html, "puts")
|
puts_full = parse_table(puts_html, "puts")
|
||||||
|
|
||||||
def rng(opts):
|
# ----------------------------------------------------------------------
|
||||||
strikes = [r.get("Strike") for r in opts
|
# Pruning logic
|
||||||
if isinstance(r.get("Strike"), (int, float))]
|
# ----------------------------------------------------------------------
|
||||||
return [min(strikes), max(strikes)] if strikes else [None, None]
|
|
||||||
|
|
||||||
def prune_nearest(options, price_value, limit=26, side=""):
|
def prune_nearest(options, price_value, limit=26, side=""):
|
||||||
if price_value is None:
|
if price_value is None:
|
||||||
app.logger.info(
|
|
||||||
"No current price for %s; skipping pruning for %s (keeping %d rows)",
|
|
||||||
symbol, side, len(options)
|
|
||||||
)
|
|
||||||
return options, 0
|
return options, 0
|
||||||
|
|
||||||
numeric_opts = [o for o in options if isinstance(o.get("Strike"), (int, float))]
|
numeric = [o for o in options if isinstance(o.get("Strike"), (int, float))]
|
||||||
if len(numeric_opts) <= limit:
|
|
||||||
app.logger.info(
|
|
||||||
"Not enough %s rows for pruning for %s: total=%d, limit=%d",
|
|
||||||
side, symbol, len(numeric_opts), limit
|
|
||||||
)
|
|
||||||
return numeric_opts, 0
|
|
||||||
|
|
||||||
sorted_opts = sorted(
|
if len(numeric) <= limit:
|
||||||
numeric_opts,
|
return numeric, 0
|
||||||
key=lambda o: abs(o["Strike"] - price_value)
|
|
||||||
)
|
|
||||||
pruned_list = sorted_opts[:limit]
|
|
||||||
pruned_count = len(options) - len(pruned_list)
|
|
||||||
|
|
||||||
app.logger.info(
|
sorted_opts = sorted(numeric, key=lambda x: abs(x["Strike"] - price_value))
|
||||||
"Pruned %s for %s: original=%d, kept=%d, pruned=%d (limit=%d)",
|
pruned = sorted_opts[:limit]
|
||||||
side, symbol, len(options), len(pruned_list), pruned_count, limit
|
pruned_count = len(options) - len(pruned)
|
||||||
)
|
return pruned, pruned_count
|
||||||
return pruned_list, pruned_count
|
|
||||||
|
|
||||||
# ✅ 26 closest by strike on each side
|
calls, pruned_calls = prune_nearest(calls_full, price, side="calls")
|
||||||
calls, pruned_calls_count = prune_nearest(calls_full, price, limit=26, side="calls")
|
puts, pruned_puts = prune_nearest(puts_full, price, side="puts")
|
||||||
puts, pruned_puts_count = prune_nearest(puts_full, price, limit=26, side="puts")
|
|
||||||
|
|
||||||
calls_range = rng(calls)
|
def strike_range(opts):
|
||||||
puts_range = rng(puts)
|
strikes = [o["Strike"] for o in opts if isinstance(o.get("Strike"), (int, float))]
|
||||||
|
return [min(strikes), max(strikes)] if strikes else [None, None]
|
||||||
app.logger.info(
|
|
||||||
"Final summary for %s: calls_kept=%d, puts_kept=%d, "
|
|
||||||
"calls_strike_range=%s, puts_strike_range=%s",
|
|
||||||
symbol, len(calls), len(puts), calls_range, puts_range
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"stock": symbol,
|
"stock": symbol,
|
||||||
@@ -169,20 +155,21 @@ def scrape_yahoo_options(symbol):
|
|||||||
"current_price": price,
|
"current_price": price,
|
||||||
"calls": calls,
|
"calls": calls,
|
||||||
"puts": puts,
|
"puts": puts,
|
||||||
"calls_strike_range": calls_range,
|
"calls_strike_range": strike_range(calls),
|
||||||
"puts_strike_range": puts_range,
|
"puts_strike_range": strike_range(puts),
|
||||||
"total_calls": len(calls),
|
"total_calls": len(calls),
|
||||||
"total_puts": len(puts),
|
"total_puts": len(puts),
|
||||||
"pruned_calls_count": pruned_calls_count,
|
"pruned_calls_count": pruned_calls,
|
||||||
"pruned_puts_count": pruned_puts_count,
|
"pruned_puts_count": pruned_puts,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@app.route("/scrape_sync")
|
@app.route("/scrape_sync")
|
||||||
def scrape_sync():
|
def scrape_sync():
|
||||||
symbol = request.args.get("stock", "MSFT")
|
symbol = request.args.get("stock", "MSFT")
|
||||||
app.logger.info("Received /scrape_sync request for symbol=%s", symbol)
|
app.logger.info("Received /scrape_sync request for symbol=%s", symbol)
|
||||||
data = scrape_yahoo_options(symbol)
|
return jsonify(scrape_yahoo_options(symbol))
|
||||||
return jsonify(data)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(host="0.0.0.0", port=9777)
|
app.run(host="0.0.0.0", port=9777)
|
||||||
|
|||||||
Reference in New Issue
Block a user