Compare commits

9 Commits

5 changed files with 2278 additions and 543 deletions

AGENTS.md (1126 lines changed)

File diff suppressed because it is too large.

@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/playwright/python:v1.50.0-jammy
+FROM mcr.microsoft.com/playwright/python:v1.57.0-jammy
 WORKDIR /app
@@ -6,7 +6,7 @@ ENV PYTHONUNBUFFERED=1
 COPY scraper_service.py /app/scraper_service.py
-RUN python -m pip install --no-cache-dir flask beautifulsoup4
+RUN python -m pip install --no-cache-dir flask beautifulsoup4 playwright==1.57.0
 EXPOSE 9777
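
This bump keeps the pip-installed Playwright package in lockstep with the browser binaries baked into the base image; a skew between the two is a common cause of browser-launch failures. A minimal sanity check, as a sketch assuming it runs inside the built image, could be:

# Sketch (assumes it runs inside the built image): the pip-pinned
# playwright version should match the v1.57.0 base-image tag.
from importlib.metadata import version  # stdlib since Python 3.8

installed = version("playwright")
assert installed == "1.57.0", f"playwright {installed} != pinned 1.57.0"
print("playwright pin OK:", installed)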

File diff suppressed because it is too large.

scripts/test_cycles.py (new file, 199 lines)

@@ -0,0 +1,199 @@
import argparse
import datetime
import json
import sys
import time
import urllib.parse
import urllib.request

DEFAULT_STOCKS = ["AAPL", "AMZN", "MSFT", "TSLA"]
DEFAULT_CYCLES = [None, 5, 10, 25, 50, 75, 100, 150, 200, 500]


def http_get(base_url, params, timeout):
    query = urllib.parse.urlencode(params)
    url = f"{base_url}?{query}"
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))


def expected_code_from_epoch(epoch):
    return datetime.datetime.utcfromtimestamp(epoch).strftime("%y%m%d")


def all_contracts_match(opts, expected_code):
    for opt in opts:
        name = opt.get("Contract Name") or ""
        if expected_code not in name:
            return False
    return True


def parse_list(value, default):
    if not value:
        return default
    return [item.strip() for item in value.split(",") if item.strip()]


def parse_cycles(value):
    if not value:
        return DEFAULT_CYCLES
    cycles = []
    for item in value.split(","):
        token = item.strip().lower()
        if not token or token in ("default", "none"):
            cycles.append(None)
            continue
        try:
            cycles.append(int(token))
        except ValueError:
            raise ValueError(f"Invalid strikeLimit value: {item}")
    return cycles


def main():
    parser = argparse.ArgumentParser(description="Yahoo options scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/scrape_sync",
        help="Base URL for /scrape_sync",
    )
    parser.add_argument(
        "--stocks",
        default=",".join(DEFAULT_STOCKS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--strike-limits",
        default="default,5,10,25,50,75,100,150,200,500",
        help="Comma-separated strike limits (use 'default' for the API default)",
    )
    parser.add_argument(
        "--baseline-limit",
        type=int,
        default=5000,
        help="Large strikeLimit used to capture all available strikes",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args()

    stocks = parse_list(args.stocks, DEFAULT_STOCKS)
    cycles = parse_cycles(args.strike_limits)

    print("Fetching expiration lists...")
    expirations = {}
    for stock in stocks:
        data = http_get(args.base_url, {"stock": stock, "expiration": "invalid"}, args.timeout)
        if "available_expirations" not in data:
            print(f"ERROR: missing available_expirations for {stock}: {data}")
            sys.exit(1)
        values = [opt.get("value") for opt in data["available_expirations"] if opt.get("value")]
        if len(values) < 4:
            print(f"ERROR: not enough expirations for {stock}: {values}")
            sys.exit(1)
        expirations[stock] = values[:4]
        print(f" {stock}: {expirations[stock]}")
        time.sleep(args.sleep)

    print("\nBuilding baseline counts (strikeLimit=%d)..." % args.baseline_limit)
    baseline_counts = {}
    for stock, exp_list in expirations.items():
        for exp in exp_list:
            data = http_get(
                args.base_url,
                {"stock": stock, "expiration": exp, "strikeLimit": args.baseline_limit},
                args.timeout,
            )
            if "error" in data:
                print(f"ERROR: baseline error for {stock} {exp}: {data}")
                sys.exit(1)
            calls_count = data.get("total_calls")
            puts_count = data.get("total_puts")
            if calls_count is None or puts_count is None:
                print(f"ERROR: baseline missing counts for {stock} {exp}: {data}")
                sys.exit(1)
            expected_code = expected_code_from_epoch(exp)
            if not all_contracts_match(data.get("calls", []), expected_code):
                print(f"ERROR: baseline calls mismatch for {stock} {exp}")
                sys.exit(1)
            if not all_contracts_match(data.get("puts", []), expected_code):
                print(f"ERROR: baseline puts mismatch for {stock} {exp}")
                sys.exit(1)
            baseline_counts[(stock, exp)] = (calls_count, puts_count)
            print(f" {stock} {exp}: calls={calls_count} puts={puts_count}")
            time.sleep(args.sleep)

    print("\nRunning %d cycles of API tests..." % len(cycles))
    for idx, strike_limit in enumerate(cycles, start=1):
        print(f"Cycle {idx}/{len(cycles)} (strikeLimit={strike_limit})")
        for stock, exp_list in expirations.items():
            for exp in exp_list:
                params = {"stock": stock, "expiration": exp}
                if strike_limit is not None:
                    params["strikeLimit"] = strike_limit
                data = http_get(args.base_url, params, args.timeout)
                if "error" in data:
                    print(f"ERROR: {stock} {exp} -> {data}")
                    sys.exit(1)
                selected_val = data.get("selected_expiration", {}).get("value")
                if selected_val != exp:
                    print(
                        f"ERROR: selected expiration mismatch for {stock} {exp}: {selected_val}"
                    )
                    sys.exit(1)
                expected_code = expected_code_from_epoch(exp)
                if not all_contracts_match(data.get("calls", []), expected_code):
                    print(f"ERROR: calls expiry mismatch for {stock} {exp}")
                    sys.exit(1)
                if not all_contracts_match(data.get("puts", []), expected_code):
                    print(f"ERROR: puts expiry mismatch for {stock} {exp}")
                    sys.exit(1)
                available_calls, available_puts = baseline_counts[(stock, exp)]
                expected_limit = strike_limit if strike_limit is not None else 25
                expected_calls = min(expected_limit, available_calls)
                expected_puts = min(expected_limit, available_puts)
                if data.get("total_calls") != expected_calls:
                    print(
                        f"ERROR: call count mismatch for {stock} {exp}: "
                        f"got {data.get('total_calls')} expected {expected_calls}"
                    )
                    sys.exit(1)
                if data.get("total_puts") != expected_puts:
                    print(
                        f"ERROR: put count mismatch for {stock} {exp}: "
                        f"got {data.get('total_puts')} expected {expected_puts}"
                    )
                    sys.exit(1)
                expected_pruned_calls = max(0, available_calls - expected_calls)
                expected_pruned_puts = max(0, available_puts - expected_puts)
                if data.get("pruned_calls_count") != expected_pruned_calls:
                    print(
                        f"ERROR: pruned calls mismatch for {stock} {exp}: "
                        f"got {data.get('pruned_calls_count')} expected {expected_pruned_calls}"
                    )
                    sys.exit(1)
                if data.get("pruned_puts_count") != expected_pruned_puts:
                    print(
                        f"ERROR: pruned puts mismatch for {stock} {exp}: "
                        f"got {data.get('pruned_puts_count')} expected {expected_pruned_puts}"
                    )
                    sys.exit(1)
                time.sleep(args.sleep)
        print(f"Cycle {idx} OK")
    print("\nAll cycles completed successfully.")


if __name__ == "__main__":
    main()
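
The contract-name assertions hinge on expected_code_from_epoch: Yahoo reports expirations as UTC epoch seconds, and OCC-style contract names embed the expiry as a YYMMDD code right after the ticker, so a payload carrying the wrong expiration is caught by a plain substring test. A worked example with an illustrative epoch (the values below are hypothetical, not taken from a test run):

import datetime

epoch = 1750377600  # 2025-06-20 00:00:00 UTC, a Friday expiry
code = datetime.datetime.utcfromtimestamp(epoch).strftime("%y%m%d")
assert code == "250620"
# A contract named "AAPL250620C00200000" contains "250620", so
# all_contracts_match() accepts it for this expiration.

Note that the checker hard-codes 25 as the service's default strikeLimit when none is sent; a typical local run would be python scripts/test_cycles.py --stocks AAPL --strike-limits default,10,100.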

@@ -0,0 +1,145 @@
import argparse
import json
import sys
import time
import urllib.parse
import urllib.request

DEFAULT_SYMBOLS = ["AAPL", "AMZN", "MSFT", "TSLA"]
REQUIRED_SECTIONS = [
    "key_metrics",
    "valuation",
    "profitability",
    "growth",
    "financial_strength",
    "cashflow",
    "ownership",
    "analyst",
    "earnings",
    "performance",
]
REQUIRED_KEY_METRICS = [
    "previous_close",
    "open",
    "bid",
    "ask",
    "beta",
    "eps_trailing",
    "dividend_rate",
    "current_price",
]


def http_get(base_url, params, timeout):
    query = urllib.parse.urlencode(params)
    url = f"{base_url}?{query}"
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))


def parse_list(value, default):
    if not value:
        return default
    return [item.strip() for item in value.split(",") if item.strip()]


def build_signature(data):
    return {
        "key_metrics_keys": sorted(data.get("key_metrics", {}).keys()),
        "valuation_keys": sorted(data.get("valuation", {}).keys()),
        "profitability_keys": sorted(data.get("profitability", {}).keys()),
        "growth_keys": sorted(data.get("growth", {}).keys()),
        "financial_strength_keys": sorted(data.get("financial_strength", {}).keys()),
        "cashflow_keys": sorted(data.get("cashflow", {}).keys()),
        "ownership_keys": sorted(data.get("ownership", {}).keys()),
        "analyst_keys": sorted(data.get("analyst", {}).keys()),
        "earnings_keys": sorted(data.get("earnings", {}).keys()),
        "performance_keys": sorted(data.get("performance", {}).keys()),
    }


def validate_payload(symbol, data):
    if "error" in data:
        return f"API error for {symbol}: {data}"
    if data.get("stock", "").upper() != symbol.upper():
        return f"Symbol mismatch: expected {symbol} got {data.get('stock')}"
    validation = data.get("validation", {})
    if validation.get("symbol_match") is not True:
        return f"Validation symbol_match failed for {symbol}: {validation}"
    if validation.get("issues"):
        return f"Validation issues for {symbol}: {validation}"
    for section in REQUIRED_SECTIONS:
        if section not in data:
            return f"Missing section {section} for {symbol}"
    key_metrics = data.get("key_metrics", {})
    for field in REQUIRED_KEY_METRICS:
        if field not in key_metrics:
            return f"Missing key metric {field} for {symbol}"
    return None


def main():
    parser = argparse.ArgumentParser(description="Yahoo profile scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/profile",
        help="Base URL for /profile",
    )
    parser.add_argument(
        "--symbols",
        default=",".join(DEFAULT_SYMBOLS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=8,
        help="Number of validation runs per symbol",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args()

    symbols = parse_list(args.symbols, DEFAULT_SYMBOLS)
    signatures = {}

    print(f"Running {args.runs} profile cycles for: {', '.join(symbols)}")
    for run in range(1, args.runs + 1):
        print(f"Cycle {run}/{args.runs}")
        for symbol in symbols:
            data = http_get(args.base_url, {"stock": symbol}, args.timeout)
            error = validate_payload(symbol, data)
            if error:
                print(f"ERROR: {error}")
                sys.exit(1)
            signature = build_signature(data)
            if symbol not in signatures:
                signatures[symbol] = signature
            elif signatures[symbol] != signature:
                print(f"ERROR: Signature changed for {symbol}")
                print(f"Baseline: {signatures[symbol]}")
                print(f"Current: {signature}")
                sys.exit(1)
            time.sleep(args.sleep)
        print(f"Cycle {run} OK")
    print("\nAll profile cycles completed successfully.")


if __name__ == "__main__":
    main()
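
The signature check is deliberately structural: build_signature reduces each payload to the sorted key sets of its ten sections, so drift in the scraper's field extraction between runs surfaces as a key-set diff rather than a flaky comparison of ever-changing quote values. A toy illustration of the idea, using hypothetical payloads and a cut-down helper:

def key_signature(payload):
    # Mirrors the idea behind build_signature: compare shapes, not values.
    return sorted(payload.get("key_metrics", {}).keys())

baseline = {"key_metrics": {"open": 189.2, "bid": 189.1}}
current = {"key_metrics": {"open": 189.5, "ask": 189.7}}

assert key_signature(baseline) == ["bid", "open"]
assert key_signature(current) == ["ask", "open"]  # "bid" gone, "ask" new
assert key_signature(baseline) != key_signature(current)  # drift detected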