Compare commits
9 Commits
690887a6ec ... main
| Author | SHA1 | Date |
|---|---|---|
| | 83a5e843c0 | |
| | 4e02c6ce0a | |
| | c01a98abce | |
| | 68805ed80a | |
| | 711d87a998 | |
| | bce40014ad | |
| | 50a7ef119a | |
| | 4ab0e22047 | |
| | 67b8fad423 | |
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/playwright/python:v1.50.0-jammy
+FROM mcr.microsoft.com/playwright/python:v1.57.0-jammy

 WORKDIR /app

@@ -6,7 +6,7 @@ ENV PYTHONUNBUFFERED=1

 COPY scraper_service.py /app/scraper_service.py

-RUN python -m pip install --no-cache-dir flask beautifulsoup4
+RUN python -m pip install --no-cache-dir flask beautifulsoup4 playwright==1.57.0

 EXPOSE 9777
scraper_service.py (1347 changed lines)
File diff suppressed because it is too large
scripts/test_cycles.py (new file, 199 lines)
@@ -0,0 +1,199 @@
import argparse
import datetime
import json
import sys
import time
import urllib.parse
import urllib.request

DEFAULT_STOCKS = ["AAPL", "AMZN", "MSFT", "TSLA"]
DEFAULT_CYCLES = [None, 5, 10, 25, 50, 75, 100, 150, 200, 500]


def http_get(base_url, params, timeout):
    query = urllib.parse.urlencode(params)
    url = f"{base_url}?{query}"
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))


def expected_code_from_epoch(epoch):
    return datetime.datetime.utcfromtimestamp(epoch).strftime("%y%m%d")


def all_contracts_match(opts, expected_code):
    for opt in opts:
        name = opt.get("Contract Name") or ""
        if expected_code not in name:
            return False
    return True


def parse_list(value, default):
    if not value:
        return default
    return [item.strip() for item in value.split(",") if item.strip()]


def parse_cycles(value):
    if not value:
        return DEFAULT_CYCLES
    cycles = []
    for item in value.split(","):
        token = item.strip().lower()
        if not token or token in ("default", "none"):
            cycles.append(None)
            continue
        try:
            cycles.append(int(token))
        except ValueError:
            raise ValueError(f"Invalid strikeLimit value: {item}")
    return cycles


def main():
    parser = argparse.ArgumentParser(description="Yahoo options scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/scrape_sync",
        help="Base URL for /scrape_sync",
    )
    parser.add_argument(
        "--stocks",
        default=",".join(DEFAULT_STOCKS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--strike-limits",
        default="default,5,10,25,50,75,100,150,200,500",
        help="Comma-separated strike limits (use 'default' for the API default)",
    )
    parser.add_argument(
        "--baseline-limit",
        type=int,
        default=5000,
        help="Large strikeLimit used to capture all available strikes",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args()

    stocks = parse_list(args.stocks, DEFAULT_STOCKS)
    cycles = parse_cycles(args.strike_limits)

    print("Fetching expiration lists...")
    expirations = {}
    for stock in stocks:
        data = http_get(args.base_url, {"stock": stock, "expiration": "invalid"}, args.timeout)
        if "available_expirations" not in data:
            print(f"ERROR: missing available_expirations for {stock}: {data}")
            sys.exit(1)
        values = [opt.get("value") for opt in data["available_expirations"] if opt.get("value")]
        if len(values) < 4:
            print(f"ERROR: not enough expirations for {stock}: {values}")
            sys.exit(1)
        expirations[stock] = values[:4]
        print(f" {stock}: {expirations[stock]}")
        time.sleep(args.sleep)

    print("\nBuilding baseline counts (strikeLimit=%d)..." % args.baseline_limit)
    baseline_counts = {}
    for stock, exp_list in expirations.items():
        for exp in exp_list:
            data = http_get(
                args.base_url,
                {"stock": stock, "expiration": exp, "strikeLimit": args.baseline_limit},
                args.timeout,
            )
            if "error" in data:
                print(f"ERROR: baseline error for {stock} {exp}: {data}")
                sys.exit(1)
            calls_count = data.get("total_calls")
            puts_count = data.get("total_puts")
            if calls_count is None or puts_count is None:
                print(f"ERROR: baseline missing counts for {stock} {exp}: {data}")
                sys.exit(1)
            expected_code = expected_code_from_epoch(exp)
            if not all_contracts_match(data.get("calls", []), expected_code):
                print(f"ERROR: baseline calls mismatch for {stock} {exp}")
                sys.exit(1)
            if not all_contracts_match(data.get("puts", []), expected_code):
                print(f"ERROR: baseline puts mismatch for {stock} {exp}")
                sys.exit(1)
            baseline_counts[(stock, exp)] = (calls_count, puts_count)
            print(f" {stock} {exp}: calls={calls_count} puts={puts_count}")
            time.sleep(args.sleep)

    print("\nRunning %d cycles of API tests..." % len(cycles))
    for idx, strike_limit in enumerate(cycles, start=1):
        print(f"Cycle {idx}/{len(cycles)} (strikeLimit={strike_limit})")
        for stock, exp_list in expirations.items():
            for exp in exp_list:
                params = {"stock": stock, "expiration": exp}
                if strike_limit is not None:
                    params["strikeLimit"] = strike_limit
                data = http_get(args.base_url, params, args.timeout)
                if "error" in data:
                    print(f"ERROR: {stock} {exp} -> {data}")
                    sys.exit(1)
                selected_val = data.get("selected_expiration", {}).get("value")
                if selected_val != exp:
                    print(
                        f"ERROR: selected expiration mismatch for {stock} {exp}: {selected_val}"
                    )
                    sys.exit(1)
                expected_code = expected_code_from_epoch(exp)
                if not all_contracts_match(data.get("calls", []), expected_code):
                    print(f"ERROR: calls expiry mismatch for {stock} {exp}")
                    sys.exit(1)
                if not all_contracts_match(data.get("puts", []), expected_code):
                    print(f"ERROR: puts expiry mismatch for {stock} {exp}")
                    sys.exit(1)
                available_calls, available_puts = baseline_counts[(stock, exp)]
                expected_limit = strike_limit if strike_limit is not None else 25
                expected_calls = min(expected_limit, available_calls)
                expected_puts = min(expected_limit, available_puts)
                if data.get("total_calls") != expected_calls:
                    print(
                        f"ERROR: call count mismatch for {stock} {exp}: "
                        f"got {data.get('total_calls')} expected {expected_calls}"
                    )
                    sys.exit(1)
                if data.get("total_puts") != expected_puts:
                    print(
                        f"ERROR: put count mismatch for {stock} {exp}: "
                        f"got {data.get('total_puts')} expected {expected_puts}"
                    )
                    sys.exit(1)
                expected_pruned_calls = max(0, available_calls - expected_calls)
                expected_pruned_puts = max(0, available_puts - expected_puts)
                if data.get("pruned_calls_count") != expected_pruned_calls:
                    print(
                        f"ERROR: pruned calls mismatch for {stock} {exp}: "
                        f"got {data.get('pruned_calls_count')} expected {expected_pruned_calls}"
                    )
                    sys.exit(1)
                if data.get("pruned_puts_count") != expected_pruned_puts:
                    print(
                        f"ERROR: pruned puts mismatch for {stock} {exp}: "
                        f"got {data.get('pruned_puts_count')} expected {expected_pruned_puts}"
                    )
                    sys.exit(1)
                time.sleep(args.sleep)
        print(f"Cycle {idx} OK")

    print("\nAll cycles completed successfully.")


if __name__ == "__main__":
    main()
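A minimal usage sketch (not part of this diff; the values are illustrative and only the flags defined by the script above are used), assuming the scraper container built from the Dockerfile change is already serving on port 9777:

    python scripts/test_cycles.py --base-url http://127.0.0.1:9777/scrape_sync --stocks AAPL,MSFT --strike-limits default,5,25 --sleep 0.5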
scripts/test_profile_cycles.py (new file, 145 lines)
@@ -0,0 +1,145 @@
import argparse
import json
import sys
import time
import urllib.parse
import urllib.request

DEFAULT_SYMBOLS = ["AAPL", "AMZN", "MSFT", "TSLA"]

REQUIRED_SECTIONS = [
    "key_metrics",
    "valuation",
    "profitability",
    "growth",
    "financial_strength",
    "cashflow",
    "ownership",
    "analyst",
    "earnings",
    "performance",
]

REQUIRED_KEY_METRICS = [
    "previous_close",
    "open",
    "bid",
    "ask",
    "beta",
    "eps_trailing",
    "dividend_rate",
    "current_price",
]


def http_get(base_url, params, timeout):
    query = urllib.parse.urlencode(params)
    url = f"{base_url}?{query}"
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))


def parse_list(value, default):
    if not value:
        return default
    return [item.strip() for item in value.split(",") if item.strip()]


def build_signature(data):
    return {
        "key_metrics_keys": sorted(data.get("key_metrics", {}).keys()),
        "valuation_keys": sorted(data.get("valuation", {}).keys()),
        "profitability_keys": sorted(data.get("profitability", {}).keys()),
        "growth_keys": sorted(data.get("growth", {}).keys()),
        "financial_strength_keys": sorted(data.get("financial_strength", {}).keys()),
        "cashflow_keys": sorted(data.get("cashflow", {}).keys()),
        "ownership_keys": sorted(data.get("ownership", {}).keys()),
        "analyst_keys": sorted(data.get("analyst", {}).keys()),
        "earnings_keys": sorted(data.get("earnings", {}).keys()),
        "performance_keys": sorted(data.get("performance", {}).keys()),
    }


def validate_payload(symbol, data):
    if "error" in data:
        return f"API error for {symbol}: {data}"
    if data.get("stock", "").upper() != symbol.upper():
        return f"Symbol mismatch: expected {symbol} got {data.get('stock')}"
    validation = data.get("validation", {})
    if validation.get("symbol_match") is not True:
        return f"Validation symbol_match failed for {symbol}: {validation}"
    if validation.get("issues"):
        return f"Validation issues for {symbol}: {validation}"

    for section in REQUIRED_SECTIONS:
        if section not in data:
            return f"Missing section {section} for {symbol}"

    key_metrics = data.get("key_metrics", {})
    for field in REQUIRED_KEY_METRICS:
        if field not in key_metrics:
            return f"Missing key metric {field} for {symbol}"

    return None


def main():
    parser = argparse.ArgumentParser(description="Yahoo profile scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/profile",
        help="Base URL for /profile",
    )
    parser.add_argument(
        "--symbols",
        default=",".join(DEFAULT_SYMBOLS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=8,
        help="Number of validation runs per symbol",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args()

    symbols = parse_list(args.symbols, DEFAULT_SYMBOLS)
    signatures = {}

    print(f"Running {args.runs} profile cycles for: {', '.join(symbols)}")
    for run in range(1, args.runs + 1):
        print(f"Cycle {run}/{args.runs}")
        for symbol in symbols:
            data = http_get(args.base_url, {"stock": symbol}, args.timeout)
            error = validate_payload(symbol, data)
            if error:
                print(f"ERROR: {error}")
                sys.exit(1)
            signature = build_signature(data)
            if symbol not in signatures:
                signatures[symbol] = signature
            elif signatures[symbol] != signature:
                print(f"ERROR: Signature changed for {symbol}")
                print(f"Baseline: {signatures[symbol]}")
                print(f"Current: {signature}")
                sys.exit(1)
            time.sleep(args.sleep)
        print(f"Cycle {run} OK")

    print("\nAll profile cycles completed successfully.")


if __name__ == "__main__":
    main()
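Similarly, a hedged example invocation for the profile checker (values illustrative; flags are the script's own), assuming the same service also exposes /profile locally:

    python scripts/test_profile_cycles.py --base-url http://127.0.0.1:9777/profile --symbols AAPL,TSLA --runs 3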