# Files
# SimpleScraper/scripts/test_profile_cycles.py
# 2025-12-29 12:27:30 -08:00
#
# 146 lines
# 4.2 KiB
# Python

import argparse
import json
import sys
import time
import urllib.parse
import urllib.request
# Symbols exercised when --symbols is not supplied on the command line.
DEFAULT_SYMBOLS = ["AAPL", "AMZN", "MSFT", "TSLA"]
# Top-level payload keys that validate_payload() requires in every response.
REQUIRED_SECTIONS = [
"key_metrics",
"valuation",
"profitability",
"growth",
"financial_strength",
"cashflow",
"ownership",
"analyst",
"earnings",
"performance",
]
# Fields that validate_payload() requires inside the "key_metrics" section.
REQUIRED_KEY_METRICS = [
"previous_close",
"open",
"bid",
"ask",
"beta",
"eps_trailing",
"dividend_rate",
"current_price",
]
def http_get(base_url, params, timeout):
    """Fetch ``base_url`` with ``params`` as the query string and decode JSON.

    Args:
        base_url: URL to request; a ``?`` and the encoded query are appended.
        params: Mapping (or sequence of pairs) passed to ``urlencode``.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The response body decoded as UTF-8 and parsed with ``json.loads``.
    """
    url = "{}?{}".format(base_url, urllib.parse.urlencode(params))
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read()
    return json.loads(body.decode("utf-8"))
def parse_list(value, default):
    """Split a comma-separated string into a list of trimmed, non-empty items.

    Args:
        value: Comma-separated text, or a falsy value (``None``, ``""``).
        default: Returned unchanged when ``value`` is falsy.

    Returns:
        ``default`` when ``value`` is falsy; otherwise the list of stripped
        items with blank entries dropped (which may be an empty list).
    """
    if value:
        trimmed = (chunk.strip() for chunk in value.split(","))
        return [entry for entry in trimmed if entry]
    return default
def build_signature(data):
    """Summarise the shape of a profile payload as sorted key lists.

    For each known section the result holds ``"<section>_keys"`` mapped to the
    sorted keys of that section; a section absent from ``data`` contributes an
    empty list.

    Args:
        data: Decoded profile payload (a dict of section name -> dict).

    Returns:
        A dict of ``"<section>_keys"`` -> sorted list of that section's keys.
    """
    section_names = (
        "key_metrics",
        "valuation",
        "profitability",
        "growth",
        "financial_strength",
        "cashflow",
        "ownership",
        "analyst",
        "earnings",
        "performance",
    )
    signature = {}
    for name in section_names:
        section = data.get(name, {})
        signature[f"{name}_keys"] = sorted(section.keys())
    return signature
def validate_payload(symbol, data):
    """Check one /profile response and describe the first problem found.

    Malformed payloads (wrong top-level type, ``null`` or non-string
    ``stock``, ``null`` ``validation``, sections that are not objects) are
    reported as validation failures rather than raising.

    Args:
        symbol: Stock symbol that was requested.
        data: Decoded JSON payload returned for that symbol.

    Returns:
        A human-readable message for the first failed check, or ``None`` when
        every check passes.
    """
    # Only a JSON object can be inspected with .get(); report anything else.
    if not isinstance(data, dict):
        return f"Unexpected payload type for {symbol}: {type(data).__name__}"
    if "error" in data:
        return f"API error for {symbol}: {data}"

    # A null or non-string "stock" counts as a mismatch instead of crashing
    # on .upper().
    stock = data.get("stock")
    if not isinstance(stock, str) or stock.upper() != symbol.upper():
        return f"Symbol mismatch: expected {symbol} got {stock}"

    validation = data.get("validation", {})
    if not isinstance(validation, dict) or validation.get("symbol_match") is not True:
        return f"Validation symbol_match failed for {symbol}: {validation}"
    if validation.get("issues"):
        return f"Validation issues for {symbol}: {validation}"

    for section in REQUIRED_SECTIONS:
        if section not in data:
            return f"Missing section {section} for {symbol}"
        # Later steps read each section's keys, so a null/list/string section
        # is as unusable as a missing one.
        if not isinstance(data[section], dict):
            return f"Section {section} is not an object for {symbol}"

    key_metrics = data.get("key_metrics", {})
    if not isinstance(key_metrics, dict):
        return f"Section key_metrics is not an object for {symbol}"
    for field in REQUIRED_KEY_METRICS:
        if field not in key_metrics:
            return f"Missing key metric {field} for {symbol}"
    return None
def main():
    """Run repeated /profile validation cycles from the command line.

    Every cycle requests each symbol, validates the payload, and compares its
    key signature with the one recorded the first time that symbol was seen.
    The process exits with status 1 on the first request failure, validation
    error, or signature change; invalid command-line values are rejected
    through ``parser.error`` (argparse exits with status 2).
    """
    parser = argparse.ArgumentParser(description="Yahoo profile scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/profile",
        help="Base URL for /profile",
    )
    parser.add_argument(
        "--symbols",
        default=",".join(DEFAULT_SYMBOLS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=8,
        help="Number of validation runs per symbol",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args()

    # Reject settings that would otherwise report success without testing
    # anything, or fail later inside time.sleep().
    if args.runs < 1:
        parser.error("--runs must be at least 1")
    if args.sleep < 0:
        parser.error("--sleep must not be negative")

    symbols = parse_list(args.symbols, DEFAULT_SYMBOLS)
    if not symbols:
        parser.error("--symbols did not contain any stock symbols")

    # First signature seen per symbol; later cycles must match it.
    signatures = {}
    print(f"Running {args.runs} profile cycles for: {', '.join(symbols)}")
    for run in range(1, args.runs + 1):
        print(f"Cycle {run}/{args.runs}")
        for symbol in symbols:
            try:
                data = http_get(args.base_url, {"stock": symbol}, args.timeout)
            except (OSError, ValueError) as exc:
                # OSError covers URLError/HTTPError, timeouts and connection
                # failures; ValueError covers invalid JSON, bad UTF-8 and
                # malformed URLs.
                print(f"ERROR: Request failed for {symbol}: {exc}")
                sys.exit(1)
            error = validate_payload(symbol, data)
            if error:
                print(f"ERROR: {error}")
                sys.exit(1)
            signature = build_signature(data)
            if symbol not in signatures:
                signatures[symbol] = signature
            elif signatures[symbol] != signature:
                print(f"ERROR: Signature changed for {symbol}")
                print(f"Baseline: {signatures[symbol]}")
                print(f"Current: {signature}")
                sys.exit(1)
            time.sleep(args.sleep)
        print(f"Cycle {run} OK")
    print("\nAll profile cycles completed successfully.")


if __name__ == "__main__":
    main()