"""Yahoo profile scraper test cycles.

Repeatedly requests a ``/profile`` endpoint for a list of stock symbols,
validates every JSON payload, and checks that the set of keys in selected
sections (the "signature") stays identical from one cycle to the next.
The script exits with status 1 on the first failure.
"""

import argparse
import json
import sys
import time
import urllib.parse
import urllib.request

DEFAULT_SYMBOLS = ["AAPL", "AMZN", "MSFT", "TSLA"]

# Top-level keys that must be present in every payload.
REQUIRED_SECTIONS = [
    "company_profile",
    "summary_detail",
    "default_key_statistics",
    "financial_data",
    "price",
    "key_metrics",
    "recommendation_trend",
    "upgrade_downgrade_history",
    "earnings",
    "calendar_events",
    "equity_performance",
    "performance_overview",
    "quote",
    "quote_type",
    "recent_news",
]

# Keys that must be present inside the "company_profile" section.
REQUIRED_COMPANY_FIELDS = ["longBusinessSummary", "industry", "sector"]

# Keys that must be present inside the "key_metrics" section.
REQUIRED_KEY_METRICS = [
    "previous_close",
    "open",
    "bid",
    "ask",
    "beta",
    "eps_trailing",
    "dividend_rate",
    "current_price",
]

# Sections whose key sets make up the signature compared between cycles.
_SIGNATURE_SECTIONS = (
    "company_profile",
    "summary_detail",
    "default_key_statistics",
    "financial_data",
    "price",
    "key_metrics",
    "data_sources",
)


def _as_mapping(value):
    """Return *value* if it is a dict, otherwise an empty dict.

    A JSON ``null`` (or any other non-object) in place of a section is
    treated like an empty section instead of raising on attribute access.
    """
    return value if isinstance(value, dict) else {}


def http_get(base_url, params, timeout):
    """Fetch ``base_url`` with ``params`` as the query string and parse JSON.

    Args:
        base_url: Endpoint URL without a query string.
        params: Mapping of query parameters; URL-encoded before sending.
        timeout: Timeout in seconds handed to ``urllib.request.urlopen``.

    Returns:
        The decoded JSON document (body is decoded as UTF-8).

    Raises:
        OSError: On connection, HTTP, or timeout failures
            (``urllib.error.URLError`` is an ``OSError`` subclass).
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    query = urllib.parse.urlencode(params)
    url = f"{base_url}?{query}"
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))


def parse_list(value, default):
    """Split a comma-separated string into stripped, non-empty items.

    Args:
        value: Comma-separated string; may be empty or ``None``.
        default: Returned unchanged when ``value`` is falsy.

    Returns:
        A list of stripped items, or ``default`` when ``value`` is falsy.
        A value made only of separators/whitespace yields an empty list.
    """
    if not value:
        return default
    return [item.strip() for item in value.split(",") if item.strip()]


def build_signature(data):
    """Return the sorted key names of each signature section of ``data``.

    The result maps ``"<section>_keys"`` to a sorted list of that section's
    keys. A section that is missing, ``null``, or not a dict contributes an
    empty list rather than raising.
    """
    return {
        f"{section}_keys": sorted(_as_mapping(data.get(section)).keys())
        for section in _SIGNATURE_SECTIONS
    }


def validate_payload(symbol, data):
    """Check one profile payload and describe the first problem found.

    Args:
        symbol: The stock symbol that was requested.
        data: The decoded JSON payload returned for ``symbol``.

    Returns:
        ``None`` when the payload passes every check, otherwise a
        human-readable error message for the first failed check.
    """
    # A JSON array/string/null cannot be inspected with the checks below.
    if not isinstance(data, dict):
        return f"Unexpected payload type for {symbol}: {type(data).__name__}"
    if "error" in data:
        return f"API error for {symbol}: {data}"

    # A missing, null, or non-string "stock" is reported as a mismatch.
    stock = data.get("stock")
    if not isinstance(stock, str) or stock.upper() != symbol.upper():
        return f"Symbol mismatch: expected {symbol} got {stock}"

    raw_validation = data.get("validation", {})
    validation = _as_mapping(raw_validation)
    if validation.get("symbol_match") is not True:
        return f"Validation symbol_match failed for {symbol}: {raw_validation}"
    if validation.get("issues"):
        return f"Validation issues for {symbol}: {validation}"

    for section in REQUIRED_SECTIONS:
        if section not in data:
            return f"Missing section {section} for {symbol}"

    company_profile = _as_mapping(data.get("company_profile"))
    for field in REQUIRED_COMPANY_FIELDS:
        if field not in company_profile:
            return f"Missing company field {field} for {symbol}"

    key_metrics = _as_mapping(data.get("key_metrics"))
    for field in REQUIRED_KEY_METRICS:
        if field not in key_metrics:
            return f"Missing key metric {field} for {symbol}"

    # At least one of the two news fields must carry content.
    if not data.get("news_summary") and not data.get("recent_news"):
        return f"Missing news summary and recent news for {symbol}"
    return None


def main(argv=None):
    """Run the validation cycles described by the command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with status 1 on the first request failure, validation error, or
    signature change, and with argparse's usage error status when ``--runs``
    is below 1 or ``--symbols`` contains no symbol.
    """
    parser = argparse.ArgumentParser(description="Yahoo profile scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/profile",
        help="Base URL for /profile",
    )
    parser.add_argument(
        "--symbols",
        default=",".join(DEFAULT_SYMBOLS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=8,
        help="Number of validation runs per symbol",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args(argv)

    symbols = parse_list(args.symbols, DEFAULT_SYMBOLS)
    # Refuse configurations that would report success without testing anything.
    if args.runs < 1:
        parser.error("--runs must be at least 1")
    if not symbols:
        parser.error("--symbols must name at least one symbol")

    # First signature seen per symbol; later cycles are compared against it.
    signatures = {}

    print(f"Running {args.runs} profile cycles for: {', '.join(symbols)}")
    for run in range(1, args.runs + 1):
        print(f"Cycle {run}/{args.runs}")
        for symbol in symbols:
            try:
                data = http_get(args.base_url, {"stock": symbol}, args.timeout)
            except (OSError, ValueError) as exc:
                # Network/HTTP/timeout errors are OSError; bad UTF-8 or JSON
                # is ValueError. Report them like any other failure.
                print(f"ERROR: Request failed for {symbol}: {exc}")
                sys.exit(1)

            error = validate_payload(symbol, data)
            if error:
                print(f"ERROR: {error}")
                sys.exit(1)

            signature = build_signature(data)
            if symbol not in signatures:
                signatures[symbol] = signature
            elif signatures[symbol] != signature:
                print(f"ERROR: Signature changed for {symbol}")
                print(f"Baseline: {signatures[symbol]}")
                print(f"Current: {signature}")
                sys.exit(1)
            time.sleep(args.sleep)
        print(f"Cycle {run} OK")

    print("\nAll profile cycles completed successfully.")


if __name__ == "__main__":
    main()