Files
SimpleScraper/scripts/test_profile_cycles.py
2025-12-29 00:45:13 -08:00

159 lines
4.6 KiB
Python

import argparse
import json
import sys
import time
import urllib.parse
import urllib.request
# Symbols used as the default value of the --symbols option.
DEFAULT_SYMBOLS = ["AAPL", "AMZN", "MSFT", "TSLA"]

# Top-level keys that validate_payload requires in every response.
REQUIRED_SECTIONS = [
    "company_profile",
    "summary_detail",
    "default_key_statistics",
    "financial_data",
    "price",
    "key_metrics",
    "recommendation_trend",
    "upgrade_downgrade_history",
    "earnings",
    "calendar_events",
    "equity_performance",
    "performance_overview",
    "quote",
    "quote_type",
    "recent_news",
]

# Keys that validate_payload requires inside the "company_profile" section.
REQUIRED_COMPANY_FIELDS = ["longBusinessSummary", "industry", "sector"]

# Keys that validate_payload requires inside the "key_metrics" section.
REQUIRED_KEY_METRICS = [
    "previous_close",
    "open",
    "bid",
    "ask",
    "beta",
    "eps_trailing",
    "dividend_rate",
    "current_price",
]
def http_get(base_url, params, timeout):
    """Issue a GET request and return the response body parsed as JSON.

    Args:
        base_url: Endpoint URL. It may already contain a query string or a
            fragment; ``params`` are merged into the existing query rather
            than appended after a second ``?``.
        params: Query parameters, in any form accepted by
            ``urllib.parse.urlencode``. When empty, no ``?`` is added.
        timeout: Timeout in seconds handed to ``urllib.request.urlopen``.

    Returns:
        The JSON document decoded from the UTF-8 response body.

    Raises:
        Whatever ``urllib.request.urlopen``, ``bytes.decode`` or
        ``json.loads`` raise; nothing is caught here.
    """
    parts = urllib.parse.urlsplit(base_url)
    encoded = urllib.parse.urlencode(params)
    # Keep any query already present on base_url and drop empty pieces so the
    # final URL never contains "??", "?&" or a dangling "?".
    merged_query = "&".join(piece for piece in (parts.query, encoded) if piece)
    url = urllib.parse.urlunsplit(parts._replace(query=merged_query))
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))
def parse_list(value, default):
    """Split a comma-separated string into a list of trimmed, non-empty items.

    Args:
        value: Comma-separated text, or a falsy value (``None``, ``""``).
        default: Value returned when ``value`` yields no items.

    Returns:
        The list of stripped items, or ``default`` when ``value`` is falsy or
        contains only separators and whitespace. Falling back in the latter
        case prevents callers from silently iterating over an empty list.
    """
    if not value:
        return default
    stripped = (item.strip() for item in value.split(","))
    items = [item for item in stripped if item]
    return items or default
def build_signature(data):
    """Summarise the shape of a payload as sorted key lists per section.

    Args:
        data: Mapping holding the response sections.

    Returns:
        A dict mapping ``"<section>_keys"`` to the sorted keys of that
        section, for each section listed below. A section that is missing,
        ``None`` or otherwise not a dict contributes an empty list instead of
        raising, so a null section cannot crash the comparison.
    """
    sections = (
        "company_profile",
        "summary_detail",
        "default_key_statistics",
        "financial_data",
        "price",
        "key_metrics",
        "data_sources",
    )
    signature = {}
    for section in sections:
        content = data.get(section)
        # Iterating a dict yields its keys, so sorted(content) == sorted keys.
        keys = sorted(content) if isinstance(content, dict) else []
        signature[f"{section}_keys"] = keys
    return signature
def validate_payload(symbol, data):
    """Validate one decoded response for ``symbol``.

    Checks run in order and the first failure wins: payload type, API error
    marker, symbol match, the ``validation`` block, required sections,
    required company fields, required key metrics, and finally that at least
    one of ``news_summary`` / ``recent_news`` is non-empty.

    Malformed values (a non-dict payload, or ``null``/non-dict values where a
    string or object is expected) are reported as validation failures rather
    than raising ``AttributeError``/``TypeError``.

    Args:
        symbol: Stock symbol that was requested.
        data: Decoded JSON response.

    Returns:
        ``None`` when every check passes, otherwise a message describing the
        first failed check.
    """
    if not isinstance(data, dict):
        return (
            f"Unexpected payload for {symbol}: expected a JSON object, "
            f"got {type(data).__name__}"
        )
    if "error" in data:
        return f"API error for {symbol}: {data}"

    stock = data.get("stock", "")
    # A null or non-string "stock" can never match the requested symbol.
    if not isinstance(stock, str) or stock.upper() != symbol.upper():
        return f"Symbol mismatch: expected {symbol} got {data.get('stock')}"

    validation = data.get("validation", {})
    if not isinstance(validation, dict) or validation.get("symbol_match") is not True:
        return f"Validation symbol_match failed for {symbol}: {validation}"
    if validation.get("issues"):
        return f"Validation issues for {symbol}: {validation}"

    for section in REQUIRED_SECTIONS:
        if section not in data:
            return f"Missing section {section} for {symbol}"

    # A section that is present but not an object has none of its fields.
    company_profile = data.get("company_profile")
    if not isinstance(company_profile, dict):
        company_profile = {}
    for field in REQUIRED_COMPANY_FIELDS:
        if field not in company_profile:
            return f"Missing company field {field} for {symbol}"

    key_metrics = data.get("key_metrics")
    if not isinstance(key_metrics, dict):
        key_metrics = {}
    for field in REQUIRED_KEY_METRICS:
        if field not in key_metrics:
            return f"Missing key metric {field} for {symbol}"

    if not data.get("news_summary") and not data.get("recent_news"):
        return f"Missing news summary and recent news for {symbol}"
    return None
def main():
    """Run repeated fetch-and-validate cycles against the profile endpoint.

    Parses the command-line options, then for each run and each symbol:
    fetches the payload with ``http_get``, checks it with
    ``validate_payload``, and compares its ``build_signature`` result with
    the first signature recorded for that symbol.

    Any failure prints an ``ERROR: ...`` line and exits with status 1. That
    includes request or decoding failures raised as ``OSError`` or
    ``ValueError``, which are reported the same way instead of surfacing as
    a traceback.
    """
    parser = argparse.ArgumentParser(description="Yahoo profile scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/profile",
        help="Base URL for /profile",
    )
    parser.add_argument(
        "--symbols",
        default=",".join(DEFAULT_SYMBOLS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=8,
        help="Number of validation runs per symbol",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args()

    symbols = parse_list(args.symbols, DEFAULT_SYMBOLS)
    # First signature seen per symbol; later runs must match it exactly.
    signatures = {}
    print(f"Running {args.runs} profile cycles for: {', '.join(symbols)}")
    for run in range(1, args.runs + 1):
        print(f"Cycle {run}/{args.runs}")
        for symbol in symbols:
            try:
                data = http_get(args.base_url, {"stock": symbol}, args.timeout)
            except (OSError, ValueError) as exc:
                # OSError covers urllib.error.URLError/HTTPError and socket
                # timeouts; ValueError covers json.JSONDecodeError and
                # UnicodeDecodeError.
                print(f"ERROR: Request failed for {symbol}: {exc}")
                sys.exit(1)
            error = validate_payload(symbol, data)
            if error:
                print(f"ERROR: {error}")
                sys.exit(1)
            signature = build_signature(data)
            # setdefault stores the signature on first sight and otherwise
            # returns the recorded baseline.
            baseline = signatures.setdefault(symbol, signature)
            if baseline != signature:
                print(f"ERROR: Signature changed for {symbol}")
                print(f"Baseline: {baseline}")
                print(f"Current: {signature}")
                sys.exit(1)
            time.sleep(args.sleep)
        print(f"Cycle {run} OK")
    print("\nAll profile cycles completed successfully.")
# Run the test cycles only when this file is executed as a script.
if __name__ == "__main__":
    main()