159 lines
4.6 KiB
Python
159 lines
4.6 KiB
Python
import argparse
|
|
import json
|
|
import sys
|
|
import time
|
|
import urllib.parse
|
|
import urllib.request
|
|
|
|
# Symbols exercised when --symbols is not supplied on the command line.
DEFAULT_SYMBOLS = ["AAPL", "AMZN", "MSFT", "TSLA"]

# Top-level keys that must be present in every /profile payload.
REQUIRED_SECTIONS = [
    "company_profile", "summary_detail", "default_key_statistics",
    "financial_data", "price", "key_metrics",
    "recommendation_trend", "upgrade_downgrade_history", "earnings",
    "calendar_events", "equity_performance", "performance_overview",
    "quote", "quote_type", "recent_news",
]

# Keys that must be present inside the "company_profile" section.
REQUIRED_COMPANY_FIELDS = ["longBusinessSummary", "industry", "sector"]

# Keys that must be present inside the "key_metrics" section.
REQUIRED_KEY_METRICS = [
    "previous_close", "open", "bid", "ask",
    "beta", "eps_trailing", "dividend_rate", "current_price",
]
|
|
|
|
|
|
def http_get(base_url, params, timeout):
    """Issue a GET request and return the JSON-decoded response body.

    Args:
        base_url: Endpoint URL. It may already contain a query string, in
            which case ``params`` are appended with ``&`` rather than a
            second ``?``.
        params: Mapping or sequence of pairs accepted by
            ``urllib.parse.urlencode``.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The response body, decoded as UTF-8 and parsed with ``json.loads``.

    Raises:
        OSError: Propagated from ``urlopen`` (``URLError`` and timeouts are
            ``OSError`` subclasses).
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    query = urllib.parse.urlencode(params)

    # Choose the joiner so that a base URL that already has a query string
    # does not end up with two "?" characters.
    if "?" not in base_url:
        separator = "?"
    elif base_url.endswith(("?", "&")):
        separator = ""
    else:
        separator = "&"
    url = f"{base_url}{separator}{query}"

    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))
|
|
|
|
|
|
def parse_list(value, default):
    """Split a comma-separated string into stripped, non-empty items.

    Args:
        value: Comma-separated string, or a falsy value (``None``/``""``).
        default: Value returned when ``value`` yields no items.

    Returns:
        A list of the non-blank items with surrounding whitespace removed,
        or ``default`` (the same object, not a copy) when ``value`` is falsy
        or contains only separators/whitespace.
    """
    if not value:
        return default

    stripped = (part.strip() for part in value.split(","))
    items = [part for part in stripped if part]

    # Input such as "," or "  " carries no items; treat it like a missing
    # value instead of returning an empty list.
    return items or default
|
|
|
|
|
|
def build_signature(data):
    """Summarise a payload's structure as sorted key lists per section.

    For each tracked section the result holds ``"<section>_keys"`` mapped to
    the sorted keys of that section.

    Args:
        data: Payload mapping (must support ``.get``).

    Returns:
        A dict with one ``*_keys`` entry per tracked section, in a fixed
        order. A section that is missing, ``None`` or not a dict contributes
        an empty list instead of raising.
    """
    tracked_sections = (
        "company_profile",
        "summary_detail",
        "default_key_statistics",
        "financial_data",
        "price",
        "key_metrics",
        "data_sources",
    )

    signature = {}
    for section in tracked_sections:
        value = data.get(section)
        # A section can be present but null or a non-object; only dicts
        # have keys to record.
        keys = value.keys() if isinstance(value, dict) else ()
        signature[f"{section}_keys"] = sorted(keys)
    return signature
|
|
|
|
|
|
def validate_payload(symbol, data):
    """Check one /profile payload and describe the first problem found.

    Checks run in this order: payload type, ``"error"`` key, ``"stock"``
    matching ``symbol`` (case-insensitive), the ``"validation"`` block
    (``symbol_match`` must be ``True`` and ``issues`` must be empty), the
    required sections, the required company fields, the required key
    metrics, and finally that at least one of ``news_summary`` /
    ``recent_news`` is non-empty.

    Args:
        symbol: Ticker symbol that was requested.
        data: Decoded JSON payload.

    Returns:
        ``None`` when the payload passes every check, otherwise a
        human-readable error string. Malformed payloads (non-dict payload,
        null ``stock``/``validation``, non-dict sections) are reported as
        error strings rather than raising.
    """
    if not isinstance(data, dict):
        return f"Unexpected payload type for {symbol}: {type(data).__name__}"
    if "error" in data:
        return f"API error for {symbol}: {data}"

    # "stock" may be absent or null; only a string can match the symbol.
    stock = data.get("stock")
    if not isinstance(stock, str) or stock.upper() != symbol.upper():
        return f"Symbol mismatch: expected {symbol} got {stock}"

    # Keep the raw value for the message, but only query it when it is a dict.
    raw_validation = data.get("validation", {})
    validation = raw_validation if isinstance(raw_validation, dict) else {}
    if validation.get("symbol_match") is not True:
        return f"Validation symbol_match failed for {symbol}: {raw_validation}"
    if validation.get("issues"):
        return f"Validation issues for {symbol}: {raw_validation}"

    for section in REQUIRED_SECTIONS:
        if section not in data:
            return f"Missing section {section} for {symbol}"

    company_profile = data.get("company_profile", {})
    if not isinstance(company_profile, dict):
        return f"Section company_profile is not an object for {symbol}"
    for field in REQUIRED_COMPANY_FIELDS:
        if field not in company_profile:
            return f"Missing company field {field} for {symbol}"

    key_metrics = data.get("key_metrics", {})
    if not isinstance(key_metrics, dict):
        return f"Section key_metrics is not an object for {symbol}"
    for field in REQUIRED_KEY_METRICS:
        if field not in key_metrics:
            return f"Missing key metric {field} for {symbol}"

    if not data.get("news_summary") and not data.get("recent_news"):
        return f"Missing news summary and recent news for {symbol}"

    return None
|
|
|
|
|
|
def main():
    """Run repeated /profile validation cycles from the command line.

    For each cycle and each symbol the endpoint is fetched, the payload is
    validated, and its structural signature is compared with the signature
    recorded the first time that symbol was seen. The process exits with
    status 1 on the first request failure, validation error or signature
    change; otherwise it prints a success line after all cycles.

    Command-line options: ``--base-url``, ``--symbols``, ``--runs``,
    ``--timeout`` and ``--sleep``.
    """
    parser = argparse.ArgumentParser(description="Yahoo profile scraper test cycles")
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:9777/profile",
        help="Base URL for /profile",
    )
    parser.add_argument(
        "--symbols",
        default=",".join(DEFAULT_SYMBOLS),
        help="Comma-separated stock symbols",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=8,
        help="Number of validation runs per symbol",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=180,
        help="Request timeout in seconds",
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=0.2,
        help="Sleep between requests",
    )
    args = parser.parse_args()

    symbols = parse_list(args.symbols, DEFAULT_SYMBOLS)
    signatures = {}  # symbol -> signature recorded on its first successful fetch

    print(f"Running {args.runs} profile cycles for: {', '.join(symbols)}")
    for run in range(1, args.runs + 1):
        print(f"Cycle {run}/{args.runs}")
        for symbol in symbols:
            try:
                data = http_get(args.base_url, {"stock": symbol}, args.timeout)
            except (OSError, ValueError) as exc:
                # URLError/HTTPError and timeouts derive from OSError; JSON
                # and UTF-8 decoding failures derive from ValueError. Report
                # them like every other failure instead of a raw traceback.
                print(f"ERROR: Request failed for {symbol}: {exc}")
                sys.exit(1)

            error = validate_payload(symbol, data)
            if error:
                print(f"ERROR: {error}")
                sys.exit(1)

            signature = build_signature(data)
            # The first sighting becomes the baseline; later ones must match it.
            baseline = signatures.setdefault(symbol, signature)
            if baseline != signature:
                print(f"ERROR: Signature changed for {symbol}")
                print(f"Baseline: {baseline}")
                print(f"Current: {signature}")
                sys.exit(1)

            time.sleep(args.sleep)
        print(f"Cycle {run} OK")

    print("\nAll profile cycles completed successfully.")
|
|
|
|
|
|
# Run the validation cycles only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|