"""Generate sectors_data.json for the SPX sectors dashboard page.

Data layers:
  - 11 GICS sectors (S&P 500 weights, 2026-Q1 snapshot)
  - For each sector: GICS industries (Level 3) with intra-sector weight
  - Forward P/E reference (Yardeni / FactSet style 2026-Q1 public values)
  - Trailing P/E live from yfinance per sector SPDR ETF

Re-check quarterly:
  - Sector weights / industry breakdown: State Street SPDR fact sheets
  - Forward P/E: Yardeni "S&P 500 Sectors & Industries Forward P/Es" weekly
"""

from __future__ import annotations

import json
import math
import os
import statistics
from pathlib import Path

import yfinance as yf


# Destination of the generated JSON: a file named sectors_data.json located in
# the same directory as this script (resolved to an absolute path).
OUTPUT = Path(__file__).resolve().parent / "sectors_data.json"


# Static, hand-maintained reference data for the 11 sectors (see the module
# docstring for the quarterly re-check sources). Each entry has:
#   sector:     sector display name
#   etf:        sector SPDR ETF symbol
#   weight_pct: SPX weight in percent (sum across sectors ≈ 100)
#   forward_pe: Yardeni / FactSet 2026-Q1 reference forward P/E
#   industries: (industry name, intra-sector weight %) tuples for the GICS
#               Level 3 industries; the weights inside one sector sum to 100.
#               The industry name is also the lookup key into
#               INDUSTRY_TICKERS and ETF_HINTS, so it must match those
#               dictionaries character for character.
SECTORS: list[dict] = [
    {
        "sector": "Information Technology",
        "etf": "XLK",
        "weight_pct": 30.5,
        "forward_pe": 27.5,
        "industries": [
            ("Software", 32.0),
            ("Semiconductors", 25.0),
            ("Technology Hardware, Storage & Peripherals", 18.0),
            ("Semiconductor Materials & Equipment", 7.0),
            ("IT Services", 8.0),
            ("Communications Equipment", 5.0),
            ("Electronic Equipment, Instruments & Components", 5.0),
        ],
    },
    {
        "sector": "Financials",
        "etf": "XLF",
        "weight_pct": 14.2,
        "forward_pe": 15.5,
        "industries": [
            ("Financial Services (incl. BRK.B, V, MA, AXP)", 46.0),
            ("Banks (Diversified & Regional)", 26.0),
            ("Insurance", 17.0),
            ("Capital Markets", 8.0),
            ("Consumer Finance", 3.0),
        ],
    },
    {
        "sector": "Health Care",
        "etf": "XLV",
        "weight_pct": 10.5,
        "forward_pe": 17.5,
        "industries": [
            ("Pharmaceuticals", 32.0),
            ("Health Care Equipment & Supplies", 26.0),
            ("Biotechnology", 14.0),
            ("Life Sciences Tools & Services", 12.0),
            ("Health Care Providers & Services", 11.0),
            ("Health Care Technology", 5.0),
        ],
    },
    {
        "sector": "Consumer Discretionary",
        "etf": "XLY",
        "weight_pct": 10.4,
        "forward_pe": 24.0,
        "industries": [
            ("Broadline Retail (AMZN-driven)", 26.0),
            ("Automobiles (TSLA-driven)", 17.0),
            ("Hotels, Restaurants & Leisure", 21.0),
            ("Specialty Retail", 13.0),
            ("Textiles, Apparel & Luxury Goods", 7.0),
            ("Household Durables", 7.0),
            ("Leisure Products", 4.0),
            ("Distributors", 5.0),
        ],
    },
    {
        "sector": "Communication Services",
        "etf": "XLC",
        "weight_pct": 9.6,
        "forward_pe": 19.5,
        "industries": [
            ("Interactive Media & Services (GOOGL, META)", 49.0),
            ("Entertainment (NFLX, DIS, WBD)", 19.0),
            ("Wireless Telecom (TMUS, VZ, T)", 24.0),
            ("Diversified Telecommunication Services", 5.0),
            ("Media (advertising/cable)", 3.0),
        ],
    },
    {
        "sector": "Industrials",
        "etf": "XLI",
        "weight_pct": 8.6,
        "forward_pe": 22.0,
        "industries": [
            ("Aerospace & Defense", 18.0),
            ("Machinery", 14.0),
            ("Ground Transportation (Rail / Trucking)", 9.0),
            ("Air Freight & Logistics", 7.0),
            ("Building Products", 7.0),
            ("Electrical Equipment", 8.0),
            ("Industrial Conglomerates (GE, HON)", 9.0),
            ("Professional Services", 9.0),
            ("Commercial Services & Supplies", 6.0),
            ("Trading Companies & Distributors", 5.0),
            ("Construction & Engineering", 4.0),
            ("Passenger Airlines", 4.0),
        ],
    },
    {
        "sector": "Consumer Staples",
        "etf": "XLP",
        "weight_pct": 5.6,
        "forward_pe": 21.0,
        "industries": [
            ("Beverages (KO, PEP)", 22.0),
            ("Household Products (PG)", 18.0),
            ("Tobacco (PM, MO)", 12.0),
            ("Food Products", 12.0),
            ("Consumer Staples Merchandise Retail (COST, WMT)", 24.0),
            ("Food & Staples Retailing", 8.0),
            ("Personal Care Products", 4.0),
        ],
    },
    {
        "sector": "Energy",
        "etf": "XLE",
        "weight_pct": 3.4,
        "forward_pe": 13.5,
        "industries": [
            ("Oil, Gas & Consumable Fuels", 88.0),
            ("Energy Equipment & Services", 12.0),
        ],
    },
    {
        "sector": "Materials",
        "etf": "XLB",
        "weight_pct": 2.6,
        "forward_pe": 18.0,
        "industries": [
            ("Chemicals (LIN, SHW, APD, ECL, DOW)", 70.0),
            ("Metals & Mining", 13.0),
            ("Containers & Packaging", 12.0),
            ("Construction Materials", 5.0),
        ],
    },
    {
        "sector": "Utilities",
        "etf": "XLU",
        "weight_pct": 2.4,
        "forward_pe": 17.5,
        "industries": [
            ("Electric Utilities", 62.0),
            ("Multi-Utilities", 24.0),
            ("Independent Power & Renewable Electricity", 5.0),
            ("Gas Utilities", 6.0),
            ("Water Utilities", 3.0),
        ],
    },
    {
        "sector": "Real Estate",
        "etf": "XLRE",
        "weight_pct": 2.2,
        "forward_pe": 17.5,
        "industries": [
            ("Specialized REITs (data center, self-storage)", 36.0),
            ("Telecom Tower REITs (AMT, CCI)", 13.0),
            ("Industrial REITs (PLD)", 13.0),
            ("Retail REITs", 12.0),
            ("Residential REITs", 10.0),
            ("Health Care REITs", 8.0),
            ("Office REITs", 3.0),
            ("Hotel & Resort REITs", 3.0),
            ("Real Estate Management & Development", 2.0),
        ],
    },
]


# Representative top holdings for each sub-industry — used to compute a
# market-cap-weighted forward P/E by aggregating per-ticker yfinance data
# (yfinance does NOT expose forwardPE for sector/industry ETFs, so we go
# directly to the constituents). Coverage target ≥80% of the sub-industry
# ETF's AUM. Re-check holdings quarterly against the issuer fact sheet.
#
# Keys must be spelled exactly like the industry names in SECTORS: the lookup
# is a plain dict .get(name), so an industry without a matching key simply
# gets no constituent-based forward P/E.
#
# NOTE(review): some symbols may be stale after mergers or ticker changes
# (candidates to verify: DFS, HES, FI) — a symbol that yields no usable
# forwardPE / marketCap is not fatal, it is listed under constituents_skipped.
INDUSTRY_TICKERS: dict[str, list[str]] = {
    # ── Information Technology ──────────────────────────────
    "Semiconductors": [
        "NVDA", "AVGO", "AMD", "QCOM", "INTC", "AMAT", "LRCX", "KLAC", "MU",
        "TXN", "ADI", "MRVL", "MCHP", "MPWR", "NXPI", "ON", "ASML", "TER", "ARM",
    ],
    "Software": [
        "MSFT", "ORCL", "CRM", "SAP", "ADBE", "NOW", "INTU", "PLTR", "IBM",
        "CRWD", "PANW", "FTNT", "SNOW", "WDAY", "ADSK", "TEAM", "DDOG", "ZS", "NET",
    ],
    # ── Health Care ─────────────────────────────────────────
    "Pharmaceuticals": [
        "LLY", "JNJ", "ABBV", "MRK", "PFE", "BMY", "ZTS", "VTRS",
    ],
    "Biotechnology": [
        "AMGN", "VRTX", "GILD", "REGN", "BIIB", "ALNY", "MRNA", "BMRN", "INCY",
    ],
    "Health Care Equipment & Supplies": [
        "ISRG", "ABT", "MDT", "SYK", "BSX", "EW", "BDX", "BAX", "ZBH", "RMD",
        "PODD", "IDXX", "STE", "DXCM", "RVTY", "WST", "ALGN", "GEHC",
    ],
    # ── Financials ──────────────────────────────────────────
    "Banks (Diversified & Regional)": [
        "JPM", "BAC", "WFC", "C", "USB", "PNC", "TFC", "MTB", "FITB", "RF",
        "HBAN", "KEY", "CFG", "CMA",
    ],
    "Financial Services (incl. BRK.B, V, MA, AXP)": [
        # FISV (Fiserv) was later renamed to FI (NOTE(review): confirm the
        # date and the symbol in use today); FIS is an unrelated company
        # (Fidelity National Info).
        "BRK-B", "V", "MA", "AXP", "PYPL", "FIS", "FI", "COF", "GPN", "DFS",
    ],
    "Insurance": [
        "PGR", "CB", "AIG", "MET", "PRU", "ALL", "AFL", "HIG", "TRV", "MMC",
        "AON", "AJG", "WTW", "AIZ",
    ],
    # ── Industrials ─────────────────────────────────────────
    "Aerospace & Defense": [
        "RTX", "BA", "LMT", "GE", "GD", "NOC", "LHX", "TDG", "HEI", "HII",
        "AXON", "TXT",
    ],
    # ── Communication Services ──────────────────────────────
    "Interactive Media & Services (GOOGL, META)": [
        "GOOGL", "GOOG", "META", "PINS", "MTCH",
    ],
    # ── Energy ──────────────────────────────────────────────
    "Oil, Gas & Consumable Fuels": [
        "XOM", "CVX", "COP", "EOG", "OXY", "MPC", "PSX", "VLO", "HES", "FANG",
        "KMI", "WMB", "ET", "DVN",
    ],
}


# Industry-focused ETFs related to selected industries, across several
# sectors (not only Information Technology). When a user clicks e.g.
# "Semiconductors", they often expect to see SOXX-style products, so this
# small lookup lets the dashboard show "ETF you might care about" next to
# each industry row.
# Keys must be spelled exactly like the industry names in SECTORS; an
# industry without an entry gets an empty hint list. Values are display
# strings and may carry annotations such as "(partial)".
ETF_HINTS: dict[str, list[str]] = {
    "Semiconductors": ["SOXX", "SMH"],
    "Semiconductor Materials & Equipment": ["SOXX (partial)"],
    "Software": ["IGV"],
    "Biotechnology": ["IBB", "XBI"],
    "Pharmaceuticals": ["IHE"],
    "Banks (Diversified & Regional)": ["KBE", "KRE"],
    "Aerospace & Defense": ["ITA", "XAR"],
    "Oil, Gas & Consumable Fuels": ["XOP"],
    "Energy Equipment & Services": ["XES"],
    "Chemicals (LIN, SHW, APD, ECL, DOW)": ["IYM"],
    "Specialized REITs (data center, self-storage)": ["VPN", "SRVR"],
    "Telecom Tower REITs (AMT, CCI)": ["VPN"],
}


def _live_trailing_pe(ticker: str) -> float | None:
    """Fetch the trailing P/E that yfinance currently reports for ``ticker``.

    Best-effort by design: any failure while fetching or converting the value
    (network error, missing field, non-numeric value) yields ``None`` instead
    of raising, and callers treat ``None`` as "no data".

    Non-finite values (NaN / ±inf) are mapped to ``None`` as well. They would
    otherwise flow into the derived ratios and be serialized by ``json.dumps``
    as ``NaN`` / ``Infinity``, which strict JSON parsers reject.

    Args:
        ticker: Symbol to look up (a sector SPDR ETF in this script).

    Returns:
        The trailing P/E as a finite float, or ``None`` when unavailable.
    """
    try:
        info = yf.Ticker(ticker).info or {}
        raw = info.get("trailingPE")
        if raw is None:
            return None
        value = float(raw)
    except Exception:
        # Deliberate best-effort: one missing P/E must not abort the build.
        return None
    return value if math.isfinite(value) else None


def _live_etf_history_monthly_full(ticker: str) -> list[dict]:
    """Download the complete monthly close history of ``ticker``.

    Requests the maximum available period at monthly interval with
    auto-adjusted prices and converts it to a list of
    ``{"date": "YYYY-MM-DD", "close": float}`` rows (close rounded to 4
    decimals), in the order yfinance returns them. Rows whose close is
    ``None`` or NaN are dropped.

    Returns:
        The list of rows, or ``[]`` when no data comes back or anything
        raises along the way.
    """
    try:
        frame = yf.Ticker(ticker).history(period="max", interval="1mo", auto_adjust=True)
        if frame.empty:
            return []
        points: list[dict] = []
        for stamp, price in zip(frame.index, frame["Close"]):
            if price is None:
                continue
            if isinstance(price, float) and price != price:  # NaN never equals itself
                continue
            points.append({"date": stamp.strftime("%Y-%m-%d"), "close": round(float(price), 4)})
        return points
    except Exception:
        return []


def _live_etf_history_5y_weekly(ticker: str) -> list[dict]:
    """Download five years of weekly closes for ``ticker``, ready for charting.

    Weekly sampling keeps the JSON small (~260 points). Prices are
    auto-adjusted; each row is ``{"date": "YYYY-MM-DD", "close": float}`` with
    the close rounded to 2 decimals. Rows whose close is ``None`` or NaN are
    dropped.

    Returns:
        The list of rows, or ``[]`` when no data comes back or anything
        raises along the way.
    """
    try:
        frame = yf.Ticker(ticker).history(period="5y", interval="1wk", auto_adjust=True)
        if frame.empty:
            return []
        points: list[dict] = []
        for stamp, price in zip(frame.index, frame["Close"]):
            if price is None:
                continue
            if isinstance(price, float) and price != price:  # skip NaN
                continue
            points.append({"date": stamp.strftime("%Y-%m-%d"), "close": round(float(price), 2)})
        return points
    except Exception:
        return []


# NBER official US business cycle peaks/troughs (recession start / end dates).
# The sector SPDR ETFs launched in 1998-12, so we use 2001 onward, where
# coverage is complete.
# Dates are ISO "YYYY-MM-DD" strings; they are compared lexicographically
# against the "date" field of the price series, so the format must match.
NBER_RECESSIONS: list[dict] = [
    {"name": "2001 dot-com",  "start": "2001-03-01", "end": "2001-11-30"},
    {"name": "2008 GFC",      "start": "2007-12-01", "end": "2009-06-30"},
    {"name": "2020 COVID",    "start": "2020-02-01", "end": "2020-04-30"},
]


def _series_return_between(series: list[dict], start_date: str, end_date: str) -> float | None:
    """Cumulative total return of an ETF time-series between two dates.
    Picks the first close ≥ start_date and the last close ≤ end_date.
    Returns None if either anchor is missing (ETF not yet listed)."""
    if not series:
        return None
    start_close = next((r["close"] for r in series if r["date"] >= start_date), None)
    end_close = None
    for r in series:
        if r["date"] <= end_date:
            end_close = r["close"]
        else:
            break
    if start_close is None or end_close is None or start_close <= 0:
        return None
    return round((end_close / start_close - 1) * 100, 2)


def _add_months(date_str: str, months: int) -> str:
    """date_str + N months, returned as ISO date string (day=01)."""
    from datetime import date
    y, m, _ = date_str.split("-")
    y, m = int(y), int(m)
    total = (y * 12 + (m - 1)) + months
    ny, nm = divmod(total, 12)
    return f"{ny:04d}-{nm + 1:02d}-01"


def _compute_weight_history(
    weekly_history: dict[str, list[dict]], sectors: list[dict]
) -> dict[str, list[dict]]:
    """Approximate per-sector SPX weight over 5y from ETF total-return ratios.

    Math: anchor at "today's known weights". For each historical date t:
        raw_i(t) = w_i(today) × (ETF_i_price(t) / ETF_i_today) / (SPY(t) / SPY_today)
        w_i(t)   = raw_i(t) / Σ_j raw_j(t) × 100
    The (ETF_i / SPY) ratio captures relative cumulative return → reverse-engineers
    each sector's market-cap fraction. Then renormalize so the 11 sectors sum to 100%.

    Caveat: assumes intra-sector share-count proportions are stable (no major
    IPO/buyback rebalancing). For 5y this is a decent approximation; for sector-
    weight time series of true accuracy, S&P / FactSet historical data needed.
    """
    spy = weekly_history.get("SPY", [])
    if not spy:
        return {}
    spy_today = spy[-1]["close"]
    spy_map = {r["date"]: r["close"] for r in spy}

    # Per-sector: today close + weight anchor
    sector_anchors: dict[str, dict] = {}
    for s in sectors:
        series = weekly_history.get(s["etf"], [])
        if not series:
            continue
        sector_anchors[s["etf"]] = {
            "today_close": series[-1]["close"],
            "w_today": s["weight_pct"],
            "series_map": {r["date"]: r["close"] for r in series},
        }

    out: dict[str, list[dict]] = {etf: [] for etf in sector_anchors}
    for r in spy:
        date = r["date"]
        spy_t = r["close"]
        spy_ratio = spy_t / spy_today  # SPY return from t to today (inverse)
        if spy_ratio <= 0:
            continue
        raw = {}
        for etf, anc in sector_anchors.items():
            etf_t = anc["series_map"].get(date)
            if etf_t is None or etf_t <= 0:
                continue
            etf_ratio = etf_t / anc["today_close"]
            raw[etf] = anc["w_today"] * (etf_ratio / spy_ratio)
        total = sum(raw.values())
        if total <= 0:
            continue
        for etf, raw_w in raw.items():
            out[etf].append({"date": date, "weight_pct": round(raw_w / total * 100, 3)})
    return out


def _build_sector_rotation(monthly_history: dict[str, list[dict]], sectors: list[dict]) -> dict:
    """Tabulate sector returns for each NBER recession and its early recovery.

    For every entry of NBER_RECESSIONS two windows are evaluated:
        - Recession: recession start → recession end
        - Early Recovery: recession end → recession end + 12 months
    Each window yields the SPY return, every sector ETF's return and its
    alpha (sector return − SPY return); any of these is None when the
    underlying series cannot supply both anchors. Two composite rows holding
    the plain average per phase (ignoring None values) are placed in front of
    the per-window rows.

    NOTE(review): the recovery window starts at the first bar dated on or
    after the recession end date (e.g. "2009-06-30"). If monthly bars are
    stamped at the start of the month, that is the following month's bar —
    confirm this is the intended anchor.

    Args:
        monthly_history: Ticker -> chronological ``{"date", "close"}`` rows;
            must contain "SPY".
        sectors: Dicts carrying at least ``"etf"``.

    Returns:
        A JSON-serializable dict with keys "regimes", "etfs" and "method",
        or ``{}`` when SPY has no history.
    """
    spy_series = monthly_history.get("SPY", [])
    if not spy_series:
        return {}

    # Two evaluation windows per recession, in chronological order.
    windows: list[dict] = []
    for rec in NBER_RECESSIONS:
        label = rec["name"]
        trough = rec["end"]
        windows.append(
            {"regime": f"Recession ({label})", "phase": "recession", "start": rec["start"], "end": trough}
        )
        windows.append(
            {
                "regime": f"Early Recovery ({label})",
                "phase": "early_recovery",
                "start": trough,
                "end": _add_months(trough, 12),
            }
        )

    all_etfs = [s["etf"] for s in sectors] + ["SPY"]
    sector_etfs = [symbol for symbol in all_etfs if symbol != "SPY"]

    rows = []
    for win in windows:
        begin, finish = win["start"], win["end"]
        benchmark = _series_return_between(spy_series, begin, finish)
        returns: dict = {"SPY": benchmark}
        for symbol in sector_etfs:
            sector_ret = _series_return_between(monthly_history.get(symbol, []), begin, finish)
            returns[symbol] = sector_ret
            if sector_ret is None or benchmark is None:
                returns[f"{symbol}_alpha"] = None
            else:
                returns[f"{symbol}_alpha"] = round(sector_ret - benchmark, 2)
        rows.append(
            {"regime": win["regime"], "phase": win["phase"], "start": begin, "end": finish, "returns": returns}
        )

    def _phase_mean(phase: str, key: str) -> float | None:
        # Average of one returns column over the rows of a phase, skipping None.
        observed = [
            row["returns"].get(key)
            for row in rows
            if row["phase"] == phase and row["returns"].get(key) is not None
        ]
        if not observed:
            return None
        return round(sum(observed) / len(observed), 2)

    def _avg_across(phase: str) -> dict:
        # Composite "average across recessions" / "average across recoveries".
        averages: dict[str, float | None] = {}
        for symbol in all_etfs:
            averages[symbol] = _phase_mean(phase, symbol)
            if symbol != "SPY":
                averages[f"{symbol}_alpha"] = _phase_mean(phase, f"{symbol}_alpha")
        return averages

    composite = []
    for title, phase in (("AVG · Recession", "recession"), ("AVG · Early Recovery", "early_recovery")):
        composite.append(
            {"regime": title, "phase": phase, "start": "—", "end": "—", "returns": _avg_across(phase)}
        )

    return {
        "regimes": composite + rows,
        "etfs": all_etfs,
        "method": (
            "Per-sector cumulative total return during NBER recession peaks/troughs and "
            "the 12-month period immediately after each recession ('early recovery'). "
            "alpha = sector return − SPY return for that window. "
            "ETFs without coverage (e.g. XLC inception 2018, XLRE 2015) show n/a."
        ),
    }


def _live_10y_treasury_yield_pct() -> float | None:
    """10Y Treasury yield (percent), used as the risk-free anchor for ERP.

    Source: yfinance ^TNX (CBOE 10Y Treasury Yield Index), last five days.
    Returns the most recent *available* close in percent, rounded to 2
    decimals (e.g. 4.31 means 4.31%). Rows with a NaN close are ignored so
    that an incomplete latest bar falls back to the previous close instead of
    propagating NaN into every ERP figure and into the JSON output.

    Returns:
        The yield in percent, or ``None`` when no usable close is available
        or the request fails (best-effort by design).
    """
    try:
        hist = yf.Ticker("^TNX").history(period="5d")
        if hist.empty:
            return None
        closes = hist["Close"].dropna()
        if closes.empty:
            return None
        return round(float(closes.iloc[-1]), 2)
    except Exception:
        # Deliberate best-effort: ERP is simply reported as unavailable.
        return None


def _weighted_industry_fwd_pe(tickers: list[str]) -> dict | None:
    """Aggregate forward P/E for a sub-industry as a true portfolio P/E:
        Σ(market_cap_i) / Σ(market_cap_i / forward_PE_i)
    i.e. total portfolio market cap divided by total forward earnings —
    mathematically equivalent to a market-cap-weighted HARMONIC mean of P/E.
    This matches the Yardeni / FactSet / JPM "Guide to Markets" sector P/E
    convention. The naive arithmetic mcap-weighted PE is wrong because it
    over-weights high-PE outliers (e.g. ARM @ 109) whose earnings
    contribution is tiny. Returns None when no constituents have valid data."""
    if not tickers:
        return None
    sum_mcap = 0.0
    sum_earnings = 0.0
    used = 0
    skipped: list[str] = []
    for t in tickers:
        # Per-ticker try only — outer wrapper removed so genuine bugs (e.g.
        # type mismatch, non-numeric forwardPE) surface in stderr instead of
        # being swallowed as "industry has no data".
        try:
            info = yf.Ticker(t).info or {}
        except Exception:
            skipped.append(t)
            continue
        fpe = info.get("forwardPE")
        mcap = info.get("marketCap")
        if isinstance(fpe, (int, float)) and isinstance(mcap, (int, float)) and fpe > 0 and mcap > 0:
            sum_mcap += float(mcap)
            sum_earnings += float(mcap) / float(fpe)
            used += 1
        else:
            skipped.append(t)
    if sum_earnings <= 0:
        return None
    return {
        "value": round(sum_mcap / sum_earnings, 2),
        "method": "portfolio P/E (= Σ mcap / Σ forward earnings, harmonic mcap-weighted)",
        "constituents_used": used,
        "constituents_skipped": skipped,
        "total_mcap_billion_usd": round(sum_mcap / 1e9, 1),
        "total_forward_earnings_billion_usd": round(sum_earnings / 1e9, 1),
    }


def _implied_peg(forward_pe: float | None, trailing_pe: float | None) -> float | None:
    """Implied PEG = fwdPE / implied_growth%, where
    implied_growth% = (trailing_PE / forward_PE - 1) * 100.
    Returns None when growth ≤ 0 (PEG undefined / "infinitely expensive")."""
    if not forward_pe or not trailing_pe or forward_pe <= 0 or trailing_pe <= 0:
        return None
    growth_pct = (trailing_pe / forward_pe - 1) * 100
    if growth_pct <= 0:
        return None
    return round(forward_pe / growth_pct, 3)


def _zscore(values: list[float | None]) -> list[float | None]:
    """Per-element z-score. Drops None from mean/std calculation; None passes through."""
    valid = [v for v in values if v is not None]
    if len(valid) < 2:
        return [None] * len(values)
    mu = statistics.mean(valid)
    sd = statistics.stdev(valid)
    if sd == 0:
        return [0.0 if v is not None else None for v in values]
    return [round((v - mu) / sd, 3) if v is not None else None for v in values]


def _ey_erp(forward_pe: float | None, ust10y_pct: float | None) -> tuple[float | None, float | None]:
    """Earnings Yield = 1/fwdPE (in %); ERP = EY - 10Y Treasury yield (both in %)."""
    if forward_pe is None or forward_pe <= 0:
        return None, None
    ey = 100.0 / forward_pe
    if ust10y_pct is None:
        return round(ey, 2), None
    return round(ey, 2), round(ey - ust10y_pct, 2)


def build_payload() -> dict:
    """Assemble the complete dashboard payload (the dict later dumped to JSON).

    Combines the static SECTORS reference data with live yfinance lookups:
    the 10Y Treasury yield, full monthly price history for every sector ETF
    plus SPY, each sector ETF's trailing P/E and, for industries listed in
    INDUSTRY_TICKERS, a constituent-based forward P/E. Derived per-sector
    fields are earnings yield, ERP, implied PEG and a 2-factor composite
    value score; the payload also carries the reconstructed sector-weight
    history and the NBER recession/recovery rotation table.

    The SPX-level forward P/E is aggregated as Σ w_i / Σ (w_i / PE_i) —
    index value over index earnings, i.e. the weight-weighted harmonic mean
    of the sector P/Es — the same convention ``_weighted_industry_fwd_pe``
    uses for industries. An arithmetic weight-average would overstate the
    index multiple by over-weighting high-P/E sectors.

    Returns:
        A JSON-serializable dict. Live values that could not be fetched are
        ``None`` (or empty lists for price histories).
    """
    ust10y = _live_10y_treasury_yield_pct()
    # Full-history monthly prices for sector ETFs + SPY (1998-12+ for 9 core ETFs,
    # 2015-10+ for XLRE, 2018-06+ for XLC). This single dataset powers:
    #   - the "price walk" chart (long history with rangeslider zoom)
    #   - the "weight history" reconstruction
    #   - the NBER recession regime analysis
    history_tickers = [s["etf"] for s in SECTORS] + ["SPY"]
    monthly_full = {t: _live_etf_history_monthly_full(t) for t in history_tickers}
    sectors = []
    weight_total = 0.0
    priced_weight = 0.0      # Σ w_i over sectors with a usable forward P/E
    implied_earnings = 0.0   # Σ w_i / PE_i (earnings per unit of index value)
    for s in SECTORS:
        trailing_pe = _live_trailing_pe(s["etf"])
        # Intra-sector industry weights should sum to ~100 (hand-rounded, so
        # small drift is expected). Nothing is enforced here: the total is
        # only surfaced as "industry_total_check" for inspection.
        ind_total = sum(w for _, w in s["industries"])
        industries = []
        for name, pct in s["industries"]:
            ind_obj = {
                "name": name,
                "weight_in_sector_pct": pct,
                "weight_in_spx_pct": round(s["weight_pct"] * pct / 100.0, 3),
                "etf_hints": ETF_HINTS.get(name, []),
            }
            # Industries without tickers (or without usable data) get no
            # forward_pe / earnings_yield_pct / erp_pct keys at all.
            agg = _weighted_industry_fwd_pe(INDUSTRY_TICKERS.get(name, []))
            if agg is not None:
                ind_obj["forward_pe"] = agg["value"]
                ind_obj["forward_pe_meta"] = agg
                ey, erp = _ey_erp(agg["value"], ust10y)
                ind_obj["earnings_yield_pct"] = ey
                ind_obj["erp_pct"] = erp
            industries.append(ind_obj)
        ey, erp = _ey_erp(s["forward_pe"], ust10y)
        peg = _implied_peg(s["forward_pe"], trailing_pe)
        sectors.append(
            {
                "sector": s["sector"],
                "etf": s["etf"],
                "weight_pct": s["weight_pct"],
                "trailing_pe": trailing_pe,
                "forward_pe": s["forward_pe"],
                "earnings_yield_pct": ey,
                "erp_pct": erp,
                "implied_peg": peg,
                "industry_total_check": round(ind_total, 2),
                "industries": industries,
            }
        )
        weight_total += s["weight_pct"]
        # Only sectors with a positive P/E can contribute earnings.
        if s["forward_pe"] and s["forward_pe"] > 0:
            priced_weight += s["weight_pct"]
            implied_earnings += s["weight_pct"] / s["forward_pe"]

    # Index P/E = total value / total earnings (harmonic aggregation).
    spx_forward_pe = round(priced_weight / implied_earnings, 2) if implied_earnings > 0 else None

    spx_ey, spx_erp = _ey_erp(spx_forward_pe, ust10y)

    # Composite Value Score (simplified, 2-factor):
    #   score = α × z(1/PEG) + β × z(ERP),  α=0.6, β=0.4
    # Higher = better value. z-scored across the 11 sectors.
    ALPHA, BETA = 0.6, 0.4
    inv_pegs = [(1.0 / s["implied_peg"]) if s["implied_peg"] else None for s in sectors]
    erps = [s["erp_pct"] for s in sectors]
    z_inv_peg = _zscore(inv_pegs)
    z_erp = _zscore(erps)
    for i, s in enumerate(sectors):
        zp, ze = z_inv_peg[i], z_erp[i]
        if zp is None or ze is None:
            # NOTE(review): this note blames the PEG even when only the ERP
            # z-score is missing (e.g. no Treasury yield) — confirm wording.
            s["composite_score"] = None
            s["composite_score_breakdown"] = {
                "z_inv_peg": zp, "z_erp": ze, "alpha": ALPHA, "beta": BETA,
                "note": "PEG undefined (implied growth ≤ 0); composite score unavailable",
            }
        else:
            s["composite_score"] = round(ALPHA * zp + BETA * ze, 3)
            s["composite_score_breakdown"] = {
                "z_inv_peg": zp, "z_erp": ze, "alpha": ALPHA, "beta": BETA,
            }

    sector_rotation = _build_sector_rotation(monthly_full, sectors)
    weight_history = _compute_weight_history(monthly_full, sectors)

    return {
        "as_of": "2026-Q1",
        "weight_total_check": round(weight_total, 2),
        "spx_forward_pe": spx_forward_pe,
        "ust10y_yield_pct": ust10y,
        "spx_earnings_yield_pct": spx_ey,
        "spx_erp_pct": spx_erp,
        "historical_prices_max_monthly": monthly_full,
        "sector_weight_history_max_monthly": weight_history,
        "sector_rotation": sector_rotation,
        "data_sources": {
            "sector_weights": "S&P / State Street SPDR fact sheets, 2026-Q1",
            "industry_breakdown": "State Street SPDR ETF fact sheets, 2026-Q1",
            "trailing_pe": "yfinance live (sector SPDR ETF .info.trailingPE)",
            "forward_pe_sector": "Yardeni / FactSet 2026-Q1 public releases",
            # NOTE(review): the fallback described in the last sentence is not
            # performed in this function (such industries carry no forward_pe
            # key) — presumably the consumer applies it; verify.
            "forward_pe_industry": (
                "yfinance live, market-cap-weighted aggregation across the "
                "industry's top constituents (≥80% AUM coverage). "
                "Industries not yet in INDUSTRY_TICKERS fall back to inheriting "
                "the parent sector's fwd P/E."
            ),
            "ust10y_yield": "yfinance ^TNX (CBOE 10Y Treasury Yield Index), live close",
            "earnings_yield": "EY = 1 / forward P/E (盈利收益率, 类似债券 yield)",
            "erp": "ERP = EY - 10Y Treasury yield (Equity Risk Premium 简化版, 越高股票相对债券越有吸引力)",
            "implied_peg": "PEG = forward P/E / implied growth %, 其中 implied_growth% = (trail/fwd - 1) * 100",
            "composite_score": (
                "Composite Value Score (简化 2-factor): "
                "score = 0.6 × z(1/PEG) + 0.4 × z(ERP), "
                "z-score 在 11 个板块内部标准化. 越高 = 综合性价比越好. "
                "完整 3-factor 版还需历史 PE z-score (需付费源 / 手工 Yardeni 数据)."
            ),
            "historical_prices": (
                "yfinance full-history monthly closes (auto-adjusted), 用于价格走势图. "
                "覆盖范围: 9 核心 sector ETF (XLK/XLF/XLV/XLY/XLP/XLI/XLB/XLE/XLU) "
                "1998-12 起, XLRE 2015-10 起, XLC 2018-06 起 (GICS 改版). "
                "注: 这不是历史 PE 时间序列, 仅是价格走势; 因 yfinance 对 ETF "
                "无历史 EPS, 真历史 PE 需要付费源 (FactSet/Bloomberg) 或 SimFin/AV 重建."
            ),
            "sector_weight_history": (
                "Per-sector approx SPX weight, monthly, 1998-12+. "
                "重建公式: w_i(t) = w_i(today) × (ETF_i(t)/ETF_i(today)) ÷ (SPY(t)/SPY(today)), "
                "再 normalize 使各时点 11 板块加和 = 100%. 假设 sector 内 share-count 比例稳定. "
                "近似精度: 5y ±0.5pp, 27y 长期 ±2pp 量级 (有 GICS 改版导致结构性偏差, 主要是 2018 XLC 重组). "
                "真实历史 SPX 板块权重需要 S&P/FactSet 付费源."
            ),
            "sector_rotation": (
                "板块在 NBER 衰退/复苏 regime 下的累计回报 + 相对 SPY 的 alpha. "
                "Recession 期 = NBER 顶到 NBER 底; Early Recovery = NBER 底 + 12 个月. "
                "覆盖 2001 dot-com / 2008 GFC / 2020 COVID 三次衰退. "
                "XLC (inception 2018), XLRE (inception 2015) 仅覆盖 2020 COVID. "
                "ETF 来自 yfinance auto-adjusted 月线."
            ),
        },
        "sectors": sectors,
    }


def main() -> None:
    """Build the payload, write it atomically to OUTPUT and print a summary."""
    payload = build_payload()
    # Atomic write: HTTP server may regen on a request thread that takes 30+s
    # (yfinance 200+ ticker fetch), and a concurrent browser refresh could read
    # a half-written file. Write to .tmp then os.replace to commit atomically.
    tmp = OUTPUT.with_suffix(OUTPUT.suffix + ".tmp")
    # ensure_ascii=False leaves non-ASCII text (CJK, "≥", "×", "—") unescaped,
    # so the encoding must be pinned: the locale default may be unable to
    # encode it (UnicodeEncodeError) or produce a file that is not UTF-8.
    tmp.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
    os.replace(tmp, OUTPUT)
    print(f"wrote {OUTPUT}")
    print(
        f"  sectors: {len(payload['sectors'])}  "
        f"weight_total: {payload['weight_total_check']}  "
        f"SPX fwd P/E: {payload['spx_forward_pe']}  "
        f"10Y: {payload['ust10y_yield_pct']}%  "
        f"SPX ERP: {payload['spx_erp_pct']}%"
    )


# Script entry point: regenerate the JSON only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
