"""生成前端自助回测用的数据 JSON.

结构:
{
    "start_date": "2010-09-09",
    "end_date":   "2026-04-22",
    "prices": [{ "date": "...", "VOO": 95.88, "TQQQ": 1.14, "CASH": 50.00 }, ...],
    "signals": [{ "date": "...", "action": "🔵 回场", "target_state": "FULL", "spx": 1220.0 }, ...]
}

前端 backtest.html 从 start_date 用户选择的日期开始, 用这些数据实时模拟仓位演化.
"""
from __future__ import annotations

import json
from pathlib import Path

import pandas as pd
import yfinance as yf

from sentiment_indicator.core.position import _classify_action

# Absolute path of the JSON file this script writes; it sits next to the script.
OUTPUT = Path(__file__).resolve().with_name("backtest_data.json")
# Directory two levels above this script.
ROOT = OUTPUT.parent.parent
# CSV of trade actions that is read as the signal source.
TRADE_ACTIONS = ROOT.joinpath("output", "spx", "sell", "trade_actions.csv")

# First date requested from yfinance.
START = "2010-09-07"
# yfinance treats `end` as exclusive, so request "tomorrow" to make sure
# today's bar and everything before it are included.
END = (pd.Timestamp.now() + pd.Timedelta(days=1)).date().isoformat()


# Canonical action type (as returned by `core.position._classify_action`)
# → backtest target_state. Going through the canonical type means the web
# page and the state machine never disagree on classification.
#
# `clear_reset` (清仓重置) is intentionally NOT mapped here: it is a
# signal-cycle reset marker, not a portfolio trade, and backtests treat
# it as a no-op. See doc/SELL_STRATEGY.md "清仓重置 vs backtest" section.
_BACKTEST_TARGET_STATE = {
    "full_clear": "EMPTY",
    "upgrade_full": "EMPTY",
    "half_clear": "HALF",
    "buy": "FULL",
    "reentry": "FULL",
    "warn": "WARN",
}


def _map_action(action_text: str) -> "str | None":
    """Map a raw `动作` text to a backtest target_state, or None if unmapped."""
    return _BACKTEST_TARGET_STATE.get(_classify_action(action_text))


def _stitch_cash_series(closes: pd.DataFrame) -> pd.Series:
    """Build the cash-proxy close series: SGOV, back-filled with rescaled BIL.

    BIL is multiplied by a constant so that it equals SGOV on the first date
    where both have a price; that way the stitched series has no level jump.
    """
    has_sgov = "SGOV" in closes.columns
    has_bil = "BIL" in closes.columns
    if not (has_sgov and has_bil):
        # Only one proxy column exists; use it as is (KeyError if neither).
        return closes["BIL"] if has_bil else closes["SGOV"]

    sgov, bil = closes["SGOV"], closes["BIL"]
    if sgov.first_valid_index() is None:
        # SGOV column is present but empty: fall back to BIL alone.
        return bil

    # Anchor on the first date where BOTH series are valid. Anchoring on
    # SGOV's first bar alone yields a NaN scale if BIL is missing that day,
    # which would silently discard the whole pre-SGOV history downstream.
    both_valid = sgov.notna() & bil.notna()
    if not both_valid.any():
        print("  ⚠️ SGOV and BIL never overlap; using SGOV only for CASH")
        return sgov
    anchor = both_valid[both_valid].index[0]
    bil_scale = sgov.loc[anchor] / bil.loc[anchor]
    return sgov.fillna(bil * bil_scale)


def _load_signals(first_date: pd.Timestamp, last_date: pd.Timestamp) -> pd.DataFrame:
    """Read TRADE_ACTIONS, keep rows inside [first_date, last_date], add target_state.

    Rows whose action does not map to a target_state are dropped.
    """
    signals_df = pd.read_csv(TRADE_ACTIONS)
    signals_df["date"] = pd.to_datetime(signals_df["date"])
    in_range = (signals_df["date"] >= first_date) & (signals_df["date"] <= last_date)
    signals_df = signals_df[in_range].copy()
    signals_df["target_state"] = signals_df["动作"].apply(_map_action)
    return signals_df.dropna(subset=["target_state"])


def _prices_to_records(prices: pd.DataFrame) -> list:
    """Serialise the price frame to a list of JSON-ready dicts (4 decimals)."""
    return [
        {
            "date": day.strftime("%Y-%m-%d"),
            "VOO": round(float(voo), 4),
            "TQQQ": round(float(tqqq), 4),
            "CASH": round(float(cash), 4),
        }
        for day, voo, tqqq, cash in zip(
            prices.index, prices["VOO"], prices["TQQQ"], prices["CASH"]
        )
    ]


def _signals_to_records(signals_df: pd.DataFrame) -> list:
    """Serialise the signal frame to a list of JSON-ready dicts."""
    return [
        {
            "date": day.strftime("%Y-%m-%d"),
            "action": action,
            "target_state": target_state,
            # A missing SPX value becomes JSON null rather than NaN.
            "spx": round(float(spx), 2) if pd.notna(spx) else None,
        }
        for day, action, target_state, spx in zip(
            signals_df["date"],
            signals_df["动作"],
            signals_df["target_state"],
            signals_df["SPX"],
        )
    ]


def main():
    """Download prices, load trade signals and write the backtest JSON to OUTPUT.

    Raises:
        RuntimeError: if yfinance returns no data, or if no date has a price
            for all of VOO, TQQQ and the cash proxy.
    """
    print(f"Downloading VOO/TQQQ/SGOV/BIL from {START} ...")
    data = yf.download(["VOO", "TQQQ", "SGOV", "BIL"], start=START, end=END,
                       auto_adjust=True, progress=False)
    if data is None or data.empty:
        raise RuntimeError("yfinance returned no price data; nothing to write")
    closes = data["Close"]

    # Before SGOV has prices, BIL (rescaled) stands in as the cash proxy.
    prices = pd.DataFrame({
        "VOO": closes["VOO"],
        "TQQQ": closes["TQQQ"],
        "CASH": _stitch_cash_series(closes),
    }).dropna()
    if prices.empty:
        raise RuntimeError(
            "No date has prices for all of VOO, TQQQ and CASH; nothing to write"
        )
    first_date, last_date = prices.index[0], prices.index[-1]
    print(f"  Price rows: {len(prices)}, range: {first_date.date()} → {last_date.date()}")

    # Load signals, restricted to the date range covered by the prices.
    signals_df = _load_signals(first_date, last_date)
    print(f"  Signal rows: {len(signals_df)}")

    # Build the JSON payload.
    prices_list = _prices_to_records(prices)
    signals_list = _signals_to_records(signals_df)
    output = {
        "start_date": first_date.strftime("%Y-%m-%d"),
        "end_date": last_date.strftime("%Y-%m-%d"),
        "generated_at": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
        "prices": prices_list,
        "signals": signals_list,
    }
    OUTPUT.write_text(json.dumps(output, ensure_ascii=False), encoding="utf-8")
    size_kb = OUTPUT.stat().st_size / 1024
    print(f"\n✅ 保存: {OUTPUT} ({size_kb:.0f} KB, {len(prices_list)} 日 × {len(signals_list)} 信号)")


if __name__ == "__main__":
    main()
