#!/usr/bin/env python3
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Repository root: the parent of the directory that contains this script
# (resolved to an absolute path first).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Project directory whose `.venv` is the default Python environment to scan.
FASTAPI_DIR = REPO_ROOT / "servers" / "fastapi"
# Project directory whose `node_modules` is the default Node tree to scan.
NEXT_DIR = REPO_ROOT / "servers" / "nextjs"
# Output file that main() overwrites with the generated notice text.
NOTICE_PATH = REPO_ROOT / "NOTICE"

# File names probed, in priority order, directly inside a `*.dist-info`
# directory when looking for a Python package's license text.
# NOTE(review): unlike NODE_LICENSE_CANDIDATES below, this list has no
# "LICENCE.txt" entry — confirm whether that omission is intentional.
PY_LICENSE_CANDIDATES = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENCE",
    "COPYING",
    "COPYING.txt",
    "NOTICE",
    "NOTICE.txt",
]

# File names probed, in priority order, at the top level of a Node package
# directory when looking for its license text.
NODE_LICENSE_CANDIDATES = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENCE",
    "LICENCE.txt",
    "COPYING",
    "COPYING.txt",
    "NOTICE",
    "NOTICE.txt",
]


def read_text_safe(path: Path) -> str:
    """Return the stripped text of *path*, or an empty string on any failure.

    The file is decoded as UTF-8 with undecodable bytes replaced, so bad
    encodings never raise. Any error while reading (missing file, directory,
    permissions, ...) is deliberately swallowed: callers treat "" as
    "nothing usable here".
    """
    try:
        contents = path.read_text(encoding="utf-8", errors="replace")
    except Exception:
        return ""
    return contents.strip()


def parse_rfc822_metadata(text: str) -> Dict[str, str]:
    """Parse the header section of an RFC 822-style metadata document.

    Only the headers are parsed. The first blank line after at least one
    header marks the start of the message body (for Python ``METADATA``
    files that is the long description), and parsing stops there so that
    ``Key: value``-looking lines in the body cannot overwrite real headers
    such as ``Name`` or ``License``.

    Args:
        text: Raw metadata text.

    Returns:
        Mapping of header name to value. Continuation lines (lines starting
        with a space or tab) are appended to the preceding header's value,
        separated by a newline. If a header occurs more than once, the last
        occurrence wins.
    """
    data: Dict[str, str] = {}
    key: Optional[str] = None
    for raw_line in text.splitlines():
        if not raw_line:
            if data:
                # End of the header block; everything after this is body.
                break
            # Tolerate blank lines before the first header.
            continue
        if raw_line[0] in " \t" and key:
            # Folded continuation of the previous header.
            data[key] += "\n" + raw_line.strip()
            continue
        if ":" in raw_line:
            k, v = raw_line.split(":", 1)
            key = k.strip()
            data[key] = v.strip()
    return data


def find_python_site_packages(venv_dir: Path) -> Optional[Path]:
    """Locate the ``site-packages`` directory of a virtual environment.

    Looks first for the Linux/macOS layout (``lib/python*/site-packages``)
    and then for the Windows layout (``Lib/site-packages``).

    Args:
        venv_dir: Root directory of the virtual environment.

    Returns:
        The first existing ``site-packages`` path, or ``None`` if neither
        layout is present.
    """
    # Linux/mac layout: lib/pythonX.Y/site-packages
    posix_lib = venv_dir / "lib"
    if posix_lib.exists():
        for entry in posix_lib.iterdir():
            if not (entry.is_dir() and entry.name.startswith("python")):
                continue
            candidate = entry / "site-packages"
            if candidate.exists():
                return candidate

    # Windows layout: Lib/site-packages
    windows_candidate = venv_dir / "Lib" / "site-packages"
    return windows_candidate if windows_candidate.exists() else None


def detect_python_venv() -> Optional[Path]:
    """Pick the Python virtual environment to scan.

    Candidates are tried in this order:

    1. The path in the ``NOTICE_PYTHON_VENV`` environment variable, if it
       exists on disk.
    2. ``FASTAPI_DIR / ".venv"``, if it exists.
    3. The path in ``VIRTUAL_ENV``, but only when the POSIX form of
       ``FASTAPI_DIR`` occurs as a substring of its POSIX form.

    Returns:
        The selected environment path, or ``None`` when no candidate matches.
    """
    override = os.environ.get("NOTICE_PYTHON_VENV")
    if override:
        override_path = Path(override)
        if override_path.exists():
            return override_path

    project_venv = FASTAPI_DIR / ".venv"
    if project_venv.exists():
        return project_venv

    activated = os.environ.get("VIRTUAL_ENV")
    if not activated:
        return None
    activated_path = Path(activated)
    if FASTAPI_DIR.as_posix() in activated_path.as_posix():
        return activated_path
    return None


def scan_python_packages(site_packages_dir: Path) -> List[Dict[str, str]]:
    """Collect name, version, license and author data for installed wheels.

    Every ``*.dist-info`` directory in *site_packages_dir* that has a
    ``METADATA`` file yields one entry. The license text is looked up in this
    order, and the first non-empty text wins:

    1. Well-known file names directly inside the dist-info directory, then
       inside its ``licenses/`` subdirectory.
    2. Files listed in ``RECORD`` whose *file name* contains a license-like
       token, then files whose *path* merely contains one. Checking file
       names first avoids picking e.g. ``.../licenses/AUTHORS.md`` or a
       module of a package called ``license_something`` over a real
       license file.
    3. The metadata license field itself, as a last resort.

    Args:
        site_packages_dir: The ``site-packages`` directory to scan.

    Returns:
        Entries with keys ``name``, ``version``, ``license``, ``author`` and
        ``license_text``, sorted by lower-cased name and then version.
    """
    tokens = ("license", "licence", "copying", "notice")
    dist_suffix = ".dist-info"

    def first_value(meta: Dict[str, str], *keys: str) -> str:
        # First non-empty header among *keys*; an empty header does not
        # block the fallback to the next one.
        for key in keys:
            value = meta.get(key, "").strip()
            if value:
                return value
        return ""

    def text_from_known_names(dist: Path) -> str:
        # Probe the dist-info directory and its licenses/ subdirectory.
        for folder in (dist, dist / "licenses"):
            for cand in PY_LICENSE_CANDIDATES:
                p = folder / cand
                if p.exists():
                    text = read_text_safe(p)
                    if text:
                        return text
        return ""

    def text_from_record(dist: Path) -> str:
        # Search the files listed in RECORD for license-looking files.
        record = dist / "RECORD"
        if not record.exists():
            return ""
        by_file_name: List[str] = []
        by_path_only: List[str] = []
        for line in read_text_safe(record).splitlines():
            path_part = line.split(",", 1)[0]
            lower = path_part.lower()
            if not any(tok in lower for tok in tokens):
                continue
            file_name = lower.replace("\\", "/").rsplit("/", 1)[-1]
            if any(tok in file_name for tok in tokens):
                by_file_name.append(path_part)
            else:
                by_path_only.append(path_part)
        for path_part in by_file_name + by_path_only:
            target = site_packages_dir / path_part
            if target.exists():
                text = read_text_safe(target)
                if text:
                    return text
        return ""

    entries: List[Dict[str, str]] = []
    for dist in sorted(site_packages_dir.glob("*" + dist_suffix)):
        metadata_path = dist / "METADATA"
        if not metadata_path.exists():
            continue
        meta = parse_rfc822_metadata(read_text_safe(metadata_path))
        name = meta.get("Name", "").strip()
        version = meta.get("Version", "").strip()
        # Newer metadata may carry an SPDX expression instead of "License".
        license_name = first_value(meta, "License", "License-Expression")
        if not name:
            # Fall back to the folder name, e.g. requests-2.32.3.dist-info
            base = dist.name[: -len(dist_suffix)]
            if "-" in base:
                name, folder_version = base.rsplit("-", 1)
                version = version or folder_version
        author = first_value(meta, "Author", "Maintainer", "Author-email")

        license_text = text_from_known_names(dist) or text_from_record(dist)
        if not license_text and license_name:
            # As a last resort, embed the metadata license field content.
            license_text = f"License field from METADATA:\n{license_name}"

        entries.append({
            "name": name or dist.name,
            "version": version,
            "license": license_name,
            "author": author,
            "license_text": license_text,
        })

    # Sort by name for stable output.
    entries.sort(key=lambda e: (e["name"].lower(), e["version"]))
    return entries


def find_license_file_in_dir(base_dir: Path, depth_limit: int = 2) -> Optional[Path]:
    """Find the most likely license file for a package directory.

    The search runs in two stages:

    1. Top-level files named like one of ``NODE_LICENSE_CANDIDATES``, in
       candidate priority order; an exact-case match is preferred over a
       case-insensitive one.
    2. A depth-limited walk for any file whose name contains "license",
       "licence", "copying" or "notice". Files at a level are checked before
       its subdirectories, so a shallower file always wins. Directories named
       ``node_modules`` and dot-directories are skipped.

    Directory listings are sorted by name so the result does not depend on
    filesystem enumeration order, and unreadable directories are treated as
    empty. Only regular files are ever returned.

    Args:
        base_dir: Package directory to search.
        depth_limit: Maximum directory depth to descend (``base_dir`` is
            depth 0).

    Returns:
        Path of the chosen license file, or ``None`` if nothing matches.
    """
    tokens = ("license", "licence", "copying", "notice")

    def list_dir(dir_path: Path) -> List[Path]:
        try:
            return sorted(dir_path.iterdir(), key=lambda p: p.name)
        except OSError:
            return []

    # List the top level once and index its files for the candidate lookup.
    top_level = list_dir(base_dir)
    files_by_name: Dict[str, Path] = {}
    files_by_lower: Dict[str, Path] = {}
    for child in top_level:
        if child.is_file():
            files_by_name[child.name] = child
            files_by_lower.setdefault(child.name.lower(), child)

    for cand in NODE_LICENSE_CANDIDATES:
        hit = files_by_name.get(cand) or files_by_lower.get(cand.lower())
        if hit is not None:
            return hit

    def walk(dir_path: Path, depth: int, children: Optional[List[Path]] = None) -> Optional[Path]:
        if depth > depth_limit:
            return None
        if children is None:
            children = list_dir(dir_path)
        subdirs: List[Path] = []
        for child in children:
            if child.is_dir():
                if child.name != "node_modules" and not child.name.startswith("."):
                    subdirs.append(child)
            elif child.is_file() and any(tok in child.name.lower() for tok in tokens):
                return child
        # Descend only after every file at this level has been ruled out.
        for sub in subdirs:
            found = walk(sub, depth + 1)
            if found:
                return found
        return None

    return walk(base_dir, 0, top_level)


def scan_node_modules(node_modules_dir: Path) -> List[Dict[str, str]]:
    """Collect name, version, license and author data for Node packages.

    Walks *node_modules_dir*, including scoped (``@scope/pkg``) packages and
    ``node_modules`` directories nested inside packages. Each distinct
    ``name@version`` is reported once; the first occurrence found wins.

    Malformed manifests are skipped rather than aborting the scan: invalid
    JSON and non-object ``package.json`` files are ignored, and non-string
    field values are coerced to strings. Directories are visited in name
    order, and each real directory is walked at most once so symlink cycles
    cannot recurse forever.

    Args:
        node_modules_dir: Root ``node_modules`` directory to scan.

    Returns:
        Entries with keys ``name``, ``version``, ``license``, ``author`` and
        ``license_text``, sorted by lower-cased name and then version.
    """
    entries: List[Dict[str, str]] = []
    seen = set()  # "name@version" keys already reported
    walked = set()  # resolved node_modules directories already traversed

    def as_text(value) -> str:
        return "" if value is None else str(value)

    def list_dir(dir_path: Path) -> List[Path]:
        try:
            return sorted(dir_path.iterdir(), key=lambda p: p.name)
        except OSError:
            return []

    def visit_pkg(pkg_dir: Path, fallback_name: str):
        pkg_json = pkg_dir / "package.json"
        if not pkg_json.exists():
            return
        try:
            data = json.loads(read_text_safe(pkg_json) or "{}")
        except (ValueError, RecursionError):
            return
        if not isinstance(data, dict):
            # Valid JSON, but not a manifest object.
            return

        name = as_text(data.get("name") or fallback_name)
        version = str(data.get("version") or "")
        key = f"{name}@{version}"
        if key in seen:
            return
        seen.add(key)

        license_name = ""
        lic_field = data.get("license")
        if isinstance(lic_field, str):
            license_name = lic_field
        elif isinstance(lic_field, dict):
            license_name = as_text(lic_field.get("type", ""))
        elif isinstance(data.get("licenses"), list):
            # Legacy form: a list of {"type": ..., "url": ...} objects.
            license_name = ", ".join(
                str(x.get("type", "")) for x in data["licenses"] if isinstance(x, dict)
            )

        author = ""
        a = data.get("author")
        if isinstance(a, str):
            author = a
        elif isinstance(a, dict):
            author = as_text(a.get("name", ""))

        license_text = ""
        lic_file = find_license_file_in_dir(pkg_dir, depth_limit=2)
        if lic_file:
            license_text = read_text_safe(lic_file)

        entries.append({
            "name": name,
            "version": version,
            "license": license_name,
            "author": author,
            "license_text": license_text,
        })

    def walk_node_modules(base: Path):
        if not base.is_dir():
            return
        try:
            real = base.resolve()
        except (OSError, RuntimeError):
            real = base
        if real in walked:
            return
        walked.add(real)

        for entry in list_dir(base):
            if not entry.is_dir():
                continue
            if entry.name == ".bin":
                continue
            if entry.name.startswith("@"):  # scoped packages
                for scoped in list_dir(entry):
                    if scoped.is_dir():
                        # Keep the scope in the fallback name.
                        visit_pkg(scoped, f"{entry.name}/{scoped.name}")
                        # nested node_modules inside the package
                        walk_node_modules(scoped / "node_modules")
                continue
            visit_pkg(entry, entry.name)
            walk_node_modules(entry / "node_modules")

    walk_node_modules(node_modules_dir)
    # Sort by package name for stable output.
    entries.sort(key=lambda e: (e["name"].lower(), e["version"]))
    return entries


def format_section(title: str, entries: List[Dict[str, str]]) -> str:
    """Render one titled section of the NOTICE file.

    The section starts with the title framed by two dashed rules. Each entry
    then contributes its name, version, license and author on separate lines
    (an empty line where a field is missing), a blank line, and the license
    text. Entries are separated by one blank line.

    Fields are stripped before use. ``None`` or otherwise non-string values
    are tolerated, and a missing or whitespace-only license text is replaced
    by ``LICENSE TEXT NOT FOUND``.

    Args:
        title: Section heading.
        entries: Package entries with keys ``name``, ``version``,
            ``license``, ``author`` and ``license_text``.

    Returns:
        The section text, ending with exactly one newline.
    """
    def field(entry: Dict[str, str], key: str) -> str:
        value = entry.get(key)
        return "" if value is None else str(value).strip()

    rule = "-------------------------------------"
    chunks: List[str] = ["\n".join([rule, title, rule, ""])]
    for e in entries:
        block = [
            field(e, "name"),
            field(e, "version"),
            field(e, "license"),
            field(e, "author"),
            "",
            # Strip first so whitespace-only text also gets the placeholder.
            field(e, "license_text") or "LICENSE TEXT NOT FOUND",
            "",
            "",
        ]
        chunks.append("\n".join(block))
    return "".join(chunks).rstrip() + "\n"


def main():
    """Rebuild the NOTICE file from the installed Python and Node packages.

    The Python environment comes from ``--python-venv`` or, failing that,
    from ``detect_python_venv()``. The Node tree comes from
    ``--node-modules``, then the ``NOTICE_NODE_MODULES`` environment
    variable, then ``NEXT_DIR / "node_modules"``. A source that cannot be
    found only produces a warning on stderr; the process exits with status 1
    when neither source yields any entries. Otherwise ``NOTICE_PATH`` is
    overwritten with the generated sections.
    """
    import argparse

    # Optional CLI overrides
    cli = argparse.ArgumentParser(description="Rebuild NOTICE from installed packages")
    cli.add_argument("--python-venv", dest="python_venv", default=None, help="Path to Python venv to scan")
    cli.add_argument("--node-modules", dest="node_modules", default=None, help="Path to node_modules to scan")
    options = cli.parse_args()

    # Python scan
    py_packages: List[Dict[str, str]] = []
    venv_root = Path(options.python_venv) if options.python_venv else detect_python_venv()
    if not venv_root:
        print("Warning: Python venv not found. Set NOTICE_PYTHON_VENV or create servers/fastapi/.venv", file=sys.stderr)
    else:
        site_packages = find_python_site_packages(venv_root)
        if site_packages and site_packages.exists():
            py_packages = scan_python_packages(site_packages)
        else:
            print(f"Warning: site-packages not found under {venv_root}", file=sys.stderr)

    # Node scan
    js_packages: List[Dict[str, str]] = []
    node_root = Path(
        options.node_modules
        or os.environ.get("NOTICE_NODE_MODULES")
        or (NEXT_DIR / "node_modules")
    )
    if not node_root.exists():
        print(f"Warning: node_modules not found at {node_root}", file=sys.stderr)
    else:
        js_packages = scan_node_modules(node_root)

    # Build NOTICE content, keeping only sections that have entries.
    sections: List[str] = [
        format_section(heading, packages)
        for heading, packages in (
            ("PYTHON PACKAGES", py_packages),
            ("NODE PACKAGES", js_packages),
        )
        if packages
    ]
    if not sections:
        print("Error: No sections generated. Ensure .venv and node_modules exist.", file=sys.stderr)
        sys.exit(1)

    NOTICE_PATH.write_text("\n".join(sections), encoding="utf-8")
    print("NOTICE rebuilt from installed packages")


if __name__ == "__main__":
    main()