import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
REPO_ROOT = Path(__file__).resolve().parent.parent
FASTAPI_DIR = REPO_ROOT / "servers" / "fastapi"
NEXT_DIR = REPO_ROOT / "servers" / "nextjs"
NOTICE_PATH = REPO_ROOT / "NOTICE"
PY_LICENSE_CANDIDATES = [
"LICENSE",
"LICENSE.txt",
"LICENSE.md",
"LICENCE",
"COPYING",
"COPYING.txt",
"NOTICE",
"NOTICE.txt",
]
NODE_LICENSE_CANDIDATES = [
"LICENSE",
"LICENSE.txt",
"LICENSE.md",
"LICENCE",
"LICENCE.txt",
"COPYING",
"COPYING.txt",
"NOTICE",
"NOTICE.txt",
]
def read_text_safe(path: Path) -> str:
try:
return path.read_text(encoding="utf-8", errors="replace").strip()
except Exception:
return ""
def parse_rfc822_metadata(text: str) -> Dict[str, str]:
data: Dict[str, str] = {}
key: Optional[str] = None
for raw_line in text.splitlines():
if not raw_line:
key = None
continue
if raw_line[0] in " \t" and key:
data[key] += "\n" + raw_line.strip()
continue
if ":" in raw_line:
k, v = raw_line.split(":", 1)
key = k.strip()
data[key] = v.strip()
return data
def find_python_site_packages(venv_dir: Path) -> Optional[Path]:
lib_dir = venv_dir / "lib"
if lib_dir.exists():
for child in lib_dir.iterdir():
if child.is_dir() and child.name.startswith("python"):
sp = child / "site-packages"
if sp.exists():
return sp
sp = venv_dir / "Lib" / "site-packages"
if sp.exists():
return sp
return None
def detect_python_venv() -> Optional[Path]:
env_path = os.environ.get("NOTICE_PYTHON_VENV")
if env_path:
v = Path(env_path)
if v.exists():
return v
default = FASTAPI_DIR / ".venv"
if default.exists():
return default
active = os.environ.get("VIRTUAL_ENV")
if active and FASTAPI_DIR.as_posix() in Path(active).as_posix():
return Path(active)
return None
def scan_python_packages(site_packages_dir: Path) -> List[Dict[str, str]]:
entries: List[Dict[str, str]] = []
dist_infos = sorted(site_packages_dir.glob("*.dist-info"))
for dist in dist_infos:
metadata_path = dist / "METADATA"
if not metadata_path.exists():
continue
meta = parse_rfc822_metadata(read_text_safe(metadata_path))
name = meta.get("Name", "").strip()
version = meta.get("Version", "").strip()
license_name = meta.get("License", "").strip()
if not name:
base = dist.name[:-10]
if "-" in base:
parts = base.rsplit("-", 1)
if len(parts) == 2:
name = parts[0]
version = version or parts[1]
author = meta.get("Author", meta.get("Maintainer", meta.get("Author-email", ""))).strip()
license_text = ""
for cand in PY_LICENSE_CANDIDATES:
p = dist / cand
if p.exists():
license_text = read_text_safe(p)
if license_text:
break
if not license_text:
record = dist / "RECORD"
if record.exists():
for line in read_text_safe(record).splitlines():
path_part = line.split(",", 1)[0]
lower = path_part.lower()
if any(token in lower for token in ["license", "licence", "copying", "notice"]):
target = site_packages_dir / path_part
if target.exists():
license_text = read_text_safe(target)
if license_text:
break
if not license_text and license_name:
license_text = f"License field from METADATA:\n{license_name}"
entries.append({
"name": name or dist.name,
"version": version,
"license": license_name,
"author": author,
"license_text": license_text,
})
entries.sort(key=lambda e: (e["name"].lower(), e["version"]))
return entries
def find_license_file_in_dir(base_dir: Path, depth_limit: int = 2) -> Optional[Path]:
for cand in NODE_LICENSE_CANDIDATES:
p = base_dir / cand
if p.exists():
return p
for child in base_dir.iterdir():
if child.is_file() and child.name.lower() == cand.lower():
return child
def walk(dir_path: Path, depth: int) -> Optional[Path]:
if depth > depth_limit:
return None
try:
it = list(dir_path.iterdir())
except Exception:
return None
for child in it:
name_lower = child.name.lower()
if child.is_dir():
if child.name == "node_modules" or child.name.startswith('.'):
continue
found = walk(child, depth + 1)
if found:
return found
else:
if any(tok in name_lower for tok in ["license", "licence", "copying", "notice"]):
return child
return None
return walk(base_dir, 0)
def scan_node_modules(node_modules_dir: Path) -> List[Dict[str, str]]:
entries: List[Dict[str, str]] = []
seen: set[str] = set()
def visit_pkg(pkg_dir: Path):
pkg_json = pkg_dir / "package.json"
if not pkg_json.exists():
return
try:
data = json.loads(read_text_safe(pkg_json) or "{}")
except Exception:
return
name = data.get("name") or pkg_dir.name
version = str(data.get("version") or "")
key = f"{name}@{version}"
if key in seen:
return
seen.add(key)
license_name = ""
lic_field = data.get("license")
if isinstance(lic_field, str):
license_name = lic_field
elif isinstance(lic_field, dict):
license_name = lic_field.get("type", "")
elif isinstance(data.get("licenses"), list):
license_name = ", ".join([str(x.get("type", "")) for x in data["licenses"] if isinstance(x, dict)])
author = ""
a = data.get("author")
if isinstance(a, str):
author = a
elif isinstance(a, dict):
author = a.get("name", "")
license_text = ""
lic_file = find_license_file_in_dir(pkg_dir, depth_limit=2)
if lic_file:
license_text = read_text_safe(lic_file)
entries.append({
"name": name,
"version": version,
"license": license_name,
"author": author,
"license_text": license_text,
})
def walk_node_modules(base: Path):
if not base.exists():
return
for entry in base.iterdir():
if not entry.is_dir():
continue
if entry.name == ".bin":
continue
if entry.name.startswith("@"):
for scoped in entry.iterdir():
if scoped.is_dir():
visit_pkg(scoped)
nested = scoped / "node_modules"
walk_node_modules(nested)
continue
visit_pkg(entry)
nested = entry / "node_modules"
walk_node_modules(nested)
walk_node_modules(node_modules_dir)
entries.sort(key=lambda e: (e["name"].lower(), e["version"]))
return entries
def format_section(title: str, entries: List[Dict[str, str]]) -> str:
header = [
"-------------------------------------",
title,
"-------------------------------------",
"",
]
lines: List[str] = ["\n".join(header)]
for e in entries:
block = [
e.get("name", "").strip(),
e.get("version", "").strip(),
e.get("license", "").strip(),
e.get("author", "").strip(),
"",
(e.get("license_text", "") or "LICENSE TEXT NOT FOUND").strip(),
"",
"",
]
lines.append("\n".join(block))
return "".join(lines).rstrip() + "\n"
def main():
import argparse
parser = argparse.ArgumentParser(description="Rebuild NOTICE from installed packages")
parser.add_argument("--python-venv", dest="python_venv", default=None, help="Path to Python venv to scan")
parser.add_argument("--node-modules", dest="node_modules", default=None, help="Path to node_modules to scan")
args = parser.parse_args()
python_entries: List[Dict[str, str]] = []
node_entries: List[Dict[str, str]] = []
venv = Path(args.python_venv) if args.python_venv else detect_python_venv()
if venv:
sp = find_python_site_packages(venv)
if sp and sp.exists():
python_entries = scan_python_packages(sp)
else:
print(f"Warning: site-packages not found under {venv}", file=sys.stderr)
else:
print("Warning: Python venv not found. Set NOTICE_PYTHON_VENV or create servers/fastapi/.venv", file=sys.stderr)
node_modules_dir = Path(args.node_modules or os.environ.get("NOTICE_NODE_MODULES") or (NEXT_DIR / "node_modules"))
if node_modules_dir.exists():
node_entries = scan_node_modules(node_modules_dir)
else:
print(f"Warning: node_modules not found at {node_modules_dir}", file=sys.stderr)
parts: List[str] = []
if python_entries:
parts.append(format_section("PYTHON PACKAGES", python_entries))
if node_entries:
parts.append(format_section("NODE PACKAGES", node_entries))
if not parts:
print("Error: No sections generated. Ensure .venv and node_modules exist.", file=sys.stderr)
sys.exit(1)
content = "\n".join(parts)
NOTICE_PATH.write_text(content, encoding="utf-8")
print("NOTICE rebuilt from installed packages")
if __name__ == "__main__":
main()