/**
 * AST-aware chunking support via web-tree-sitter.
 *
 * Provides language detection, AST break point extraction for supported
 * code file types, and a stub for future symbol extraction.
 *
 * All functions degrade gracefully: parse failures or unsupported languages
 * return empty arrays, falling back to regex-only chunking.
 *
 * ## Dependency Note
 *
 * Grammar packages (tree-sitter-typescript, etc.) are listed as
 * optionalDependencies with pinned versions. They ship native prebuilds
 * and source files (~72 MB total) but QMD only uses the .wasm files
 * (~5 MB). If install size becomes a concern, the .wasm files can be
 * bundled directly in the repo (e.g. assets/grammars/) and resolved
 * via import.meta.url instead of require.resolve(), eliminating the
 * grammar packages entirely.
 */

import { createRequire } from "node:module";
import { extname } from "node:path";
import type { BreakPoint } from "./store.js";

// web-tree-sitter types — referenced through type-level `import("…")` only, so
// this file has no static import of the package. The runtime module is loaded
// lazily by the dynamic `import()` inside ensureInit() to avoid top-level WASM init.
type ParserType = import("web-tree-sitter").Parser;
type LanguageType = import("web-tree-sitter").Language;
type QueryType = import("web-tree-sitter").Query;

// =============================================================================
// Language Detection
// =============================================================================

export type SupportedLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust";

/**
 * Lookup table from lower-cased file extension (leading dot included) to the
 * language used for AST parsing. Note that `.jsx` is routed to `tsx`.
 */
const EXTENSION_MAP: Record<string, SupportedLanguage> = {
  // TypeScript
  ".ts": "typescript",
  ".mts": "typescript",
  ".cts": "typescript",
  // TSX / JSX
  ".tsx": "tsx",
  ".jsx": "tsx",
  // JavaScript
  ".js": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",
  // Other languages
  ".py": "python",
  ".go": "go",
  ".rs": "rust",
};

/**
 * Map a file path to its AST language by looking at the extension only.
 *
 * The extension is compared case-insensitively. Paths whose extension is not
 * in the table (e.g. `.md`) and paths without an extension yield null.
 *
 * @param filepath - Path or file name to inspect.
 * @returns The matching language, or null when the extension is not supported.
 */
export function detectLanguage(filepath: string): SupportedLanguage | null {
  const extension = extname(filepath).toLowerCase();
  const language = EXTENSION_MAP[extension];
  return language === undefined ? null : language;
}

// =============================================================================
// Grammar Resolution
// =============================================================================

/**
 * Maps language to the npm package and wasm filename for the grammar.
 *
 * - `pkg` and `wasm` are joined as `${pkg}/${wasm}` when the grammar file is
 *   resolved (see resolveGrammarPath).
 * - `version` is only used in this file to build the `bun add pkg@version`
 *   repair hint in formatGrammarLoadError.
 * - `typescript` and `javascript` point at the same .wasm file, so they share
 *   a single cached grammar load (the grammar cache is keyed by wasm filename).
 */
const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string; version: string }> = {
  typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
  tsx:        { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm",        version: "0.23.2" },
  // JavaScript is parsed with the TypeScript grammar rather than a separate package.
  javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
  python:     { pkg: "tree-sitter-python",     wasm: "tree-sitter-python.wasm",     version: "0.23.4" },
  go:         { pkg: "tree-sitter-go",         wasm: "tree-sitter-go.wasm",         version: "0.23.4" },
  rust:       { pkg: "tree-sitter-rust",       wasm: "tree-sitter-rust.wasm",       version: "0.24.0" },
};

/**
 * Build the human-readable message used when a grammar cannot be loaded.
 *
 * The message names the package and wasm file, includes the underlying error
 * text, states that chunking falls back to regex, and ends with a `bun add`
 * command pinned to the version recorded in GRAMMAR_MAP.
 *
 * @param language - Language whose grammar failed to load.
 * @param err - The thrown value; `message` is used for Error instances,
 *   anything else is stringified.
 * @returns A single-line diagnostic string.
 */
export function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string {
  const { pkg, wasm, version } = GRAMMAR_MAP[language];
  const reason = err instanceof Error ? err.message : String(err);
  const failure = `${pkg}/${wasm} failed to load (${reason}); falling back to regex chunking. `;
  const repairHint = `Repair a broken global install with: bun add ${pkg}@${version}`;
  return failure + repairHint;
}

// =============================================================================
// Per-Language Query Definitions
// =============================================================================

/**
 * Tree-sitter S-expression queries for each language.
 * Each capture name maps to a break point score via SCORE_MAP.
 *
 * For TypeScript/JavaScript, we match export_statement wrappers to get the
 * correct start position (before `export`), plus bare declarations for
 * non-exported code.
 *
 * The query text is compiled lazily, once per language, by getQuery().
 * These strings are passed to the query compiler verbatim, so do not put
 * JavaScript-style comments inside the template literals.
 */
const LANGUAGE_QUERIES: Record<SupportedLanguage, string> = {
  // Declarations, imports, and const/let bindings whose value is an arrow
  // function or function expression (captured as @func).
  typescript: `
    (export_statement) @export
    (class_declaration) @class
    (function_declaration) @func
    (method_definition) @method
    (interface_declaration) @iface
    (type_alias_declaration) @type
    (enum_declaration) @enum
    (import_statement) @import
    (lexical_declaration (variable_declarator value: (arrow_function))) @func
    (lexical_declaration (variable_declarator value: (function_expression))) @func
  `,
  // Same pattern list as `typescript`.
  tsx: `
    (export_statement) @export
    (class_declaration) @class
    (function_declaration) @func
    (method_definition) @method
    (interface_declaration) @iface
    (type_alias_declaration) @type
    (enum_declaration) @enum
    (import_statement) @import
    (lexical_declaration (variable_declarator value: (arrow_function))) @func
    (lexical_declaration (variable_declarator value: (function_expression))) @func
  `,
  // Same as `typescript` minus the interface, type alias and enum patterns.
  javascript: `
    (export_statement) @export
    (class_declaration) @class
    (function_declaration) @func
    (method_definition) @method
    (import_statement) @import
    (lexical_declaration (variable_declarator value: (arrow_function))) @func
    (lexical_declaration (variable_declarator value: (function_expression))) @func
  `,
  // decorated_definition is captured in addition to the class/function patterns.
  python: `
    (class_definition) @class
    (function_definition) @func
    (decorated_definition) @decorated
    (import_statement) @import
    (import_from_statement) @import
  `,
  go: `
    (type_declaration) @type
    (function_declaration) @func
    (method_declaration) @method
    (import_declaration) @import
  `,
  rust: `
    (struct_item) @struct
    (impl_item) @impl
    (function_item) @func
    (trait_item) @trait
    (enum_item) @enum
    (use_declaration) @import
    (type_item) @type
    (mod_item) @mod
  `,
};

/**
 * Score mapping from capture names to break point scores.
 * Aligned with the markdown BREAK_PATTERNS scale (h1=100, h2=90, etc.)
 * so findBestCutoff() decay works unchanged.
 *
 * Keys are the capture names used in LANGUAGE_QUERIES (without the `@`).
 * A capture name that is missing from this table is scored 20 by
 * getASTBreakPoints().
 */
const SCORE_MAP: Record<string, number> = {
  // Container-level constructs
  class:     100,
  iface:     100,
  struct:    100,
  trait:     100,
  impl:      100,
  mod:       100,
  // Callable-level constructs and export wrappers
  export:     90,
  func:       90,
  method:     90,
  decorated:  90,
  // Type-level declarations
  type:       80,
  enum:       80,
  // Imports
  import:     60,
};

// =============================================================================
// Parser Caching & Initialization
// =============================================================================

// Runtime classes from web-tree-sitter. They stay null until ensureInit() has
// dynamically imported the module and assigned them.
let ParserClass: typeof import("web-tree-sitter").Parser | null = null;
let LanguageClass: typeof import("web-tree-sitter").Language | null = null;
let QueryClass: typeof import("web-tree-sitter").Query | null = null;
/** Memoized init promise: created by the first ensureInit() call and reused by every later call. */
let initPromise: Promise<void> | null = null;

/** Languages that have already failed to load — warn only once per process. */
const failedLanguages = new Set<string>();

/** Last grammar load error by language, for status output. */
const grammarLoadErrors = new Map<SupportedLanguage, string>();

/**
 * Cached grammar load promises, keyed by .wasm filename (not by language), so
 * languages that map to the same grammar file share one load.
 */
const grammarCache = new Map<string, Promise<LanguageType>>();

/** Cached compiled queries, keyed by language name. */
const queryCache = new Map<string, QueryType>();

/**
 * One-time, lazy bootstrap of web-tree-sitter.
 *
 * The first call dynamically imports the module, stores its Parser, Language
 * and Query classes in the module-level slots, and runs `Parser.init()`.
 * The resulting promise is memoized, so later calls (including calls made
 * while the first is still pending) share it. A rejected promise is memoized
 * too, so a failed init is not retried.
 */
async function ensureInit(): Promise<void> {
  if (initPromise) return initPromise;

  const bootstrap = async (): Promise<void> => {
    const treeSitter = await import("web-tree-sitter");
    ParserClass = treeSitter.Parser;
    LanguageClass = treeSitter.Language;
    QueryClass = treeSitter.Query;
    await treeSitter.Parser.init();
  };

  initPromise = bootstrap();
  return initPromise;
}

/**
 * Locate the grammar .wasm file for a language on disk.
 *
 * Builds the `<package>/<wasm file>` specifier from GRAMMAR_MAP and resolves
 * it with a `require` created relative to this module, so the file is looked
 * up in the installed dependency packages.
 *
 * @param language - Language whose grammar file is wanted.
 * @returns The path returned by `require.resolve`.
 * @throws Whatever `require.resolve` throws when the specifier cannot be resolved.
 */
function resolveGrammarPath(language: SupportedLanguage): string {
  const grammar = GRAMMAR_MAP[language];
  const specifier = `${grammar.pkg}/${grammar.wasm}`;
  const requireFromHere = createRequire(import.meta.url);
  return requireFromHere.resolve(specifier);
}

/**
 * Load and cache a grammar for the given language.
 *
 * Loads are cached as promises keyed by .wasm filename, so concurrent callers
 * and languages that share a grammar file reuse a single load. Callers are
 * expected to have awaited ensureInit() first.
 *
 * On failure the language is recorded as failed, the formatted error is kept
 * for status output, and a warning is logged exactly once per language, even
 * when several concurrent callers observe the same rejected load.
 *
 * @param language - Language whose grammar should be loaded.
 * @returns The loaded grammar, or null when loading failed (now or earlier).
 */
async function loadGrammar(language: SupportedLanguage): Promise<LanguageType | null> {
  if (failedLanguages.has(language)) return null;

  const wasmKey = GRAMMAR_MAP[language].wasm;
  let pending = grammarCache.get(wasmKey);
  if (!pending) {
    pending = (async () => {
      const path = resolveGrammarPath(language);
      return LanguageClass!.load(path);
    })();
    grammarCache.set(wasmKey, pending);
  }

  try {
    return await pending;
  } catch (err) {
    // Evict only the promise this call awaited; a newer in-flight load stored
    // under the same wasm key by another caller must not be discarded.
    if (grammarCache.get(wasmKey) === pending) {
      grammarCache.delete(wasmKey);
    }

    // Several callers may have been awaiting the same rejected promise; only
    // the first one to get here records the failure and logs the warning.
    if (!failedLanguages.has(language)) {
      failedLanguages.add(language);
      const message = formatGrammarLoadError(language, err);
      grammarLoadErrors.set(language, message);
      console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
    }
    return null;
  }
}

/**
 * Return the compiled tree-sitter query for a language, compiling it on first use.
 *
 * Compiled queries are memoized per language in queryCache. If compilation
 * throws, nothing is cached and the error propagates to the caller.
 * Callers are expected to have awaited ensureInit() first.
 *
 * @param language - Language whose query text from LANGUAGE_QUERIES is used.
 * @param grammar - Loaded grammar to compile the query against.
 * @returns The cached or freshly compiled query.
 */
function getQuery(language: SupportedLanguage, grammar: LanguageType): QueryType {
  const cached = queryCache.get(language);
  if (cached) return cached;

  const compiled = new QueryClass!(grammar, LANGUAGE_QUERIES[language]);
  queryCache.set(language, compiled);
  return compiled;
}

// =============================================================================
// AST Break Point Extraction
// =============================================================================

/**
 * Parse a source file and return break points at AST node boundaries.
 *
 * Returns an empty array for unsupported languages, parse failures,
 * or grammar loading failures. Never throws: any error is logged as a
 * warning and an empty array is returned so callers fall back to regex
 * chunking.
 *
 * Each capture from the language's query becomes a break point at the
 * captured node's start offset (`startIndex`), typed `ast:<capture name>`
 * and scored via SCORE_MAP (20 for capture names not in the table).
 *
 * @param content - The file content to parse.
 * @param filepath - The file path (used for language detection).
 * @returns Array of BreakPoint objects sorted by ascending position,
 *   suitable for merging with regex break points.
 */
export async function getASTBreakPoints(
  content: string,
  filepath: string,
): Promise<BreakPoint[]> {
  const language = detectLanguage(filepath);
  if (!language) return [];

  try {
    await ensureInit();

    const grammar = await loadGrammar(language);
    if (!grammar) return [];

    const parser = new ParserClass!();
    try {
      parser.setLanguage(grammar);

      const tree = parser.parse(content);
      if (!tree) return [];

      try {
        const query = getQuery(language, grammar);
        const captures = query.captures(tree.rootNode);

        // Deduplicate: when several captures start at the same offset, keep
        // only the highest-scoring one (the first wins on ties). Captures at
        // different offsets — e.g. an export_statement and the
        // class_declaration nested inside it — are all kept.
        const seen = new Map<number, BreakPoint>();

        for (const cap of captures) {
          const pos = cap.node.startIndex;
          const score = SCORE_MAP[cap.name] ?? 20;
          const type = `ast:${cap.name}`;

          const existing = seen.get(pos);
          if (!existing || score > existing.score) {
            seen.set(pos, { pos, score, type });
          }
        }

        return Array.from(seen.values()).sort((a, b) => a.pos - b.pos);
      } finally {
        // Parser and tree are released explicitly; do it on every exit path,
        // including when query compilation or capture throws.
        tree.delete();
      }
    } finally {
      parser.delete();
    }
  } catch (err) {
    console.warn(`[qmd] AST parse failed for ${filepath}, falling back to regex: ${err instanceof Error ? err.message : String(err)}`);
    return [];
  }
}

// =============================================================================
// Health / Status
// =============================================================================

/**
 * Report which tree-sitter grammars can actually be used.
 *
 * Initializes web-tree-sitter, then for every language in GRAMMAR_MAP tries
 * to load its grammar and compile its query. If initialization itself fails,
 * every language is reported unavailable with the init error.
 *
 * @returns `languages` holds one entry per supported language (with an
 *   `error` string when unavailable); `available` is true when at least one
 *   language is usable.
 */
export async function getASTStatus(): Promise<{
  available: boolean;
  languages: { language: SupportedLanguage; available: boolean; error?: string }[];
}> {
  type LanguageStatus = { language: SupportedLanguage; available: boolean; error?: string };
  const supported = Object.keys(GRAMMAR_MAP) as SupportedLanguage[];

  try {
    await ensureInit();
  } catch (err) {
    // Without the runtime nothing can load, so every language shares the same error.
    const error = `web-tree-sitter init failed: ${err instanceof Error ? err.message : err}`;
    return {
      available: false,
      languages: supported.map((language): LanguageStatus => ({ language, available: false, error })),
    };
  }

  const languages: LanguageStatus[] = [];

  for (const language of supported) {
    let status: LanguageStatus;
    try {
      const grammar = await loadGrammar(language);
      if (!grammar) {
        status = {
          language,
          available: false,
          error: grammarLoadErrors.get(language) ?? "grammar failed to load",
        };
      } else {
        // Also verify the query compiles; a throw here is reported below.
        getQuery(language, grammar);
        status = { language, available: true };
      }
    } catch (err) {
      status = {
        language,
        available: false,
        error: err instanceof Error ? err.message : String(err),
      };
    }
    languages.push(status);
  }

  const available = languages.some(entry => entry.available);
  return { available, languages };
}

// =============================================================================
// Symbol Extraction (Phase 2 Stub)
// =============================================================================

/**
 * Shape of the per-symbol metadata planned for Phase 2.
 * Nothing in Phase 1 produces these: extractSymbols() always returns an
 * empty array.
 */
export interface SymbolInfo {
  /** Symbol name. */
  name: string;
  /** Symbol category. NOTE(review): the set of allowed values is not defined yet. */
  kind: string;
  /** Optional signature text for the symbol. */
  signature?: string;
  /** Line of the symbol. NOTE(review): 0- vs 1-based is not established by this stub — confirm in Phase 2. */
  line: number;
}

/**
 * Placeholder for Phase 2 symbol extraction over a range of a file.
 *
 * All four arguments are currently ignored (hence the underscore prefixes)
 * and a new empty array is returned on every call.
 *
 * @param _content - File content (unused).
 * @param _language - Language name (unused).
 * @param _startPos - Start of the range (unused).
 * @param _endPos - End of the range (unused).
 * @returns Always an empty array in Phase 1.
 */
export function extractSymbols(
  _content: string,
  _language: string,
  _startPos: number,
  _endPos: number,
): SymbolInfo[] {
  const symbols: SymbolInfo[] = [];
  return symbols;
}