/**
 * Low-level URL fetcher for `web_fetch`.
 *
 * Implements behaviours W3 (HTTP→HTTPS upgrade), W4 (10 MB content cap),
 * W5 (60 s timeout), W6 (10 redirect hops), W7 (permitted redirect),
 * W8 (LRU cache + TTL), W9 (egress proxy detection — skipped, see notes
 * in §5.2), W10 (binary persistence — minimal stub: persistedPath set to
 * `undefined` because PilotDeck's MCP storage is feature-gated; behaviour
 * intentional_difference recorded in checklist), W11 (HTML→Markdown via
 * turndown), W12 (truncation at 100 KB).
 */

import {
  type FetchedCacheEntry,
  URL_CACHE,
} from "./urlContentCache.js";
import {
  isPermittedRedirect,
  upgradeHttpToHttps,
  validateURL,
} from "./urlValidation.js";

export const MAX_HTTP_CONTENT_LENGTH = 10 * 1024 * 1024;
export const FETCH_TIMEOUT_MS = 60_000;
export const MAX_REDIRECTS = 10;
export const MAX_MARKDOWN_LENGTH = 100_000;
export const WEB_FETCH_USER_AGENT =
  "PilotDeck/0.1 (+https://github.com/pilotdeck) WebFetch";

export type RedirectInfo = {
  type: "redirect";
  originalUrl: string;
  redirectUrl: string;
  statusCode: number;
};

type FetchedHttpRaw = {
  status: number;
  statusText: string;
  headers: Record<string, string>;
  buffer: Buffer;
};

function isRedirectInfoInternal(
  value: FetchedHttpRaw | RedirectInfo,
): value is RedirectInfo {
  return (value as RedirectInfo).type === "redirect";
}

export type WebFetchHttpResult =
  | (FetchedCacheEntry & { fromCache: boolean })
  | RedirectInfo;

export type FetchHook = (
  url: string,
  init: { headers: Record<string, string>; signal: AbortSignal },
) => Promise<{
  status: number;
  statusText: string;
  headers: Record<string, string>;
  arrayBuffer: () => Promise<ArrayBuffer>;
}>;

const defaultFetchHook: FetchHook = async (url, init) => {
  const res = await fetch(url, {
    method: "GET",
    redirect: "manual",
    headers: init.headers,
    signal: init.signal,
  });
  const headers: Record<string, string> = {};
  res.headers.forEach((v, k) => {
    headers[k.toLowerCase()] = v;
  });
  return {
    status: res.status,
    statusText: res.statusText,
    headers,
    arrayBuffer: () => res.arrayBuffer(),
  };
};

let activeFetchHook: FetchHook = defaultFetchHook;

export function __setWebFetchHookForTesting(hook: FetchHook | null): void {
  activeFetchHook = hook ?? defaultFetchHook;
}

let turndownPromise: Promise<{ turndown(html: string): string }> | undefined;
async function getTurndown(): Promise<{ turndown(html: string): string }> {
  if (!turndownPromise) {
    turndownPromise = import("turndown").then((mod) => {
      const Ctor =
        (mod as unknown as { default?: new () => { turndown(html: string): string } }).default ??
        (mod as unknown as new () => { turndown(html: string): string });
      return new Ctor();
    });
  }
  return turndownPromise;
}

async function fetchWithRedirects(
  url: string,
  signal: AbortSignal,
  depth: number,
): Promise<FetchedHttpRaw | RedirectInfo> {
  if (depth > MAX_REDIRECTS) {
    throw new Error(`Too many redirects (exceeded ${MAX_REDIRECTS})`);
  }
  const timeout = new AbortController();
  const timer = setTimeout(() => timeout.abort(new Error("fetch timeout")), FETCH_TIMEOUT_MS);
  const onParentAbort = () => timeout.abort();
  signal.addEventListener("abort", onParentAbort, { once: true });

  let res: Awaited<ReturnType<FetchHook>>;
  try {
    res = await activeFetchHook(url, {
      headers: {
        Accept: "text/markdown, text/html, */*",
        "User-Agent": WEB_FETCH_USER_AGENT,
      },
      signal: timeout.signal,
    });
  } finally {
    clearTimeout(timer);
    signal.removeEventListener("abort", onParentAbort);
  }

  if ([301, 302, 307, 308].includes(res.status)) {
    const location = res.headers["location"];
    if (!location) throw new Error("Redirect missing Location header");
    const redirectUrl = new URL(location, url).toString();
    if (isPermittedRedirect(url, redirectUrl)) {
      return fetchWithRedirects(redirectUrl, signal, depth + 1);
    }
    return {
      type: "redirect",
      originalUrl: url,
      redirectUrl,
      statusCode: res.status,
    };
  }

  if (
    res.status === 403 &&
    res.headers["x-proxy-error"] === "blocked-by-allowlist"
  ) {
    const hostname = new URL(url).hostname;
    throw new Error(
      JSON.stringify({
        error_type: "EGRESS_BLOCKED",
        domain: hostname,
        message: `Access to ${hostname} is blocked by the network egress proxy.`,
      }),
    );
  }

  const ab = await res.arrayBuffer();
  if (ab.byteLength > MAX_HTTP_CONTENT_LENGTH) {
    throw new Error(
      `Response exceeds maximum content length of ${MAX_HTTP_CONTENT_LENGTH} bytes`,
    );
  }
  const buffer = Buffer.from(ab);
  const out: FetchedHttpRaw = {
    status: res.status,
    statusText: res.statusText,
    headers: res.headers,
    buffer,
  };
  return out;
}

function isBinaryContentType(contentType: string): boolean {
  const lower = contentType.toLowerCase();
  if (lower.includes("text/")) return false;
  if (lower.includes("application/json")) return false;
  if (lower.includes("application/xml")) return false;
  if (lower.includes("application/javascript")) return false;
  if (lower.includes("application/x-www-form-urlencoded")) return false;
  if (lower.includes("xml") && !lower.includes("octet")) return false;
  return (
    lower.includes("application/") ||
    lower.includes("image/") ||
    lower.includes("audio/") ||
    lower.includes("video/")
  );
}

export async function getURLMarkdownContent(
  url: string,
  signal: AbortSignal,
): Promise<WebFetchHttpResult> {
  if (!validateURL(url)) {
    throw new Error("Invalid URL");
  }

  const cached = URL_CACHE.get(url);
  if (cached) {
    return { ...cached, fromCache: true };
  }

  const { upgraded } = upgradeHttpToHttps(url);
  const result = await fetchWithRedirects(upgraded, signal, 0);
  if (isRedirectInfoInternal(result)) {
    return result;
  }

  const { status, statusText, headers, buffer } = result;
  const contentType = headers["content-type"] ?? "";
  const bytes = buffer.length;

  let content: string;
  let contentBytes: number;
  if (contentType.includes("text/html")) {
    const html = buffer.toString("utf-8");
    const td = await getTurndown();
    content = td.turndown(html);
    contentBytes = Buffer.byteLength(content);
  } else if (isBinaryContentType(contentType)) {
    content = `[Binary ${contentType || "application/octet-stream"} content (${bytes} bytes) — not displayed]`;
    contentBytes = Buffer.byteLength(content);
  } else {
    content = buffer.toString("utf-8");
    contentBytes = bytes;
  }

  const entry: FetchedCacheEntry = {
    bytes,
    code: status,
    codeText: statusText,
    content,
    contentType,
  };
  URL_CACHE.set(url, entry, contentBytes);
  return { ...entry, fromCache: false };
}

export function truncateMarkdown(markdown: string): string {
  if (markdown.length <= MAX_MARKDOWN_LENGTH) return markdown;
  return (
    markdown.slice(0, MAX_MARKDOWN_LENGTH) + "\n\n[Content truncated due to length...]"
  );
}