/**
* intent.test.ts - Tests for the intent feature
*
* Tests cover:
* - extractIntentTerms: stop word filtering, punctuation, acronyms, edge cases
* - extractSnippet with intent: disambiguation across multiple document sections
* - parseStructuredQuery with intent: lines (parsing, validation, error cases)
* - Chunk selection scoring with intent
* - Strong-signal bypass when intent is present
* - Intent constants
*
* Run with: npx vitest run test/intent.test.ts
*/
import { describe, test, expect } from "vitest";
import {
extractSnippet,
extractIntentTerms,
INTENT_WEIGHT_SNIPPET,
INTENT_WEIGHT_CHUNK,
type ExpandedQuery,
} from "../src/store.js";
// =============================================================================
// parseStructuredQuery — duplicated from src/cli/qmd.ts for unit testing
// (qmd.ts doesn't export it since it's a CLI internal)
// =============================================================================
/**
* Result returned by the local parseStructuredQuery copy when a query
* document contains at least one typed (lex:/vec:/hyde:) line.
*/
interface ParsedStructuredQuery {
// Typed searches, one per lex:/vec:/hyde: line, in the order they were parsed.
searches: ExpandedQuery[];
// Text that followed the intent: prefix; undefined when no intent: line was present.
intent?: string;
}
/**
 * Parse a multi-line query document into typed searches plus an optional intent.
 *
 * Lines are trimmed and blank lines are skipped, but line numbers (1-based)
 * always refer to positions in the original text, blank lines included.
 *
 * @param query Raw query text; one directive per line.
 * @returns The parsed searches and intent, or null when the input has no
 *   non-blank lines, is a single line without a recognised prefix, or is a
 *   single non-empty `expand:` line.
 * @throws Error when `expand:` is combined with other lines or has no text,
 *   when more than one `intent:` line is present, when an `intent:` or typed
 *   line has no text, when a typed line's text contains a carriage return or
 *   newline, when a line in a multi-line document has no recognised prefix,
 *   or when `intent:` is the only directive.
 */
function parseStructuredQuery(query: string): ParsedStructuredQuery | null {
  const expandPrefix = /^expand:\s*/i;
  const intentPrefix = /^intent:\s*/i;
  const searchPrefix = /^(lex|vec|hyde):\s*/i;

  // Keep only non-blank lines, remembering where each one sat in the input.
  const entries: { text: string; lineNo: number }[] = [];
  query.split('\n').forEach((raw, idx) => {
    const text = raw.trim();
    if (text.length > 0) {
      entries.push({ text, lineNo: idx + 1 });
    }
  });
  if (entries.length === 0) {
    return null;
  }

  const isSingleLine = entries.length === 1;
  const searches: ExpandedQuery[] = [];
  let intent: string | undefined;

  for (const { text, lineNo } of entries) {
    // expand: is only legal as the sole line of the document.
    const expandHit = expandPrefix.exec(text);
    if (expandHit !== null) {
      if (!isSingleLine) {
        throw new Error(`Line ${lineNo} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
      }
      if (text.slice(expandHit[0].length).trim() === '') {
        throw new Error('expand: query must include text.');
      }
      return null;
    }

    // intent: may appear anywhere in the document, but only once.
    const intentHit = intentPrefix.exec(text);
    if (intentHit !== null) {
      if (intent !== undefined) {
        throw new Error(`Line ${lineNo}: only one intent: line is allowed per query document.`);
      }
      const intentText = text.slice(intentHit[0].length).trim();
      if (intentText === '') {
        throw new Error(`Line ${lineNo}: intent: must include text.`);
      }
      intent = intentText;
      continue;
    }

    // lex:/vec:/hyde: lines each contribute one typed search.
    const searchHit = searchPrefix.exec(text);
    if (searchHit !== null) {
      const type = searchHit[1]!.toLowerCase() as 'lex' | 'vec' | 'hyde';
      const searchText = text.slice(searchHit[0].length).trim();
      if (searchText === '') {
        throw new Error(`Line ${lineNo} (${type}:) must include text.`);
      }
      if (searchText.includes('\r') || searchText.includes('\n')) {
        throw new Error(`Line ${lineNo} (${type}:) contains a newline. Keep each query on a single line.`);
      }
      searches.push({ type, query: searchText, line: lineNo });
      continue;
    }

    // A lone unprefixed line is not a structured document at all.
    if (isSingleLine) {
      return null;
    }
    throw new Error(`Line ${lineNo} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
  }

  if (searches.length === 0) {
    if (intent) {
      throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
    }
    return null;
  }
  return { searches, intent };
}
// =============================================================================
// extractIntentTerms
// =============================================================================
describe("extractIntentTerms", () => {
  test("filters stop words", () => {
    // Expected to drop "looking", "for", "notes" and "about" as stop words.
    const kept = extractIntentTerms("looking for notes about latency optimization");
    expect(kept).toEqual(["latency", "optimization"]);
  });

  test("filters common function words", () => {
    // Expected to drop "what", "is", "the", "to", "find"; "best" and "way" remain.
    const kept = extractIntentTerms("what is the best way to find");
    expect(kept).toEqual(["best", "way"]);
  });

  test("preserves domain terms", () => {
    const kept = extractIntentTerms("web performance latency page load times");
    expect(kept).toEqual(["web", "performance", "latency", "page", "load", "times"]);
  });

  test("handles surrounding punctuation with Unicode awareness", () => {
    // Trailing commas must not end up attached to the extracted terms.
    const kept = extractIntentTerms("personal health, fitness, and endurance");
    expect(kept).toEqual(["personal", "health", "fitness", "endurance"]);
  });

  test("preserves internal hyphens", () => {
    // Surrounding parentheses are stripped while hyphens inside a word stay.
    const kept = extractIntentTerms("self-hosted real-time (decision-making)");
    expect(kept).toEqual(["self-hosted", "real-time", "decision-making"]);
  });

  test("short domain terms survive (API, SQL, LLM)", () => {
    const kept = extractIntentTerms("API design for LLM agents");
    expect(kept).toEqual(["api", "design", "llm", "agents"]);
  });

  test("returns empty for empty input", () => {
    for (const blank of ["", " "]) {
      expect(extractIntentTerms(blank)).toEqual([]);
    }
  });

  test("filters single-char terms", () => {
    expect(extractIntentTerms("a b c web")).toEqual(["web"]);
  });

  test("all stop words returns empty", () => {
    expect(extractIntentTerms("the and or but in on at to for of with by")).toEqual([]);
  });

  test("preserves 2-char domain terms (CI, CD, DB)", () => {
    const kept = extractIntentTerms("SQL CI CD DB");
    for (const expected of ["sql", "ci", "cd", "db"]) {
      expect(kept).toContain(expected);
    }
  });

  test("lowercases all terms", () => {
    const kept = extractIntentTerms("WebSocket HTTP REST");
    for (const expected of ["websocket", "http", "rest"]) {
      expect(kept).toContain(expected);
    }
  });

  test("handles C++ style punctuation", () => {
    // Only the plain words are asserted; how "C++" itself is treated is left open.
    const kept = extractIntentTerms("C++, performance! optimization.");
    for (const expected of ["performance", "optimization"]) {
      expect(kept).toContain(expected);
    }
  });
});
// =============================================================================
// extractSnippet with intent — disambiguation
// =============================================================================
describe("extractSnippet with intent", () => {
  // Fixture: three sections that all mention "performance", so a query for
  // that word alone cannot tell them apart. The intent terms (weighted by
  // INTENT_WEIGHT_SNIPPET) are what should steer the snippet to one section.
  const body = [
    "# Notes on Various Topics",
    "",
    "## Web Performance Section",
    "Web performance means optimizing page load times and Core Web Vitals.",
    "Reduce latency, improve rendering speed, and measure performance budgets.",
    "",
    "## Team Performance Section",
    "Team performance depends on trust, psychological safety, and feedback.",
    "Build culture where performance reviews drive growth not fear.",
    "",
    "## Health Performance Section",
    "Health performance comes from consistent exercise, sleep, and endurance.",
    "Track fitness metrics, optimize recovery, and monitor healthspan.",
  ].join("\n");

  // Length cap handed to extractSnippet in every test of this suite.
  const MAX_LEN = 500;

  // Snippet of the fixture for a query plus intent, with no chunk position.
  const snippetWithIntent = (query: string, intent: string) =>
    extractSnippet(body, query, MAX_LEN, undefined, undefined, intent);

  test("without intent, anchors on query terms only", () => {
    // "performance" shows up in several sections; any of them satisfies this check.
    const { snippet } = extractSnippet(body, "performance", MAX_LEN);
    expect(snippet).toContain("Performance");
  });

  test("with web-perf intent, prefers web performance section", () => {
    const { snippet } = snippetWithIntent(
      "performance",
      "Looking for notes about web performance, latency, and page load times",
    );
    expect(snippet).toMatch(/latency|page.*load|Core Web Vitals/i);
  });

  test("with health intent, prefers health section", () => {
    const { snippet } = snippetWithIntent(
      "performance",
      "Looking for notes about personal health, fitness, and endurance",
    );
    expect(snippet).toMatch(/health|fitness|endurance|exercise/i);
  });

  test("with team intent, prefers team section", () => {
    const { snippet } = snippetWithIntent(
      "performance",
      "Looking for notes about building high-performing teams and culture",
    );
    expect(snippet).toMatch(/team|culture|trust|feedback/i);
  });

  test("intent does not override strong query match", () => {
    // A specific multi-word query should keep its anchor even when the intent
    // points at a different section.
    const { snippet } = snippetWithIntent(
      "Core Web Vitals",
      "Looking for notes about health and fitness",
    );
    expect(snippet).toContain("Core Web Vitals");
  });

  test("absent intent produces same result as undefined", () => {
    // Deliberately uses both call shapes: trailing arguments omitted vs. passed as undefined.
    const omitted = extractSnippet(body, "performance", MAX_LEN);
    const explicit = extractSnippet(body, "performance", MAX_LEN, undefined, undefined, undefined);
    expect(omitted.line).toBe(explicit.line);
    expect(omitted.snippet).toBe(explicit.snippet);
  });

  test("intent with no matching terms falls back to query-only scoring", () => {
    const { snippet } = snippetWithIntent("performance", "quantum computing and entanglement");
    expect(snippet).toContain("Performance");
    expect(snippet.length).toBeGreaterThan(0);
  });

  test("intent works with chunk position", () => {
    const webSectionOffset = body.indexOf("## Web Performance");
    const { snippet } = extractSnippet(
      body,
      "performance",
      MAX_LEN,
      webSectionOffset,
      200,
      "web page load times",
    );
    expect(snippet).toMatch(/Web Performance|Core Web Vitals|Page load/i);
  });
});
// =============================================================================
// extractSnippet — intent weight verification
// =============================================================================
describe("extractSnippet intent weight behavior", () => {
  // Fixture: both query words appear on every line; only the trailing topic
  // words differ, so nothing but the intent can separate the lines.
  const body = [
    "performance metrics for team velocity",
    "performance metrics for web latency",
    "performance metrics for athletic endurance",
  ].join("\n");

  test("intent breaks tie when query matches all lines equally", () => {
    const query = "performance metrics";

    // With no intent every line scores the same, and line 1 is reported.
    const baseline = extractSnippet(body, query, 500);
    expect(baseline.line).toBe(1);

    // "web" and "latency" from the intent occur only on the second line.
    const guided = extractSnippet(
      body,
      query,
      500,
      undefined,
      undefined,
      "web latency and page speed",
    );
    expect(guided.snippet).toContain("web latency");
  });
});
// =============================================================================
// Chunk selection scoring with intent
// =============================================================================
describe("intent keyword extraction logic", () => {
  /**
   * Test-local scorer. Mirrors the chunk selection scoring in hybridQuery,
   * using the shared extractIntentTerms helper and INTENT_WEIGHT_CHUNK constant.
   *
   * Each whitespace-separated query term longer than two characters that
   * occurs in the lowercased text adds 1; each extracted intent term that
   * occurs in the lowercased text adds INTENT_WEIGHT_CHUNK.
   *
   * @param text Chunk text to score.
   * @param query Query string, split on whitespace.
   * @param intent Optional intent; contributes nothing when omitted or empty.
   * @returns Query hits plus weighted intent hits.
   */
  function scoreChunk(text: string, query: string, intent?: string): number {
    const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
    const intentTerms = intent ? extractIntentTerms(intent) : [];
    const lower = text.toLowerCase();
    const qScore = queryTerms.reduce((acc, term) => acc + (lower.includes(term) ? 1 : 0), 0);
    const iScore = intentTerms.reduce((acc, term) => acc + (lower.includes(term) ? INTENT_WEIGHT_CHUNK : 0), 0);
    return qScore + iScore;
  }

  // One chunk per topic; all three mention "performance".
  const chunks = [
    "Web performance: optimize page load times, reduce latency, improve rendering pipeline.",
    "Team performance: build trust, give feedback, set clear expectations for the group.",
    "Health performance: exercise regularly, sleep 8 hours, manage stress for endurance.",
  ];

  test("without intent, all chunks score equally on 'performance'", () => {
    const scores = chunks.map(c => scoreChunk(c, "performance"));
    // All contain "performance", so all score exactly 1 (one query hit, no intent).
    expect(scores[0]).toBe(1);
    expect(scores[0]).toBe(scores[1]);
    expect(scores[1]).toBe(scores[2]);
  });

  test("with web intent, web chunk scores highest", () => {
    const intent = "looking for notes about page load times and latency optimization";
    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
    expect(scores[0]).toBeGreaterThan(scores[1]!);
    expect(scores[0]).toBeGreaterThan(scores[2]!);
  });

  test("with health intent, health chunk scores highest", () => {
    const intent = "looking for notes about exercise, sleep, and endurance";
    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
    expect(scores[2]).toBeGreaterThan(scores[0]!);
    expect(scores[2]).toBeGreaterThan(scores[1]!);
  });

  test("intent terms have lower weight than query terms (1.0)", () => {
    const intent = "looking for latency";
    // Chunk 0 has "performance" (query: 1.0) + "latency" (intent: INTENT_WEIGHT_CHUNK)
    const withBoth = scoreChunk(chunks[0]!, "performance", intent);
    const queryOnly = scoreChunk(chunks[0]!, "performance");
    expect(withBoth).toBe(queryOnly + INTENT_WEIGHT_CHUNK);
    // The claim in the test name: an intent hit must count for something, but
    // strictly less than the 1.0 a query hit contributes in scoreChunk.
    expect(INTENT_WEIGHT_CHUNK).toBeGreaterThan(0);
    expect(INTENT_WEIGHT_CHUNK).toBeLessThan(1);
  });

  test("stop words are filtered, short domain terms survive", () => {
    const intent = "the art of web performance";
    // "the" (stop word), "art" (survives), "of" (stop word),
    // "web" (survives), "performance" (survives)
    // intent terms after filtering: ["art", "web", "performance"]
    // Chunk 0 has "web" + "performance" → 2 intent hits (no "art")
    // Chunks 1,2 have "performance" only → 1 intent hit
    // The query "test" occurs in none of the chunks, so scores are intent-only.
    const scores = chunks.map(c => scoreChunk(c, "test", intent));
    expect(scores[0]).toBe(INTENT_WEIGHT_CHUNK * 2); // "web" + "performance"
    expect(scores[1]).toBe(INTENT_WEIGHT_CHUNK); // "performance" only
    expect(scores[2]).toBe(INTENT_WEIGHT_CHUNK); // "performance" only
  });
});
// =============================================================================
// Strong-signal bypass with intent
// =============================================================================
describe("strong-signal bypass logic", () => {
  // Local copies of the thresholds. The rule being mirrored from hybridQuery is:
  //   hasStrongSignal = !intent && topScore >= STRONG_SIGNAL_MIN_SCORE && gap >= STRONG_SIGNAL_MIN_GAP
  const MIN_TOP_SCORE = 0.85;
  const MIN_SCORE_GAP = 0.15;

  /**
   * True only when no intent is supplied, the top score reaches the minimum,
   * and it leads the runner-up by at least the minimum gap.
   */
  function hasStrongSignal(topScore: number, secondScore: number, intent?: string): boolean {
    // Any non-empty intent switches the bypass off outright.
    if (intent) {
      return false;
    }
    const reachesMinScore = topScore >= MIN_TOP_SCORE;
    const leadsByGap = topScore - secondScore >= MIN_SCORE_GAP;
    return reachesMinScore && leadsByGap;
  }

  test("strong signal detected without intent", () => {
    const detected = hasStrongSignal(0.90, 0.70);
    expect(detected).toBe(true);
  });

  test("strong signal bypassed when intent provided", () => {
    const detected = hasStrongSignal(0.90, 0.70, "looking for health performance");
    expect(detected).toBe(false);
  });

  test("weak signal not affected by intent", () => {
    const withoutIntent = hasStrongSignal(0.50, 0.45);
    expect(withoutIntent).toBe(false);
    const withIntent = hasStrongSignal(0.50, 0.45, "some intent");
    expect(withIntent).toBe(false);
  });

  test("close scores not strong even without intent", () => {
    // The lead here is below the 0.15 minimum gap.
    const detected = hasStrongSignal(0.90, 0.80);
    expect(detected).toBe(false);
  });
});
// =============================================================================
// parseStructuredQuery with intent
// =============================================================================
describe("parseStructuredQuery with intent", () => {
  // Parses a document that is expected to be structured: asserts the result
  // is non-null and hands it back so each test can inspect its fields.
  function parseOrFail(doc: string): ParsedStructuredQuery {
    const parsed = parseStructuredQuery(doc);
    expect(parsed).not.toBeNull();
    return parsed!;
  }

  test("parses intent + lex query", () => {
    const { intent, searches } = parseOrFail("intent: web performance\nlex: performance");
    expect(intent).toBe("web performance");
    expect(searches).toHaveLength(1);
    expect(searches[0]!.type).toBe("lex");
    expect(searches[0]!.query).toBe("performance");
  });

  test("parses intent + multiple typed lines", () => {
    const { intent, searches } = parseOrFail(
      "intent: web page load times\nlex: performance\nvec: how to improve performance",
    );
    expect(intent).toBe("web page load times");
    expect(searches).toHaveLength(2);
    expect(searches[0]!.type).toBe("lex");
    expect(searches[1]!.type).toBe("vec");
  });

  test("intent can appear after typed lines", () => {
    const { intent, searches } = parseOrFail(
      "lex: performance\nintent: web page load times\nvec: latency",
    );
    expect(intent).toBe("web page load times");
    expect(searches).toHaveLength(2);
  });

  test("intent is case-insensitive prefix", () => {
    const { intent } = parseOrFail("Intent: web perf\nlex: performance");
    expect(intent).toBe("web perf");
  });

  test("no intent returns undefined", () => {
    const { intent } = parseOrFail("lex: performance\nvec: speed");
    expect(intent).toBeUndefined();
  });

  test("intent alone throws error", () => {
    const attempt = () => parseStructuredQuery("intent: web performance");
    expect(attempt).toThrow(/intent: cannot appear alone/);
  });

  test("multiple intent lines throw error", () => {
    const attempt = () =>
      parseStructuredQuery("intent: web perf\nintent: team health\nlex: performance");
    expect(attempt).toThrow(/only one intent: line is allowed/);
  });

  test("empty intent text throws error", () => {
    const attempt = () => parseStructuredQuery("intent:\nlex: performance");
    expect(attempt).toThrow(/intent: must include text/);
  });

  test("intent with whitespace-only text throws error", () => {
    const attempt = () => parseStructuredQuery("intent: \nlex: performance");
    expect(attempt).toThrow(/intent: must include text/);
  });

  test("single plain line still returns null (expand mode)", () => {
    expect(parseStructuredQuery("how does auth work")).toBeNull();
  });

  test("expand: line still returns null", () => {
    expect(parseStructuredQuery("expand: auth stuff")).toBeNull();
  });

  test("intent with expand throws error (expand can't mix)", () => {
    const attempt = () => parseStructuredQuery("intent: web\nexpand: performance");
    expect(attempt).toThrow(/cannot mix expand/);
  });

  test("empty query returns null", () => {
    for (const blank of ["", " \n \n "]) {
      expect(parseStructuredQuery(blank)).toBeNull();
    }
  });

  test("intent with blank lines is fine", () => {
    const { intent, searches } = parseOrFail(
      "intent: web perf\n\nlex: performance\n\nvec: speed",
    );
    expect(intent).toBe("web perf");
    expect(searches).toHaveLength(2);
  });

  test("intent preserves full text including colons", () => {
    // Only the leading "intent:" is a prefix; later colons belong to the text.
    const { intent } = parseOrFail(
      "intent: web performance: LCP, FID, CLS\nlex: performance",
    );
    expect(intent).toBe("web performance: LCP, FID, CLS");
  });
});
// =============================================================================
// Constants exported
// =============================================================================
describe("intent constants", () => {
  // Pins the exact numeric values of the two exported intent weights.
  test("INTENT_WEIGHT_SNIPPET is 0.3", () => {
    const snippetWeight = INTENT_WEIGHT_SNIPPET;
    expect(snippetWeight).toBe(0.3);
  });

  test("INTENT_WEIGHT_CHUNK is 0.5", () => {
    const chunkWeight = INTENT_WEIGHT_CHUNK;
    expect(chunkWeight).toBe(0.5);
  });
});