"""Mock Embedder for testing.
Returns fixed-dimension vectors for reproducible testing.
Each unique text gets a deterministic vector based on its hash.
"""
import hashlib
from typing import Final
from core.interfaces import Embedder
# Default vector length shared by MockEmbedder and ZeroEmbedder when no
# explicit ``dimension`` is passed to their constructors.
MOCK_EMBEDDING_DIM: Final[int] = 384
class MockEmbedder(Embedder):
    """Deterministic stand-in embedder for tests.

    Each text is mapped to a vector derived from the SHA-256 digest of
    that text, so equal texts always yield equal vectors.
    """

    def __init__(self, dimension: int = MOCK_EMBEDDING_DIM) -> None:
        """Remember how many components each produced vector has.

        Args:
            dimension: Number of components per vector
                (defaults to ``MOCK_EMBEDDING_DIM``).
        """
        self._dimension = dimension

    def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """Embed every text in the batch.

        Args:
            texts: Input texts.

        Returns:
            One vector per input text, in input order. The vector for a
            given text depends only on that text's content.
        """
        return [self._hash_to_vector(item) for item in texts]

    def _hash_to_vector(self, text: str) -> list[float]:
        """Derive a vector from the SHA-256 digest of ``text``.

        The digest bytes are reused cyclically until ``self._dimension``
        components have been produced; each byte ``b`` becomes
        ``(b - 128) / 128.0``.
        """
        digest = hashlib.sha256(text.encode()).digest()
        digest_len = len(digest)
        return [
            # Wrap around the digest when more components than bytes are needed.
            (digest[idx % digest_len] - 128) / 128.0
            for idx in range(self._dimension)
        ]
class ZeroEmbedder(Embedder):
    """Test embedder whose output is always all zeros.

    Every text receives the same zero vector, which helps when a test
    needs to take the vector contribution out of the picture.
    """

    def __init__(self, dimension: int = MOCK_EMBEDDING_DIM) -> None:
        """Remember how many components each produced vector has.

        Args:
            dimension: Number of components per vector
                (defaults to ``MOCK_EMBEDDING_DIM``).
        """
        self._dimension = dimension

    def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """Produce one zero vector for each input text.

        Args:
            texts: Input texts; only their count matters, the content
                is never read.

        Returns:
            A list with one separate zero-filled list per input text.
        """
        return [[0.0] * self._dimension for _unused_text in texts]