from unittest.mock import patch, MagicMock, ANY
import pytest
from openjiuwen_deepsearch.algorithm.prompts.template import apply_system_prompt
from openjiuwen_deepsearch.algorithm.source_trace.content_analyzer import (
recognize_content_to_cite,
validate_and_enhance_sentences,
find_similar_sentence
)
# Ask pytest to load the pytest-asyncio plugin; this module contains an async
# test marked with ``pytest.mark.asyncio``.
pytest_plugins = ["pytest_asyncio"]
class TestRecognizeContentToCite:
    """Tests for ``recognize_content_to_cite``."""

    @pytest.mark.asyncio
    async def test_recognize_content_to_cite_llm_invoke_error(self):
        """An exception raised by the patched LLM call yields an empty list."""
        # Both collaborators are replaced inside the content_analyzer module
        # for the duration of the call under test.
        with patch(
            'openjiuwen_deepsearch.algorithm.source_trace.content_analyzer.llm_context'
        ) as mock_llm_wrapper, patch(
            'openjiuwen_deepsearch.algorithm.source_trace.content_analyzer.ainvoke_llm_with_stats'
        ) as mock_ainvoke:
            mock_llm_wrapper.return_value = MagicMock()
            # Every invocation of the patched LLM helper raises.
            mock_ainvoke.side_effect = Exception("Invoke error")

            outcome = await recognize_content_to_cite(
                "This is a sample report.", 0.8, "mock_model")

        assert outcome == []
class TestApplySystemPrompt:
    """Tests for ``apply_system_prompt`` with the ``content_recognition`` template."""

    def test_apply_system_prompt_content_recognition(self):
        """The rendered prompt is a single system message that embeds the report."""
        report_text = "This is a sample report with some content."
        messages = apply_system_prompt(
            "content_recognition", {"report": report_text})

        assert isinstance(messages, list)
        assert len(messages) == 1

        system_message = messages[0]
        assert system_message["role"] == "system"
        assert report_text in system_message["content"]

        # NOTE(review): the report text itself contains the word "content", so
        # this check cannot fail once the assertion above has passed.
        prompt_lower = system_message["content"].lower()
        assert "content recognition" in prompt_lower or "content" in prompt_lower
class TestValidateAndEnhanceSentences:
    """Tests for ``validate_and_enhance_sentences``.

    Every test passes the raw LLM output as a JSON string, the report text and
    a similarity threshold, then checks which sentences are returned.
    """

    def test_validate_and_enhance_sentences_basic(self):
        """Sentences that appear verbatim in the report are all returned."""
        llm_result = '{"sentences": ["This is sentence 1.", "This is sentence 2."]}'
        report = "This is sentence 1. This is sentence 2. Additional content."

        result = validate_and_enhance_sentences(llm_result, report, 0.8)

        assert "This is sentence 1." in result
        assert "This is sentence 2." in result
        assert len(result) == 2

    def test_validate_and_enhance_sentences_with_similar_sentences(self):
        """Near-miss sentences are processed and the result is a list of strings."""
        llm_result = '{"sentences": ["This is sentence one.", "This is sentence two."]}'
        report = "This is sentence 1. This is sentence 2. Additional content."

        result = validate_and_enhance_sentences(llm_result, report, 0.7)

        # Whether the paraphrased sentences clear the threshold depends on the
        # similarity metric, so only the shape of the result is pinned here.
        # (The previous ``len(result) >= 0`` check could never fail.)
        assert isinstance(result, list)
        assert all(isinstance(sentence, str) for sentence in result)

    def test_validate_and_enhance_sentences_with_exact_matches(self):
        """Exact matches are still returned under a stricter threshold."""
        llm_result = '{"sentences": ["This is sentence 1.", "This is sentence 2."]}'
        report = "This is sentence 1. This is sentence 2. Additional content."

        result = validate_and_enhance_sentences(llm_result, report, 0.9)

        assert "This is sentence 1." in result
        assert "This is sentence 2." in result
        assert len(result) == 2

    def test_validate_and_enhance_sentences_no_matches(self):
        """A sentence with no counterpart in the report is dropped."""
        llm_result = '{"sentences": ["This sentence is not in report."]}'
        report = "This is a completely different report."

        result = validate_and_enhance_sentences(llm_result, report, 0.9)

        assert result == []

    def test_validate_and_enhance_sentences_duplicate_handling(self):
        """A sentence repeated in the LLM output is returned only once."""
        llm_result = '{"sentences": ["This is sentence 1.", "This is sentence 1."]}'
        report = "This is sentence 1. Additional content."

        result = validate_and_enhance_sentences(llm_result, report, 0.9)

        assert len(result) == 1
        assert "This is sentence 1." in result

    def test_validate_and_enhance_sentences_empty_json(self):
        """An empty JSON object yields an empty result."""
        result = validate_and_enhance_sentences('{}', "This is a report.", 0.9)

        assert result == []

    def test_validate_and_enhance_sentences_no_sentences_key(self):
        """JSON without a ``sentences`` key yields an empty result."""
        llm_result = '{"other_key": "value"}'

        result = validate_and_enhance_sentences(
            llm_result, "This is a report.", 0.9)

        assert result == []
class TestFindSimilarSentence:
    """Tests for ``find_similar_sentence``."""

    def test_find_similar_sentence_exact_match(self):
        """A sentence present verbatim in the report gives a non-empty match."""
        matched = find_similar_sentence(
            "这是一个测试句子。", "这是一个测试句子。这是另一个句子。", 0.8)

        assert matched != ""

    def test_find_similar_sentence_high_similarity(self):
        """A sentence differing only in its final punctuation is still matched."""
        target = "This is a test sentence."
        haystack = "This is a test sentence! Here is another sentence."

        matched = find_similar_sentence(target, haystack, 0.9)

        assert matched != ""
        assert "This is a test sentence" in matched

    def test_find_similar_sentence_low_similarity(self):
        """An unrelated sentence gives an empty string."""
        target = "Completely different sentence."
        haystack = "This is a test sentence. Here is another sentence."

        matched = find_similar_sentence(target, haystack, 0.9)

        assert matched == ""

    def test_find_similar_sentence_multiple_candidates(self):
        """With several candidate sentences in the report, a match is returned."""
        target = "这是一个测试句子!"
        haystack = "完全不同。这是一个测试句子!另一个不同的内容。"

        matched = find_similar_sentence(target, haystack, 0.7)

        assert matched != ""

    def test_find_similar_sentence_with_threshold_0(self):
        """A zero threshold still gives a string result."""
        matched = find_similar_sentence(
            "Any sentence", "This is a test sentence.", 0.0)

        assert isinstance(matched, str)

    def test_find_similar_sentence_empty_report(self):
        """An empty report gives an empty string."""
        matched = find_similar_sentence("This is a test sentence.", "", 0.9)

        assert matched == ""

    def test_find_similar_sentence_empty_sentence(self):
        """An empty target sentence still gives a string result."""
        matched = find_similar_sentence("", "This is a test sentence.", 0.9)

        assert isinstance(matched, str)