* @fileoverview Tests for text_extractor.ts.
*/
import type {TextChunk, TextChunkConsumer} from '//ios/web/annotations/resources/text_extractor.js';
import {TextExtractor} from '//ios/web/annotations/resources/text_extractor.js';
import {expectEq, expectNeq, load, TestSuite} from '//ios/web/annotations/resources/text_test_utils.js';
/**
 * Test suite driving a `TextExtractor` by hand over a small DOM fixture and
 * checking the single `TextChunk` it reports to its consumer.
 */
export class TestTextExtractor extends TestSuite {
  /** Most recent chunk handed to `chunkConsumer`; cleared in `setUp`. */
  textChunk?: TextChunk;

  /** Consumer passed to the extractor; it only records the chunk it gets. */
  chunkConsumer: TextChunkConsumer = (received: TextChunk): void => {
    this.textChunk = received;
  };

  /** Forgets any chunk captured by a previous test. */
  override setUp(): void {
    this.textChunk = undefined;
  }

  /**
   * Feeds the children of `document.body` to the extractor one by one, then
   * verifies the text, offsets and sections of the resulting chunk.
   */
  testTextExtractorFlow(): void {
    // Fixture: top-level children only. <visible>/<invisible> are made-up tag
    // names used purely so the loop below can tell how to report each node.
    const html = [
      '<invisible>012</invisible>',
      '<visible>abc</visible>',
      '<visible>defgh</visible>',
      ' ',
      '<!-- Comment should be ignored -->',
      '<visible>ijkl</visible>',
      '<invisible>mno</invisible>',
      '<span>pqr</span>',
      '<span>stuv</span>',
      '<visible>wxyz</visible>',
      '\n',
      '<div>345678</div>',
    ].join('');
    load(html);

    // NOTE(review): the meaning of `5` and `'|'` is not visible from this
    // file; judging by the expectations below they look like a context length
    // and a separator for skipped content - confirm in text_extractor.ts.
    const extractor = new TextExtractor(this.chunkConsumer, 5, '|');
    const body = document.body;

    extractor.begin();
    for (const node of body.childNodes) {
      // Bare text directly under <body> is reported as visible text.
      if (node.nodeType === Node.TEXT_NODE) {
        extractor.visibleTextNode(node as Text);
        continue;
      }
      switch (node.nodeName) {
        case 'VISIBLE':
        case 'SPAN':
          // Both element kinds are walked the same way: enter, report the
          // single text child, leave.
          extractor.enterVisibleNode(node);
          extractor.visibleTextNode(node.childNodes[0] as Text);
          extractor.leaveVisibleNode(node);
          break;
        case 'INVISIBLE':
          extractor.invisibleNode(node);
          break;
        default:
          // Comments and the trailing <div> are never reported to the
          // extractor by this loop.
          break;
      }
    }
    expectEq(true, extractor.spaced);
    extractor.end();

    expectNeq(undefined, this.textChunk, 'textChunk:');
    const chunk = this.textChunk!;

    // NOTE(review): the leading '012 ' and trailing '34567' come from nodes
    // the loop reported as invisible or did not report at all; presumably the
    // extractor gathers surrounding context itself - verify.
    const expectedText = [
      '012 ',
      'abc',
      ' ',
      'defgh',
      ' ',
      'ijkl',
      ' ',
      '|',
      'pqr',
      'stuv',
      ' ',
      'wxyz',
      ' ',
      '34567',
    ].join('');
    expectEq(expectedText, chunk.text);
    expectEq(0, chunk.firstNodeOffset);
    expectEq(4, chunk.visibleStart);
    expectEq(33, chunk.visibleEnd);

    // Each entry is [offset of the section within chunk.text, full content of
    // the section's text node].
    const expectedSections: ReadonlyArray<readonly [number, string]> = [
      [0, '012'],
      [4, 'abc'],
      [8, 'defgh'],
      [14, 'ijkl'],
      [20, 'pqr'],
      [23, 'stuv'],
      [28, 'wxyz'],
      [33, '345678'],
    ];
    expectEq(expectedSections.length, chunk.sections.length);
    expectedSections.forEach(([index, content], position) => {
      const section = chunk.sections[position]!;
      expectEq(index, section.index);
      expectEq(content, section.textNode!.textContent);
    });
  }
}