/*
* -------------------------------------------------------------------------
* This file is part of the MindStudio project.
* Copyright (c) 2025 Huawei Technologies Co.,Ltd.
*
* MindStudio is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* -------------------------------------------------------------------------
*/
import DOMIterator from './domiterator';
import RegExpTool from './RegExpTool';
/**
 * Highlights search keywords inside a DOM context by wrapping every match in
 * a marker element, and removes those marker elements again on request.
 */
export default class Mark {
  /**
   * @param {HTMLElement | HTMLElement[]} searchContext - Context (element or
   *   elements) inside which highlighting is performed.
   */
  constructor(searchContext) {
    this.ctx = searchContext;
    // IE/Trident user agents get the hand-written text-node normalisation in
    // getNormalizationStrategy(). The typeof guard keeps construction from
    // throwing when no global `window` exists.
    this.ie = typeof window !== 'undefined' &&
      /MSIE|Trident/.test(window.navigator.userAgent);
  }

  /**
   * Stores the options, filling every key the caller did not supply with its
   * default value.
   *
   * @param {object} [val] - Caller supplied options.
   */
  set opt(val) {
    this._opt = {
      highlightTagName: '',
      className: '',
      indexName: 'index',
      exclude: [],
      separateWordSearch: true,
      acrossElements: false,
      ignoreGroups: 0,
      each: () => {},
      noMatch: () => {},
      filter: () => true,
      done: () => {},
      ...val,
    };
  }

  /** @returns {object} The merged options stored by the setter. */
  get opt() {
    return this._opt;
  }

  /** @returns {DOMIterator} A new iterator over the context, honouring `opt.exclude`. */
  get iterator() {
    return new DOMIterator(this.ctx, this.opt.exclude);
  }

  /**
   * Normalises the search input into a de-duplicated keyword list, sorted
   * longest first (ties broken with `localeCompare`).
   *
   * @param {string | string[]} sv - A keyword or a list of keywords.
   * @returns {{keywords: string[], length: number}} Keywords and their count.
   */
  getSeparatedKeywords(sv) {
    const uniqueKeywords = new Set();
    const candidates = Array.isArray(sv) ? sv : [sv];
    for (const candidate of candidates) {
      // null / undefined carry no searchable text; skip them instead of
      // failing on `.trim()`.
      if (candidate == null) {
        continue;
      }
      const trimmed = String(candidate).trim();
      if (trimmed === '') {
        continue;
      }
      if (this.opt.separateWordSearch) {
        trimmed.split(/\s+/).forEach((word) => uniqueKeywords.add(word));
      } else {
        uniqueKeywords.add(trimmed);
      }
    }
    const sortedKeywords = [...uniqueKeywords]
      .sort((a, b) => b.length - a.length || a.localeCompare(b));
    return {
      keywords: sortedKeywords,
      length: sortedKeywords.length,
    };
  }

  /**
   * Walks the text nodes of the context and hands the callback a dictionary
   * with the concatenated text plus, for every node, the range it occupies
   * inside that text.
   *
   * @param {function({value: string, nodes: Array<{start: number, end: number, node: Text}>}): void} onComplete
   *   Invoked once the iterator reports completion.
   */
  getTextNodes(onComplete) {
    let combinedValue = '';
    const textNodes = [];
    const collect = (node) => {
      const start = combinedValue.length;
      combinedValue += node.textContent;
      textNodes.push({ start, end: combinedValue.length, node });
    };
    // Text is skipped when its parent element matches the exclusion list.
    const accept = (node) => (this.matchesExclude(node.parentNode)
      ? NodeFilter.FILTER_REJECT
      : NodeFilter.FILTER_ACCEPT);
    this.iterator.forEachNode(
      NodeFilter.SHOW_TEXT,
      collect,
      accept,
      () => onComplete({ value: combinedValue, nodes: textNodes }),
    );
  }

  /**
   * @param {Element} el - Element to test.
   * @returns {boolean} Result of `DOMIterator.matches` against `opt.exclude`
   *   extended with script, style, title, head and html.
   */
  matchesExclude(el) {
    return DOMIterator.matches(el, this.opt.exclude.concat([
      'script', 'style', 'title', 'head', 'html',
    ]));
  }

  /**
   * Splits a text node at `start` and `end` and replaces the middle part with
   * a marker element (`opt.highlightTagName`, or `mark` when that is empty).
   *
   * @param {Text} node - Text node containing the range.
   * @param {number} start - Start offset inside the node.
   * @param {number} end - End offset inside the node.
   * @param {number} [index] - Match index written to `data-<opt.indexName>`;
   *   the attribute is omitted when no index is given.
   * @returns {Text} The text node that follows the new marker element.
   * @throws {Error} When `start` is negative or `end` precedes `start`.
   */
  wrapRangeInTextNode(node, start, end, index) {
    if (start < 0 || end < start) {
      throw new Error(`Invalid start/end positions: ${start}-${end}`);
    }
    const tagName = this.opt.highlightTagName || 'mark';
    const { className } = this.opt;
    const middleNode = node.splitText(start);
    const afterNode = middleNode.splitText(end - start);
    const wrapper = document.createElement(tagName);
    wrapper.setAttribute('data-markjs', 'true');
    if (index !== undefined) {
      wrapper.setAttribute(`data-${this.opt.indexName}`, index);
    }
    if (className) {
      wrapper.setAttribute('class', className);
    }
    wrapper.textContent = middleNode.textContent;
    middleNode.parentNode.replaceChild(wrapper, middleNode);
    return afterNode;
  }

  /**
   * Wraps a range expressed in `dict.value` coordinates, which may span
   * several text nodes. The dictionary is rewritten while wrapping so that it
   * keeps describing the not-yet-wrapped remainder.
   *
   * @param {Array} args - Tuple `[dict, start, end, filterCb, eachCb, index]`:
   *   the text-node dictionary, the range, a per-node filter, a callback
   *   receiving `(markerElement, nodeStart)` and the match index.
   */
  wrapRangeInMappedTextNode([dict, start, end, filterCb, eachCb, index]) {
    let startOffset = start;
    let endOffset = end;
    for (let i = 0; i < dict.nodes.length; i++) {
      const currentNode = dict.nodes[i];
      const nextNode = dict.nodes[i + 1];
      // Only the node that actually contains startOffset is processed.
      if (nextNode !== undefined && nextNode.start <= startOffset) {
        continue;
      }
      if (!filterCb(currentNode.node)) {
        break;
      }
      const relativeStart = startOffset - currentNode.start;
      const relativeEnd = Math.min(endOffset, currentNode.end) - currentNode.start;
      this.wrapNodeAndUpdateDict(currentNode, relativeStart, relativeEnd, dict, index);
      this.adjustSubsequentOffsets(dict, i, relativeEnd);
      endOffset -= relativeEnd;
      eachCb(currentNode.node.previousSibling, currentNode.start);
      if (endOffset > currentNode.end) {
        // The range continues in the following text node.
        startOffset = currentNode.end;
      } else {
        break;
      }
    }
  }

  /**
   * Wraps the range in one dictionary entry, points the entry at the text
   * node left after the marker and removes the consumed text from
   * `dict.value`.
   *
   * @param {{start: number, end: number, node: Text}} nodeInfo - Dictionary entry.
   * @param {number} relStart - Range start relative to the entry.
   * @param {number} relEnd - Range end relative to the entry.
   * @param {{value: string, nodes: Array}} dict - Text-node dictionary.
   * @param {number} index - Match index for the marker element.
   */
  wrapNodeAndUpdateDict(nodeInfo, relStart, relEnd, dict, index) {
    const preservedStart = nodeInfo.start;
    const prefix = dict.value.substring(0, preservedStart);
    const suffix = dict.value.substring(preservedStart + relEnd);
    nodeInfo.node = this.wrapRangeInTextNode(nodeInfo.node, relStart, relEnd, index);
    dict.value = prefix + suffix;
  }

  /**
   * Shifts dictionary offsets left by `adjustLength`: the entry at
   * `startIndex` only has its end moved, later entries have both start
   * (when greater than zero) and end moved.
   *
   * @param {{nodes: Array<{start: number, end: number}>}} dict - Text-node dictionary.
   * @param {number} startIndex - Index of the entry that was just wrapped.
   * @param {number} adjustLength - Number of characters removed.
   */
  adjustSubsequentOffsets(dict, startIndex, adjustLength) {
    for (let j = startIndex; j < dict.nodes.length; j++) {
      const entry = dict.nodes[j];
      if (j > startIndex && entry.start > 0) {
        entry.start -= adjustLength;
      }
      entry.end -= adjustLength;
    }
  }

  /**
   * Wraps every match of `regex` found inside individual text nodes.
   *
   * @param {RegExp} regex - Global expression to search with.
   * @param {number} ignoreGroups - Number of leading capture groups to skip.
   * @param {function(string, Text): boolean} filterCb - Return false to skip a match.
   * @param {function(Element): void} eachCb - Receives every marker element.
   * @param {function(number): void} endCb - Receives the number of wrapped matches.
   */
  wrapMatches(regex, ignoreGroups, filterCb, eachCb, endCb) {
    const matchIdx = this.getMatchGroupIndex(ignoreGroups);
    let wrappedCount = 0;
    const wrapMatch = (match, textNode) => {
      let pos = match.index;
      for (let i = 1; i < matchIdx; i++) {
        pos += match[i].length;
      }
      const afterNode = this.wrapRangeInTextNode(
        textNode,
        pos,
        pos + match[matchIdx].length,
        wrappedCount,
      );
      wrappedCount++;
      eachCb(afterNode.previousSibling);
      return afterNode;
    };
    const scanNode = ({ node }) => {
      let currentNode = node;
      let match;
      // An empty match on the previous node ends its scan without exec()
      // resetting lastIndex, so start every node from offset zero.
      regex.lastIndex = 0;
      while ((match = regex.exec(currentNode.textContent)) !== null
        && match[matchIdx] !== '') {
        if (!filterCb(match[matchIdx], currentNode)) {
          continue;
        }
        currentNode = wrapMatch(match, currentNode);
        // The scan continues in the node after the marker, so the previous
        // offset no longer applies.
        regex.lastIndex = 0;
      }
    };
    this.getTextNodes((dict) => {
      dict.nodes.forEach(scanNode);
      endCb(wrappedCount);
    });
  }

  /**
   * Wraps every match of `regex` found in the concatenated text of all text
   * nodes, so a match may span several elements.
   *
   * @param {RegExp} regex - Global expression to search with.
   * @param {number} ignoreGroups - Number of leading capture groups to skip.
   * @param {function(string, Text): boolean} filterCb - Return false to skip a match.
   * @param {function(Element): void} eachCb - Receives every marker element.
   * @param {function(number): void} endCb - Receives the number of wrapped matches.
   */
  wrapMatchesAcrossElements(regex, ignoreGroups, filterCb, eachCb, endCb) {
    const matchGroupIndex = this.getMatchGroupIndex(ignoreGroups);
    this.processTextNodes((textNodeDict) => {
      endCb(this.findAndWrapMatches(regex, textNodeDict, matchGroupIndex, filterCb, eachCb));
    });
  }

  /**
   * @param {number} ignoreGroups - Number of leading capture groups to skip.
   * @returns {number} 0 when nothing is skipped, otherwise `ignoreGroups + 1`.
   */
  getMatchGroupIndex(ignoreGroups) {
    return ignoreGroups === 0 ? 0 : ignoreGroups + 1;
  }

  /**
   * @param {function(object): void} callback - Receives the text-node
   *   dictionary produced by getTextNodes().
   */
  processTextNodes(callback) {
    this.getTextNodes((textNodeDict) => callback(textNodeDict));
  }

  /**
   * Repeatedly executes `regex` against `textNodeDict.value` and wraps each
   * match. Stops at the first empty match.
   *
   * @param {RegExp} regex - Global expression to search with.
   * @param {{value: string, nodes: Array}} textNodeDict - Text-node dictionary.
   * @param {number} matchGroupIndex - Capture group holding the text to wrap.
   * @param {function(string, Text): boolean} filterCb - Return false to skip a match.
   * @param {function(Element): void} eachCb - Receives every marker element.
   * @returns {number} Number of matches for which at least one marker element
   *   was created; matches rejected by the filter are not counted and do not
   *   consume an index.
   */
  findAndWrapMatches(regex, textNodeDict, matchGroupIndex, filterCb, eachCb) {
    let index = 0;
    let matchResult;
    while ((matchResult = this.getNextMatch(regex, textNodeDict.value)) !== null) {
      const { matchedText, startPos, endPos } = this.parseMatchResult(
        matchResult,
        matchGroupIndex,
      );
      if (!this.isValidMatch(matchedText)) {
        break;
      }
      let wrapped = false;
      this.wrapRangeInMappedTextNode([
        textNodeDict,
        startPos,
        endPos,
        (currentNode) => filterCb(matchedText, currentNode),
        (wrappedNode, lastIndexPosition) => {
          wrapped = true;
          // The wrapped text was removed from the dictionary value, so the
          // search resumes at the start offset of the affected node.
          this.updateRegexIndex(regex, lastIndexPosition);
          eachCb(wrappedNode);
        },
        index,
      ]);
      if (wrapped) {
        index++;
      }
    }
    return index;
  }

  /**
   * @param {RegExp} regex - Expression to execute.
   * @param {string} textValue - Text to search.
   * @returns {RegExpExecArray | null} Result of `regex.exec(textValue)`.
   */
  getNextMatch(regex, textValue) {
    return regex.exec(textValue);
  }

  /**
   * Computes the range of capture group `groupIndex`, assuming the groups
   * before it are consecutive and start at the match index.
   *
   * @param {RegExpExecArray} matchResult - Result of `RegExp#exec`.
   * @param {number} groupIndex - Capture group holding the text to wrap.
   * @returns {{matchedText: string, startPos: number, endPos: number}} Text and range.
   */
  parseMatchResult(matchResult, groupIndex) {
    let startOffset = matchResult.index;
    for (let i = 1; i < groupIndex; i++) {
      startOffset += matchResult[i].length;
    }
    const matchedText = matchResult[groupIndex];
    return {
      matchedText,
      startPos: startOffset,
      endPos: startOffset + matchedText.length,
    };
  }

  /**
   * @param {string} matchedText - Text captured by the relevant group.
   * @returns {boolean} False for the empty string, true otherwise.
   */
  isValidMatch(matchedText) {
    return matchedText !== '';
  }

  /**
   * @param {RegExp} regex - Expression to update.
   * @param {number} lastIndex - Offset at which the next exec() starts.
   */
  updateRegexIndex(regex, lastIndex) {
    regex.lastIndex = lastIndex;
  }

  /**
   * Replaces an element with its own child nodes and normalises the parent.
   *
   * @param {Element} node - Marker element to remove.
   */
  unwrapMatches(node) {
    const parent = node.parentNode;
    const documentFragment = document.createDocumentFragment();
    while (node.firstChild) {
      documentFragment.appendChild(node.firstChild);
    }
    parent.replaceChild(documentFragment, node);
    this.getNormalizationStrategy(parent);
  }

  /**
   * Normalises `parent`: via normalizeTextNode() when `this.ie` is set,
   * otherwise via the native `Node#normalize`.
   *
   * @param {Node} parent - Node whose text nodes are merged.
   */
  getNormalizationStrategy(parent) {
    if (this.ie) {
      this.normalizeTextNode(parent);
    } else {
      parent.normalize();
    }
  }

  /**
   * Appends the value of every directly following text sibling to `textNode`
   * and removes that sibling.
   *
   * @param {Text} textNode - Text node that absorbs its text siblings.
   */
  mergeAdjacentTextNodes(textNode) {
    let nextSibling;
    while ((nextSibling = textNode.nextSibling)?.nodeType === Node.TEXT_NODE) {
      textNode.nodeValue += nextSibling.nodeValue;
      textNode.parentNode.removeChild(nextSibling);
    }
  }

  /**
   * Merges adjacent text nodes in `node`, in its descendants and in all of
   * its following siblings. Siblings are walked iteratively so that a long
   * sibling list does not deepen the call stack.
   *
   * @param {Node | null} node - First node to process.
   */
  normalizeTextNode(node) {
    for (let current = node; current; current = current.nextSibling) {
      if (current.nodeType === Node.TEXT_NODE) {
        this.mergeAdjacentTextNodes(current);
      } else {
        this.normalizeTextNode(current.firstChild);
      }
    }
  }

  /**
   * Highlights every keyword, one after the other. `opt.each` receives every
   * marker element, `opt.noMatch` every keyword without a marker, and
   * `opt.done` is called once at the end.
   *
   * Note: `opt.done` receives the match count reported for the last keyword
   * searched, not a total over all keywords.
   *
   * @param {string | string[]} sv - Keyword(s) to highlight.
   * @param {object} [config] - Options; also handed to RegExpTool.
   */
  mark(sv, config) {
    this.opt = config;
    const { keywords, length: keywordsLen } = this.getSeparatedKeywords(sv);
    if (keywordsLen === 0) {
      this.opt.done(0);
      return;
    }
    const searchFunctionName = this.opt.acrossElements ? 'wrapMatchesAcrossElements' : 'wrapMatches';
    // Matching ignores case unless opt.caseSensitive is truthy.
    const regexFlags = `gm${this.opt.caseSensitive ? '' : 'i'}`;
    let totalMatches = 0;
    const handleKeyword = (position) => {
      const kw = keywords[position];
      const regStr = new RegExpTool(config).createRegExp(kw);
      const regex = new RegExp(regStr, regexFlags);
      let matchCount = 0;
      this[searchFunctionName](
        regex,
        1,
        (term, node) => this.opt.filter(node, kw, totalMatches, matchCount),
        (element) => {
          matchCount++;
          totalMatches++;
          this.opt.each(element);
        },
        (strMatchesCount) => {
          if (matchCount === 0) {
            this.opt.noMatch(kw);
          }
          if (position === keywordsLen - 1) {
            this.opt.done(strMatchesCount);
          } else {
            handleKeyword(position + 1);
          }
        },
      );
    };
    handleKeyword(0);
  }

  /**
   * Removes every marker element inside the context, keeping its content in
   * place, and normalises the parent element afterwards.
   *
   * @param {object} [config] - Options; `highlightTagName` and `className`
   *   narrow the set of marker elements that are removed.
   */
  unmark(config = {}) {
    this.opt = config;
    const { highlightTagName, className } = this.opt;
    // An empty tag name means "any element". A class list such as "a b" has
    // to become ".a.b" to match elements that carry both classes.
    const tagSelector = highlightTagName || '*';
    const classSelector = typeof className === 'string'
      ? className.split(/\s+/).filter(Boolean).map((name) => `.${name}`).join('')
      : '';
    const selector = `${tagSelector}[data-markjs]${classSelector}`;
    this.iterator.forEachNode(
      NodeFilter.SHOW_ELEMENT,
      (item) => {
        this.unwrapMatches(item);
      },
      (node) => {
        const isMarker = DOMIterator.matches(node, selector);
        const isExcluded = this.matchesExclude(node);
        return isMarker && !isExcluded ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT;
      },
      this.opt.done,
    );
  }
}