您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
计算字符串重叠/匹配度计算
当前为
此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.greasyforks.org/scripts/501646/1416271/string-overlap-matching-degree.js
/** * 重叠匹配度 * @author: zhuangjie * @date: 2024-07-23 */ function overlapMatchingDegreeForObjectArray(keyword = "", objArr = [], fun = (obj) => [], {sort = "desc",onlyHasScope =false,scopeForObjArrContainer} = {}) { const scopeForData = objArr.map(item => overlapMatchingDegree(keyword, fun(item), sort)); // scope与 objArr 同步排序 sortAndSync(scopeForData, objArr) if(Array.isArray(scopeForObjArrContainer)) { // 说明需要分数,倒给 scopeForObjArrContainer.push(...scopeForData) } return onlyHasScope ? filterMismatchItem(scopeForData,objArr) : objArr; } // 根据scopeForData得到新数组objArr function filterMismatchItem(scopeForData,objArr) { const result = [] for(let [scope,index] of scopeForData.entries()) { if(scope != 0) { result.push(objArr[index]) } } return result } /** * 计算匹配度外层封装工具 * @param {string} keyword - 匹配字符串1 * @param {Object | Arrayy} topicWeighs - 匹配字符串2与它的权重 * @returns {number} 匹配度分数 */ function overlapMatchingDegree(keyword, topicWeighs = {}, sort = "desc") { // 兼容topicWeighs为topic数组,元素越靠前权重越高 if (Array.isArray(topicWeighs)) { const _temp = {} if (sort === "asc") { for (let i = 1; i <= topicWeighs.length; i++) { _temp[topicWeighs[i - 1]] = i; } } else { // desc for (let i = topicWeighs.length; i > 0; i--) { _temp[topicWeighs[topicWeighs.length - i]] = i; } } topicWeighs = _temp; } let topicList = Object.keys(topicWeighs) // topic map 得分 const topicScores = topicList.map(topic => { let topicScore = 0; // 得分 const currentTopicScore = topicWeighs[topic]; // 当前topic“个数” const overlapLengthBlocksMap = findOverlapBlocks(keyword, topic) const overlapLengthKeys = Object.keys(overlapLengthBlocksMap); for (let overlapLengthKey of overlapLengthKeys) { const currentLengthBlocks = overlapLengthBlocksMap[overlapLengthKey]; topicScore = Math.pow(currentTopicScore, overlapLengthKey) * currentLengthBlocks.length; } return topicScore; }) // 算总分返回 return topicScores.reduce((a, b) => a + b, 0) } /** * 查找重叠匹配块(入口函数) * @param {*} str1 * @param {*} str2 * @returns 返回重叠块 如:{"2": ["好用","推荐"],"3": ["好用推荐"]} * 算法核心思想: * ----------------------------------------------------- * sumatrapdf* | sumatrapdf* | sumatrapdf* * pdf- | pdf- | pdf- * ------------------------------------------------------ */ function findOverlapBlocks(str1 = "", str2 = "") { let maxStr2OffsetLength = str1.length - 1; let minStr2OffsetLength = -(str2.length - 1); const alignmentHub = { /* "3": ["好用","推荐"] */ } for (let currentStr2OffsetLength = maxStr2OffsetLength; currentStr2OffsetLength >= minStr2OffsetLength; currentStr2OffsetLength--) { let str1EndIndex = str1.length - 1; let str2EndIndex = currentStr2OffsetLength + (str2.length - 1); let overlappingStart = currentStr2OffsetLength >= 0 ? currentStr2OffsetLength : 0; // 重叠闭区间开始 [ let overlappingEnd = str2EndIndex < str1EndIndex ? str2EndIndex : str1EndIndex; // 重叠闭区间结束 ] // 对齐 const alignmentContent = alignment(str1.substring(overlappingStart, overlappingEnd + 1), str2.substring(overlappingStart - currentStr2OffsetLength, overlappingEnd - currentStr2OffsetLength + 1)); // 长重叠 覆盖 短重叠 longOverlappingCoverage(alignmentHub, alignmentContent); } return alignmentHub; } function longOverlappingCoverage(alignmentHub = {}, alignmentContent = {}) { const modifiedCols = Object.keys(alignmentContent) const maxmodifiedCols = Math.max(...modifiedCols) const minmodifiedCols = Math.min(...modifiedCols) // 合并到对应的列上 modifiedCols.forEach(col => { alignmentHub[col] = alignmentHub[col] ? Array.from(new Set(alignmentHub[col].concat(alignmentContent[col]))) : alignmentContent[col]; }) const alignmentHubCol = Object.keys(alignmentHub) const gtmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) > maxmodifiedCols); const ltmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) < minmodifiedCols); // 重新计算各列,过滤掉在modifiedCols的`大于modifiedCols的列`的子串,过滤掉`小于modifiedCols的列`在modifiedCols的子串 // - 过滤掉在modifiedCols的`大于modifiedCols的列`的子串 colElementFilter(alignmentHub, modifiedCols, gtmodifiedCols); // - 过滤掉`小于modifiedCols的列`在modifiedCols的子串 colElementFilter(alignmentHub, ltmodifiedCols, modifiedCols); } // 列元素过滤 function colElementFilter(alignmentHub = {}, cols = [], gtCols = []) { if (cols.length == 0 || gtCols.length == 0) return; gtCols.forEach(gtCol => { const gtOverlappingBlocks = alignmentHub[gtCol]; for (let [index, col] of cols.entries()) { const overlappingBlocks = alignmentHub[col]; if (gtOverlappingBlocks == undefined || overlappingBlocks == undefined) continue;; alignmentHub[col] = overlappingBlocks.filter(overlappingBlock => { for (let gtOverlappingBlock of gtOverlappingBlocks) { if (gtOverlappingBlock.includes(overlappingBlock)) { return false } } return true; }) } }) // 清理掉value为空数组项 // {1: [],2:["好用"]} -to-> {2:["好用"]} Object.keys(alignmentHub).forEach(key => { if (alignmentHub[key].length == 0) delete alignmentHub[key] }); } // 对齐 function alignment(str1 = "", str2 = "") { // 返回 {"1",["我","用"],"2":["推荐","可以"]} if (str1.length != str2.length) { throw new Error("算法错误,对齐字符串长度不一致"); } const overlappingBlocks = {} let currentOverlappingBlock = ""; for (let i = str1.length - 1; i >= 0; i--) { if (str1[i] == str2[i]) { currentOverlappingBlock = str1[i] + currentOverlappingBlock; if (i - 1 >= 0) continue; } if (currentOverlappingBlock.length == 0) { continue; } else { // 块收集 let currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] if (currentOverlappingBlockContainer == undefined) { currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] = [currentOverlappingBlock] } else if (!currentOverlappingBlockContainer.includes(currentOverlappingBlock)) { currentOverlappingBlockContainer.push(currentOverlappingBlock) } } currentOverlappingBlock = ""; } return overlappingBlocks; } // 【同步算法-堆实现】 function sortAndSync(arr1, arr2, order = 'desc') { if (arr1.length !== arr2.length) { throw new Error("Both arrays must have the same length"); } function swap(arr, i, j) { [arr[i], arr[j]] = [arr[j], arr[i]]; } function heapify(arr1, arr2, n, i, order) { let largest = i; let left = 2 * i + 1; let right = 2 * i + 2; if (order === 'asc') { if (left < n && arr1[left] > arr1[largest]) { largest = left; } if (right < n && arr1[right] > arr1[largest]) { largest = right; } } else { if (left < n && arr1[left] < arr1[largest]) { largest = left; } if (right < n && arr1[right] < arr1[largest]) { largest = right; } } if (largest !== i) { swap(arr1, i, largest); swap(arr2, i, largest); heapify(arr1, arr2, n, largest, order); } } function buildHeap(arr1, arr2, n, order) { for (let i = Math.floor(n / 2) - 1; i >= 0; i--) { heapify(arr1, arr2, n, i, order); } } function heapSort(arr1, arr2, order) { let n = arr1.length; buildHeap(arr1, arr2, n, order); for (let i = n - 1; i > 0; i--) { swap(arr1, 0, i); swap(arr2, 0, i); heapify(arr1, arr2, i, 0, order); } } heapSort(arr1, arr2, order); } // 【算法测试1】 // console.log("-- 算法测试开始 --") // console.log(findOverlapBlocks("[推荐]sumatrapdf非常好用","pdf 推荐")) // console.log("-- 算法测试结束 --") // 【算法测试2】 // console.log("匹配度:", overlapMatchingDegree("好用的pdf工具", { "sumatrapdf": 10, "小而好用的pdf阅读器": 8, "https://www.sumatrapdfreader.org/downloadafter": 3 }));