您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Motor de lógica de normalización para WME Place Normalizer. No funciona por sí solo.
此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.greasyforks.org/scripts/548747/1657858/WME%20PLN%20Core%20-%20Normalization%20Engine.js
// ==UserScript== // @name WME PLN Core - Normalization Engine // @namespace https://greasyforks.org/en/users/mincho77 // @version 9.0.0 // @description Motor de lógica de normalización para WME Place Normalizer. No funciona por sí solo. // @author mincho77 // @license MIT // @grant none // ==/UserScript== // Helper local (usa PLNCore.utils.escapeRegExp si existe) function plnEscapeRegExpLocal(s){ const f = (typeof PLNCore !== 'undefined' && PLNCore.utils && typeof PLNCore.utils.escapeRegExp === 'function') ? PLNCore.utils.escapeRegExp : (x => String(x).replace(/[.*+?^${}()|[\\]\\\\]/g, '\\$&')); return f(String(s)); } function applySwapRules(originalName, deps) { try { const DBG = !!(window.__PLN_SWAP_DEBUG_ON || localStorage.getItem('wme_pln_debug_swap') === '1'); let name = String(originalName || ''); const swaps = (typeof plnCollectSwapRules === 'function') ? plnCollectSwapRules(deps) : (Array.isArray(deps?.swapWords) ? deps.swapWords : Array.isArray(window.swapWords) ? window.swapWords : []); if (DBG) plnLog('swap', 'applySwapRules', { originalName, swapsCount: Array.isArray(swaps) ? swaps.length : 0 }); if (!swaps.length) { if (DBG) { plnLog('swap', 'skip: no swaps configured'); } return name; } const normalizeSpace = s => s.replace(/\s+/g, ' ').replace(/\s*-\s*/g, ' - ').trim(); for (const raw of swaps) { if (!raw) { if (DBG) plnLog('swap', 'skip: null item'); continue; } const token = String((raw.word || raw.text || raw.token || '').trim()); if (!token) { if (DBG) plnLog('swap', 'skip: empty token', raw); continue; } let where = String((raw.position || raw.where || raw.dir || raw.direction || '')).toLowerCase(); if (where === 'antes' || where === 'before' || where === 'pre') where = 'before'; if (where === 'despues' || where === 'después' || where === 'after' || where === 'post') where = 'after'; if (where !== 'before' && where !== 'after') { if (DBG) plnLog('swap', `skip [${token}]: invalid position`, raw); continue; } const esc = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/\s+/g, '\\s+'); const SEP = '[\\s,.;:()\\[\\]\\-–—\\/]'; const reFind = new RegExp(`(?:^|${SEP})${esc}(?=$|${SEP})`, 'iu'); const reAnywhere = new RegExp(`(?:^|${SEP})${esc}(?=$|${SEP})`, 'giu'); const reStart = new RegExp(`^\\s*${esc}(?=$|${SEP})`, 'iu'); const reEnd = new RegExp(`(?:^|${SEP})${esc}\\s*$`, 'iu'); if (DBG) plnLog('swap', `[${token}] → ${where}`); if (!reFind.test(name)) { if (DBG) { plnLog('swap', 'no-op: token not present in name', { name, token }); } continue; } if ((where === 'before' && reStart.test(name)) || (where === 'after' && reEnd.test(name))) { if (DBG) { plnLog('swap', 'no-op: already at target edge', { name }); } name = normalizeSpace(name); continue; } const before = name; name = name.replace(reAnywhere, ' ').replace(/\s{2,}/g, ' ').trim(); name = where === 'before' ? `${token} ${name}`.trim() : `${name} ${token}`.trim(); name = normalizeSpace(name); if (DBG) plnLog('swap', 'moved', { before, after: name }); } if (DBG) { plnLog('swap', 'result =>', name); } return name; } catch (e) { if (window.__PLN_SWAP_DEBUG_ON) plnLog('error', '[PLN Swap] error', e); return originalName; } } function plnCollectSwapRules(deps) { try { const normDir = v => { v = String(v || '').toLowerCase(); if (v === 'antes' || v === 'before' || v === 'pre' || v === 'start') return 'before'; if (v === 'despues' || v === 'después' || v === 'after' || v === 'post' || v === 'end') return 'after'; return null; }; const key = s => String(s || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase().trim(); const map = new Map(); const setRule = (w, d, pri) => { w = String(w || '').trim(); d = normDir(d); if (!w || !d) return; const k = key(w); const prev = map.get(k); if (!prev || (prev._pri || 0) <= pri) { map.set(k, { word: w, position: d, _pri: pri }); } }; (Array.isArray(deps?.swapWords) ? deps.swapWords : Array.isArray(window.swapWords) ? window.swapWords : []).forEach(x => { if (!x) return; if (typeof x === 'string') { setRule(x, 'before', 1); return; } const w = x.word || x.text || x.token || x.value || x.name; const d = x.position || x.where || x.dir || x.direction; setRule(w, d, 1); }); const FORCED_DIR = { 'urbanizacion': 'after' }; const FORCED_DISPLAY = { 'urbanizacion': 'Urbanización' }; for (const [k, rec] of map.entries()) { const forced = FORCED_DIR[k]; if (forced === 'after' || forced === 'before') rec.position = forced; } for (const fk in FORCED_DIR) { if (!map.has(fk)) { map.set(fk, { word: FORCED_DISPLAY[fk] || fk, position: FORCED_DIR[fk], _pri: 999 }); } } const arr = Array.from(map.values()).map(({ word, position }) => ({ word, position })); arr.sort((a, b) => b.word.length - a.word.length); return arr; } catch (e) { return []; } } function processPlaceName(originalName, deps) { let processedName = (originalName || '').trim(); const exclusions = new Map(); let placeholderIndex = 0; const EXC = deps?.excludedWords ?? window.excludedWords; const EXCM = deps?.excludedWordsMap ?? window.excludedWordsMap; const REPL = deps?.replacementWords ?? (typeof window.replacementWords === 'object' ? window.replacementWords : {}); const SKIP = deps?.skipGeneralReplacements ?? (typeof window.skipGeneralReplacements === 'boolean' ? window.skipGeneralReplacements : false); const DICT = deps?.dictionaryWords ?? window.dictionaryWords; if (EXC && EXC.size > 0) { const sortedExclusions = Array.from(EXC).sort((a, b) => b.length - a.length); sortedExclusions.forEach(phrase => { if (!phrase) return; const escapedPhrase = phrase.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); const regex = new RegExp(`\\b${escapedPhrase}\\b`, 'gi'); processedName = processedName.replace(regex, (match) => { const placeholder = `__PLN_EXCLUDED_${placeholderIndex}__`; exclusions.set(placeholder, phrase); placeholderIndex++; return placeholder; }); }); } processedName = processedName.replace(/\|/g, ' - '); processedName = processedName.replace(/\s{2,}/g, ' ').trim(); const words = processedName.split(/\s+/).filter(word => word.length > 0); const commonWords = ['es', 'de', 'del', 'el', 'la', 'los', 'las', 'y', 'e', 'o', 'u', 'un', 'una', 'unos', 'unas', 'a', 'en', 'con', 'tras', 'por', 'al', 'lo']; const normalizedWords = words.map((word, index) => { if (word.startsWith('__PLN_EXCLUDED_')) { return word; } if (word === '-') return '-'; const lower = (word || '').toLowerCase(); if (commonWords.includes(lower)) { const prevIsHyphen = index > 0 && words[index - 1] === '-'; const prevIsOpenParen = index > 0 && words[index - 1] === '('; if (index === 0 || prevIsHyphen || prevIsOpenParen) { return lower.charAt(0).toUpperCase() + lower.slice(1); } return lower; } return normalizeWordInternal(word, index === 0, false, { EXC, EXCM, REPL, SKIP, DICT }); }); processedName = normalizedWords.join(" "); processedName = aplicarReglasEspecialesNombre(processedName, { EXC, EXCM, REPL, SKIP, DICT }); processedName = postProcessQuotesAndParentheses(processedName); if (REPL && typeof REPL === 'object' && Object.keys(REPL).length > 0) { processedName = aplicarReemplazosDefinidos(processedName, REPL); } processedName = aplicarReemplazosGenerales(processedName, { SKIP }); exclusions.forEach((originalPhrase, placeholder) => { processedName = processedName.replace(placeholder, originalPhrase); }); let finalName = processedName.replace(/\s{2,}/g, ' ').trim(); finalName = finalName.replace(/\s*-\s*$/, ''); if (finalName.endsWith('.')) { finalName = finalName.slice(0, -1); } return finalName; } function normalizePlaceName(word) { if (!word || typeof word !== "string") return ""; if (word.includes("/")) { if (word === "/") return "/"; return word.split("/").map(part => normalizePlaceName(part.trim())).join("/"); } if (/^[0-9]+$/.test(word)) return word; word = word.replace(/(\d)([a-zA-Z])/g, (_, num, letter) => `${num}${letter.toUpperCase()}`); const romanRegexStrict = /^(C{0,3}(XC|XL|L?X{0,3})?(IX|IV|V?I{0,3})?)$/i; if (romanRegexStrict.test(word)) return word.toUpperCase(); if (/^[A-ZÁÉÍÓÚÑ0-9.]+$/.test(word) && word.length > 1 && (word.includes('.') || /^[A-ZÁÉÍÓÚÑ]+$/.test(word))) { if (word.toUpperCase() === "DI" || word.toUpperCase() === "SI") return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(); return word; } return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(); } function normalizeWordInternal(word, isFirstWordInSequence = false, isInsideQuotesOrParentheses = false, deps) { if (!word || typeof word !== 'string') return ""; const lowerWord = word.toLowerCase(); if (deps?.DICT && deps.DICT.has(lowerWord)) { const originalDictEntry = Array.from(deps.DICT).find(w => w.toLowerCase() === lowerWord); if (originalDictEntry) return originalDictEntry; } if (deps?.EXC && deps?.EXCM) { const cleanedInputWord = removeDiacritics(word.toLowerCase()); const firstChar = word.charAt(0).toLowerCase(); const excludedCandidates = deps.EXCM.get(firstChar); if (excludedCandidates) { for (const excludedWord of excludedCandidates) { if (removeDiacritics(excludedWord.toLowerCase()) === cleanedInputWord) return excludedWord; } } } if (word.includes('-') && /\p{L}-\p{L}/u.test(word)) { return word.split('-').map(part => /^[A-ZÁÉÍÓÚÑ0-9.]+$/.test(part) && part.length > 1 ? part : part.charAt(0).toUpperCase() + part.slice(1).toLowerCase()).join('-'); } if (word.includes("'")) return handleApostropheWord(word); if (/^[A-ZÁÉÍÓÚÑ0-9.&]+$/.test(word) && word.length > 1) return word; if (/^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i.test(word)) return word.toUpperCase(); const commonWords = ['es', 'de', 'del', 'el', 'la', 'los', 'las', 'y', 'e', 'o', 'u', 'un', 'una', 'unos', 'unas', 'a', 'en', 'con', 'tras', 'por', 'al', 'lo']; const lowerWordForCommonCheck = word.toLowerCase().replace('.', ''); if (commonWords.includes(lowerWordForCommonCheck)) { if (lowerWordForCommonCheck === "y") return isFirstWordInSequence ? "Y" : "y"; if (lowerWordForCommonCheck === "e") return isFirstWordInSequence ? "E" : "e"; if (isFirstWordInSequence && !isInsideQuotesOrParentheses) return lowerWordForCommonCheck.charAt(0).toUpperCase() + lowerWordForCommonCheck.slice(1); return lowerWordForCommonCheck; } let wordWithoutPunctuation = word.endsWith('.') ? word.slice(0, -1) : word; return wordWithoutPunctuation.charAt(0).toUpperCase() + wordWithoutPunctuation.slice(1).toLowerCase(); } function aplicarReemplazosGenerales(name, deps) { if (deps?.SKIP) return name; name = removeEmoticons(name); const reglas = [ { buscar: /\|/g, reemplazar: " - " }, { buscar: /\s*\/\s*/g, reemplazar: " - " }, { buscar: /\[[Pp]\]/g, reemplazar: "" }, { buscar: /(\p{L}|\p{N})\s*-\s*(\p{L}|\p{N})/gu, reemplazar: "$1 - $2" }, { buscar: /\s*-\s*/g, reemplazar: " - " }, { buscar: /\s{2,}/g, reemplazar: ' ' }, ]; reglas.forEach(regla => { name = name.replace(regla.buscar, regla.reemplazar); }); name = name.replace(/\s{2,}/g, ' ').trim(); name = name.replace(/\s*-\s*-\s*/g, ' - '); name = name.replace(/--/g, '-'); return name; } function aplicarReglasEspecialesNombre(newName, deps) { newName = newName.replace(/-(\s*)([^\s]+)/g, (match, spaces, nextWord) => `-${spaces}${normalizeWordInternal(nextWord, true, false, deps)}`); newName = newName.replace(/\.\s+([a-z])/g, (match, letter) => `. ${letter.toUpperCase()}`); newName = newName.replace(/(\(\s*)([a-zA-Z])/g, (match, P1, P2) => P1 + P2.toUpperCase()); newName = newName.replace(/\s([a-zA-Z])$/, (match, letter) => ` ${letter.toUpperCase()}`); return newName.replace(/\s{2,}/g, ' ').trim(); } function aplicarReemplazosDefinidos(text, replacementRules) { let newText = text; if (typeof replacementRules !== 'object' || replacementRules === null || Object.keys(replacementRules).length === 0) return newText; const sortedFromKeys = Object.keys(replacementRules).sort((a, b) => b.length - a.length); for (const fromKey of sortedFromKeys) { const toValue = replacementRules[fromKey]; const escapedFromKey = plnEscapeRegExpLocal(String(fromKey)); let regex; const wordCharSet = '[\\p{L}\\p{N}_-]'; if (toValue.endsWith(' -')) { regex = new RegExp(`(^|[^\\p{L}\\p{N}_\\-])(${escapedFromKey})(\\s+)(${wordCharSet}+)?(?=$|[^\\p{L}\\p{N}_-])`, 'giu'); } else { regex = new RegExp(`(^|[^\\p{L}\\p{N}_-])(${escapedFromKey})(?=$|[^\\p{L}\\p{N}_-])`, 'giu'); } newText = newText.replace(regex, (match, ...args) => { const originalString = args[args.length - 1]; const offset = args[args.length - 2]; let delimitadorPrevio, matchedFromKey, capturedSpaces, nextWordIfCaptured; if (toValue.endsWith(' -')) { [delimitadorPrevio, matchedFromKey, capturedSpaces, nextWordIfCaptured] = args; } else { [delimitadorPrevio, matchedFromKey] = args; } if (toValue.endsWith(' -')) { return delimitadorPrevio + toValue + (nextWordIfCaptured || ''); } return delimitadorPrevio + toValue; }); } return newText; } function isExcludedWord(word, deps) { if (!word || !(deps?.EXC || window.excludedWords)) return null; const clean = w => w.trim().toLowerCase().normalize("NFD").replace(/[\u0300-\u036f]/g, ""); const cleanedWord = clean(word); for (const excl of (deps?.EXC || window.excludedWords)) { if (clean(excl) === cleanedWord) { return excl; } } return null; } function plnApplyExclusions(str, deps) { try { const reWord = /([\p{L}\p{M}][\p{L}\p{M}\.'’]*)/gu; return String(str || '').replace(reWord, (m) => { try { const excl = typeof isExcludedWord === 'function' ? isExcludedWord(m, deps) : null; return excl ? excl : m; } catch (_) { return m; } }); } catch (_) { return String(str || ''); } } function handleApostropheWord(word) { const parts = word.split("'"); if (parts.length === 2) { const [before, after] = parts; if (after.toLowerCase() === 's') { return before + "'s"; } else { return before + "'" + (after.charAt(0).toUpperCase() + after.slice(1).toLowerCase()); } } return word; } function postProcessQuotesAndParentheses(text) { if (typeof text !== 'string') return text; const capitalizeFirstLetter = (string) => !string ? string : string.charAt(0).toUpperCase() + string.slice(1); text = text.replace(/"([^"]*)"/g, (match, content) => `"${capitalizeFirstLetter(content.trim())}"`); text = text.replace(/\(([^)]*)\)/g, (match, content) => `(${capitalizeFirstLetter(content.trim())})`); return text.replace(/\s+/g, ' ').trim(); } function removeEmoticons(text) { if (!text || typeof text !== 'string') return ''; const emojiRegex = /[\p{Emoji_Presentation}\p{Extended_Pictographic}]/gu; return text.replace(emojiRegex, '').trim().replace(/\s{2,}/g, ' '); } function isValidExcludedWord(newWord, deps) { if (!newWord) return { valid: false, msg: "La palabra no puede estar vacía." }; const lowerNewWord = newWord.toLowerCase(); const DICT = deps?.dictionaryWords || window.dictionaryWords; const EXC = deps?.excludedWords || window.excludedWords; const EXCM = deps?.excludedWordsMap|| window.excludedWordsMap; if (newWord.length === 1) return { valid: false, msg: "No se permite agregar palabras de un solo caracter." }; if (/[-'\s]/.test(newWord)) return { valid: true }; if (/^[^a-zA-Z0-9áéíóúÁÉÍÓÚñÑ]+$/.test(newWord)) return { valid: false, msg: "No se permite agregar solo caracteres especiales." }; if (DICT) { if (Array.from(DICT).some(w => w.toLowerCase() === lowerNewWord)) { return { valid: false, msg: "La palabra ya existe en el diccionario (sin considerar mayúsculas/minúsculas). No se puede agregar a especiales." }; } if (Array.from(DICT).some(w => w === newWord)) { return { valid: false, msg: "La palabra (con esta capitalización exacta) ya existe en el diccionario. No se puede agregar a especiales." }; } } const commonWords = ['es', 'de', 'del', 'el', 'la', 'los', 'las', 'y', 'e', 'o', 'u', 'un', 'una', 'unos', 'unas', 'a', 'en', 'con', 'tras', 'por', 'al', 'lo']; if (commonWords.includes(lowerNewWord)) return { valid: false, msg: "Esa palabra es muy común y no debe agregarse a la lista." }; if (EXC) { if (EXC.has(newWord)) return { valid: false, msg: "La palabra (con esta capitalización exacta) ya está en la lista." }; if (EXCM) { const firstChar = lowerNewWord.charAt(0); const candidatesForFirstChar = EXCM.get(firstChar); if (candidatesForFirstChar) { for (const existingWord of candidatesForFirstChar) { if (existingWord.toLowerCase() === lowerNewWord) { return { valid: false, msg: "Esta palabra ya existe en la lista (con diferente capitalización)." }; } } } } } return { valid: true }; }