2020-02-03 14:30:19 +00:00
|
|
|
const constants = require("../constants")
|
|
|
|
|
2020-05-30 11:04:06 +00:00
|
|
|
// Dot-like characters that may appear in a run of trailing separators.
// Each one is inserted verbatim into the character class of `dotRegex` below, so a character that is
// special inside a regex class (`]`, `\`, `^`, `-`) would need escaping before being added here.
const dots = [
	".", // full stop
	"\u00b7", // middle dot
	"\u2022", // bullet
	"\u2027", // hyphenation point
	"\u2219", // bullet operator
	"\u22c5", // dot operator
	"\u2e31", // word separator middle dot
	"\u2e33", // raised dot
	"\u30fb", // katakana middle dot
	"\uff65", // halfwidth katakana middle dot
]

// Matches one newline or space, followed by any mix of newlines, spaces, "#" and the dots above,
// reaching up to the end of a line.
// Flags: `g` replaces every such run, `m` lets `$` match before each newline as well as at the very
// end of the text (so runs at the end of ANY line match, not only the last one), and `s` has no
// effect here because the pattern contains no "." outside a character class.
const dotRegex = new RegExp(`[\n ][\n #${dots.join("")}]*$`, "gms")
|
|
|
|
|
2020-02-03 14:30:19 +00:00
|
|
|
/**
 * Run `against` over `text` and hand the matches to `callback`.
 *
 * - Global RegExp: each match is passed to `callback` in order, for as long as the callback returns
 *   a truthy value and there are matches left. The scan always starts at the beginning of `text`,
 *   and the regex's `lastIndex` is back at 0 afterwards, even if the callback throws.
 * - Anything else (a non-global RegExp, or a string, which `String.prototype.match` compiles into a
 *   RegExp): only the first match is passed, and the callback's return value is ignored.
 *
 * @param {string} text - the text to search
 * @param {RegExp|string} against - the pattern to look for
 * @param {(match: RegExpMatchArray) => any} callback - receives each match; for global patterns,
 *   return something truthy to keep receiving matches
 */
function tryMatch(text, against, callback) {
	if (against instanceof RegExp && against.global) {
		// a regex left mid-scan by an earlier user would otherwise skip the start of the text
		against.lastIndex = 0
		try {
			let matched
			let ok = true
			while (ok && (matched = against.exec(text))) {
				if (matched[0].length === 0) {
					// exec() does not move past an empty match, so step forward by hand or this never ends.
					// With the `u` flag, step over a whole astral code point rather than half a surrogate pair.
					const codePoint = against.unicode ? text.codePointAt(against.lastIndex) : undefined
					against.lastIndex += codePoint > 0xffff ? 2 : 1
				}
				ok = callback(matched)
			}
		} finally {
			// leave the caller's regex reusable no matter how the loop ended
			against.lastIndex = 0
		}
	} else {
		// if it's a non-global match, just do the match.
		const matched = text.match(against)
		if (matched) callback(matched)
	}
}
|
|
|
|
|
|
|
|
/**
 * Wrap a plain string in a fresh parts list holding a single text part.
 *
 * @param {string} text
 * @returns {{type: "text", text: string}[]} a new one-element array on every call
 */
function textToParts(text) {
	return [{type: "text", text}]
}
|
|
|
|
|
|
|
|
/**
 * Replace the matched slice of the text part at `parts[index]`, editing `parts` in place.
 *
 * The part is swapped for: a text part with everything before the match, then `replacements`,
 * then a text part with everything after the match. The surrounding text parts are inserted even
 * when they are empty strings, so the replacements always begin at `index + 1`; partsUsername and
 * partsHashtag rely on that fixed offset when they skip over the part they just created.
 *
 * @param {{type: string, text: string}[]} parts - parts list to edit
 * @param {number} index - position of the text part that was matched against
 * @param {RegExpMatchArray} match - a match made against `parts[index].text` (needs `index` and `[0]`)
 * @param {object[]} replacements - parts to put where the matched text was
 */
function replacePart(parts, index, match, replacements) {
	const source = parts[index].text
	const matchEnd = match.index + match[0].length
	parts.splice(
		index,
		1,
		...textToParts(source.slice(0, match.index)),
		...replacements,
		...textToParts(source.slice(matchEnd))
	)
}
|
|
|
|
|
|
|
|
/**
 * Split "@username" mentions out of every text part, editing `parts` in place.
 *
 * A mention only counts when the "@" is at the very start of the text or directly follows a
 * non-word character (`\W`); other candidates are skipped and the next one in the same text is
 * tried. Each text part has at most one mention replaced per visit: the text after the mention
 * becomes a new text part further along the list, which the loop then reaches and scans in turn.
 *
 * @param {{type: string, text: string}[]} parts - parts list to edit
 * @returns the same `parts` array
 */
function partsUsername(parts) {
	// built once per call rather than once per part; tryMatch rewinds lastIndex when its scan finishes
	const mentionRegex = new RegExp(`@(${constants.external.username_regex})`, "g")
	for (let i = 0; i < parts.length; i++) {
		if (parts[i].type !== "text") continue
		const text = parts[i].text
		tryMatch(text, mentionRegex, match => {
			// check that there isn't a word directly before the username
			const standsAlone = match.index === 0 || /\W/.test(text[match.index-1])
			if (!standsAlone) return true // not a mention; ask for the next match
			replacePart(parts, i, match, [
				{type: "user", text: match[0], user: match[1]}
			])
			i += 1 // skip the newly created part
			return false // parts[i] is no longer the text that was being scanned
		})
	}
	return parts
}
|
|
|
|
|
|
|
|
/**
 * Split "#hashtag" tokens out of every text part, editing `parts` in place.
 *
 * The pattern is handed to tryMatch as a string, so only the first hashtag of each text part is
 * replaced per visit; the text after it becomes a new text part further along the list, which the
 * loop then reaches and scans in turn.
 *
 * @param {{type: string, text: string}[]} parts - parts list to edit
 * @returns the same `parts` array
 */
function partsHashtag(parts) {
	const hashtagPattern = `#(${constants.external.hashtag_regex})`
	for (let i = 0; i < parts.length; i++) {
		if (parts[i].type !== "text") continue
		tryMatch(parts[i].text, hashtagPattern, match => {
			replacePart(parts, i, match, [
				{type: "hashtag", text: match[0], hashtag: match[1]}
			])
			i += 1 // skip the newly created part
		})
	}
	return parts
}
|
|
|
|
|
|
|
|
/**
 * Turn a plain string into a list of parts: text parts, with user mentions and hashtags split out
 * into parts of their own.
 *
 * Mentions are extracted first, then hashtags are looked for in whatever text parts remain.
 *
 * @param {string} text
 * @returns {object[]} parts with `type` "text", "user" or "hashtag"
 */
function structure(text) {
	const parts = textToParts(text)
	partsUsername(parts)
	partsHashtag(parts)
	return parts
}
|
|
|
|
|
2020-05-30 11:04:06 +00:00
|
|
|
/**
 * Edit a structure in-place to remove trailing hashtags and separator characters.
 *
 * Parts are popped off the end for as long as the last part is one of:
 * - a hashtag
 * - a user mention, but only if the structure had hashtags to begin with and none has been popped yet
 * - a text part with nothing left once every `dotRegex` match has been stripped from it
 *
 * A text part that still has content after stripping keeps the stripped text and ends the process.
 * If every part turns out to be trailing, the result is an empty list.
 *
 * @param {{type: string, text?: string}[]} structured - parts list to edit
 * @returns the same `structured` array
 */
function removeTrailingHashtags(structured) {
	const hasHashtags = structured.some(part => part.type === "hashtag")
	let seenHashtags = false

	function shouldRemoveLastPart() {
		const part = structured[structured.length-1]
		// the list was empty, or everything in it has been popped: nothing left to inspect
		if (!part) return false

		if (part.type === "hashtag") {
			seenHashtags = true
			return true
		}
		if (part.type === "user") {
			return hasHashtags && !seenHashtags // compromise?
		}
		if (part.type === "text") {
			const content = part.text.replace(dotRegex, "")
			if (content.length === 0) return true
			part.text = content
		}
		return false
	}

	while (shouldRemoveLastPart()) {
		structured.pop()
	}

	return structured
}
|
|
|
|
|
2020-02-03 14:30:19 +00:00
|
|
|
module.exports.structure = structure
|
2020-02-04 16:16:27 +00:00
|
|
|
module.exports.partsUsername = partsUsername
|
|
|
|
module.exports.partsHashtag = partsHashtag
|
2020-05-30 11:04:06 +00:00
|
|
|
module.exports.removeTrailingHashtags = removeTrailingHashtags
|