From 7461f25ca97bcb894e3fa5f54a010905ebb44f26 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Fri, 6 May 2022 01:36:40 +1200 Subject: [PATCH] Use boring font for consistent non-latin script appearance based on page heuristics --- src/lang/ar.js | 1 + src/lang/bg.js | 1 + src/lang/fa.js | 1 + src/lang/ru.js | 1 + src/lang/uk.js | 1 + src/lib/structures/BaseUser.js | 6 ++++++ src/lib/structures/TimelineEntry.js | 8 ++++++++ src/lib/utils/islatin.js | 16 ++++++++++++++++ src/site/pug/home.pug | 2 +- src/site/pug/post.pug | 2 +- src/site/pug/settings.pug | 2 +- src/site/pug/user.pug | 2 +- src/site/sass/includes/_main.sass | 6 ++++-- 13 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 src/lib/utils/islatin.js diff --git a/src/lang/ar.js b/src/lang/ar.js index 8e4e33e..3ce3a3b 100644 --- a/src/lang/ar.js +++ b/src/lang/ar.js @@ -5,6 +5,7 @@ if (!constants.language_dev) Object.assign(data, require("./en.js")) ;(() => { data.meta_direction = "rtl" + data.meta_use_boring_font = true data.go_to_profile = "اذهب إلى الملف الشخصي" data.go_to_post = "اذهب إلى المنشور" diff --git a/src/lang/bg.js b/src/lang/bg.js index b781008..a4527c9 100644 --- a/src/lang/bg.js +++ b/src/lang/bg.js @@ -5,6 +5,7 @@ if (!constants.language_dev) Object.assign(data, require("./en.js")) ;(() => { data.meta_direction = "ltr" + data.meta_use_boring_font = true data.go_to_profile = "Виж профил" data.go_to_post = "Виж публикация" diff --git a/src/lang/fa.js b/src/lang/fa.js index df7b0eb..2d17034 100644 --- a/src/lang/fa.js +++ b/src/lang/fa.js @@ -5,6 +5,7 @@ if (!constants.language_dev) Object.assign(data, require("./en.js")) ;(() => { data.meta_direction = "rtl" + data.meta_use_boring_font = true data.go_to_profile = "برو به نمایه" data.go_to_post = "برو به پست" diff --git a/src/lang/ru.js b/src/lang/ru.js index ba5f4f0..ed84b9f 100644 --- a/src/lang/ru.js +++ b/src/lang/ru.js @@ -5,6 +5,7 @@ if (!constants.language_dev) Object.assign(data, require("./en.js")) ;(() => { 
data.meta_direction = "ltr" + data.meta_use_boring_font = true data.go_to_profile = "Перейти в профиль" data.go_to_post = "Перейти в публикацию" diff --git a/src/lang/uk.js b/src/lang/uk.js index f7ba0f8..88972c8 100644 --- a/src/lang/uk.js +++ b/src/lang/uk.js @@ -5,6 +5,7 @@ if (!constants.language_dev) Object.assign(data, require("./en.js")) ;(() => { data.meta_direction = "ltr" + data.meta_use_boring_font = true data.go_to_profile = "Перейти до профілю" data.go_to_post = "Перейти до допису" diff --git a/src/lib/structures/BaseUser.js b/src/lib/structures/BaseUser.js index 33cf81c..76a378b 100644 --- a/src/lib/structures/BaseUser.js +++ b/src/lib/structures/BaseUser.js @@ -1,6 +1,7 @@ const constants = require("../constants") const {proxyProfilePic} = require("../utils/proxyurl") const {structure} = require("../utils/structuretext") +const {isLatin} = require("../utils/islatin") const rewriters = { rewrite_youtube: ["youtube.com", "www.youtube.com", "m.youtube.com", "youtu.be"], @@ -52,6 +53,11 @@ class BaseUser { return structure(this.data.biography) } + bioIsLatin() { + if (typeof this.data.biography !== "string") return true + return isLatin(this.data.biography) + } + getTtl(scale = 1) { const expiresAt = this.cachedAt + constants.caching.resource_cache_time const ttl = expiresAt - Date.now() diff --git a/src/lib/structures/TimelineEntry.js b/src/lib/structures/TimelineEntry.js index 8e1a46e..97c64c1 100644 --- a/src/lib/structures/TimelineEntry.js +++ b/src/lib/structures/TimelineEntry.js @@ -5,6 +5,7 @@ const collectors = require("../collectors") const {structure, removeTrailingHashtags} = require("../utils/structuretext") const TimelineBaseMethods = require("./TimelineBaseMethods") const TimelineChild = require("./TimelineChild") +const {isLatin} = require("../utils/islatin") require("../testimports")(collectors, TimelineChild, TimelineBaseMethods) const rssDescriptionTemplate = compile(` @@ -138,6 +139,13 @@ class TimelineEntry extends TimelineBaseMethods 
{ else return caption.split("\n")[0].split(". ")[0] } + captionIsLatin() { + // the caption introduction is likely to be more meaningful for analysis than the full caption. + const introduction = this.getCaptionIntroduction() + if (typeof introduction !== "string") return true + return isLatin(introduction) + } + /** * Alt text is not available for N2, the caption or a placeholder string will be returned instead. * @override diff --git a/src/lib/utils/islatin.js b/src/lib/utils/islatin.js new file mode 100644 index 0000000..2db674d --- /dev/null +++ b/src/lib/utils/islatin.js @@ -0,0 +1,16 @@ +function isLatin(text) { + // remove characters from the text that can be used in any script, such as numbers, basic punctuation, and emojis. + // the emoji regular expression is from https://stackoverflow.com/a/45138005 + const textWithoutMultilingual = text.replace(/[ .,?!¿¡#@$&%\/0-9\u{1f300}-\u{1f5ff}\u{1f900}-\u{1f9ff}\u{1f600}-\u{1f64f}\u{1f680}-\u{1f6ff}\u{2600}-\u{26ff}\u{2700}-\u{27bf}\u{1f1e6}-\u{1f1ff}\u{1f191}-\u{1f251}\u{1f004}\u{1f0cf}\u{1f170}-\u{1f171}\u{1f17e}-\u{1f17f}\u{1f18e}\u{3030}\u{2b50}\u{2b55}\u{2934}-\u{2935}\u{2b05}-\u{2b07}\u{2b1b}-\u{2b1c}\u{3297}\u{3299}\u{303d}\u{00a9}\u{00ae}\u{2122}\u{23f3}\u{24c2}\u{23e9}-\u{23ef}\u{25b6}\u{23f8}-\u{23fa}-]/ug, "") + + // if nothing script-specific is left to analyse, treat the text as latin + if (textWithoutMultilingual.length == 0) return true + + // regular expression from https://stackoverflow.com/a/26900132 - it's close enough for these heuristics. + const latinText = textWithoutMultilingual.replace(/[^A-Za-zÀ-ÿ]/g, "") + + // if more than 60% of the remaining characters are latin, consider it to be latin.
+ return latinText.length > textWithoutMultilingual.length * 0.6 +} + +module.exports.isLatin = isLatin diff --git a/src/site/pug/home.pug b/src/site/pug/home.pug index e226bac..093b016 100644 --- a/src/site/pug/home.pug +++ b/src/site/pug/home.pug @@ -7,7 +7,7 @@ html(lang=settings.language) head title Bibliogram include includes/head - body.homepage + body.homepage(class={"use-boring-font": ll.meta_use_boring_font}) header h1.banner img.banner-image(src="/static/img/banner-min.svg" alt="Bibliogram") diff --git a/src/site/pug/post.pug b/src/site/pug/post.pug index 95e648d..ecc4d0c 100644 --- a/src/site/pug/post.pug +++ b/src/site/pug/post.pug @@ -46,6 +46,6 @@ html meta(property="og:image:alt" content=firstEntry.getAlt()) meta(property="og:site_name" content="Bibliogram") - body.post-page + body.post-page(class={"use-boring-font": !post.captionIsLatin()}) main +post(post, false) diff --git a/src/site/pug/settings.pug b/src/site/pug/settings.pug index ffac7b5..910a3e9 100644 --- a/src/site/pug/settings.pug +++ b/src/site/pug/settings.pug @@ -36,7 +36,7 @@ html(dir=ll.meta_direction, lang=settings.language) title= `${ll.t_settings} | Bibliogram` include includes/head script(src=getStaticURL("html", "/static/js/settings_message.js") type="module") - body.settings-page + body.settings-page(class={"use-boring-font": ll.meta_use_boring_font}) if status && message .status-notice(class=status)= message main.settings diff --git a/src/site/pug/user.pug b/src/site/pug/user.pug index 7cd2b9d..0b8ec8e 100644 --- a/src/site/pug/user.pug +++ b/src/site/pug/user.pug @@ -37,7 +37,7 @@ html meta(property="og:image:type" content="image/jpeg") meta(property="og:site_name" content="Bibliogram") - body + body(class={"use-boring-font": !user.bioIsLatin()}) nav(class=(settings.display_top_nav ? 
"always-displayed" : "")).top-nav //- Alt text guidelines from https://axesslab.com/alt-texts/ a(href="/").nav-icon-link diff --git a/src/site/sass/includes/_main.sass b/src/site/sass/includes/_main.sass index 4e87e01..fe496ee 100644 --- a/src/site/sass/includes/_main.sass +++ b/src/site/sass/includes/_main.sass @@ -11,8 +11,10 @@ $theme: () !default body font-family: "Bariol", sans-serif -[lang="uk"] body // consistent cyrillic - font-family: "Cantarell", sans-serif +body.use-boring-font + // this is useful so that cyrillic and other scripts don't look conspicuous compared to nearby latin letters. + // the use-boring-font class is activated based on the page and heuristics of its contents + font-family: sans-serif input, button, textarea font-family: inherit