From 5201a6612bb01e482c991a1fde67d96616e15ba8 Mon Sep 17 00:00:00 2001 From: Cadence Fish Date: Tue, 18 Feb 2020 13:39:20 +1300 Subject: [PATCH] Rewrite feeds --- README.md | 2 ++ package-lock.json | 47 ++++++++++++----------------- package.json | 2 +- src/lib/collectors.js | 13 ++++++++ src/lib/constants.js | 2 +- src/lib/structures/ReelUser.js | 5 +++ src/lib/structures/Timeline.js | 27 +++++++++++------ src/lib/structures/TimelineEntry.js | 11 ++++--- src/lib/utils/request.js | 2 +- src/site/api/feed.js | 28 ++++++++++++----- src/site/api/routes.js | 2 +- src/site/pug/home.pug | 2 +- src/site/pug/includes/feed_link.pug | 9 ++++++ src/site/pug/user.pug | 26 +++++++++------- src/site/sass/main.sass | 3 ++ src/site/server.js | 2 +- 16 files changed, 118 insertions(+), 65 deletions(-) create mode 100644 src/site/pug/includes/feed_link.pug diff --git a/README.md b/README.md index 9abfdc0..d4c01e6 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,9 @@ See [Wiki:Installing](https://github.com/cloudrac3r/bibliogram/wiki/Installing) - `/` - homepage - `/u/{username}` - load a user's profile and timeline - `/u/{username}/rss.xml` - get the RSS feed for a user +- `/u/{username}/atom.xml` - get the Atom feed for a user - `/p/{shortcode}` - load a post +- `/privacy` - privacy policy ## Credits diff --git a/package-lock.json b/package-lock.json index fc48503..baecca2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1096,6 +1096,13 @@ "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==" }, + "feed": { + "version": "github:cloudrac3r/feed#dbd55889e9c7135a8710eaa4d4c415ffeee7fc27", + "from": "github:cloudrac3r/feed#dbd55889e9c7135a8710eaa4d4c415ffeee7fc27", + "requires": { + "xml-js": "^1.6.11" + } + }, "fill-range": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", @@ -3126,30 +3133,6 @@ "glob": "^7.1.3" } }, - "rss": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/rss/-/rss-1.2.2.tgz", - "integrity": "sha1-UKFpiHYTgTOnT5oF0r3I240nqSE=", - "requires": { - "mime-types": "2.1.13", - "xml": "1.0.1" - }, - "dependencies": { - "mime-db": { - "version": "1.25.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.25.0.tgz", - "integrity": "sha1-wY29fHOl2/b0SgJNwNFloeexw5I=" - }, - "mime-types": { - "version": "2.1.13", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.13.tgz", - "integrity": "sha1-4HqqnGxrmnyjASxpADrSWjnpKog=", - "requires": { - "mime-db": "~1.25.0" - } - } - } - }, "safe-buffer": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", @@ -3208,6 +3191,11 @@ } } }, + "sax": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", + "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==" + }, "scss-tokenizer": { "version": "0.2.3", "resolved": "https://registry.npmjs.org/scss-tokenizer/-/scss-tokenizer-0.2.3.tgz", @@ -5023,10 +5011,13 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-7.2.1.tgz", "integrity": "sha512-sucePNSafamSKoOqoNfBd8V0StlkzJKL2ZAhGQinCfNQ+oacw+Pk7lcdAElecBF2VkLNZRiIb5Oi1Q5lVUVt2A==" }, - "xml": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/xml/-/xml-1.0.1.tgz", - "integrity": "sha1-eLpyAgApxbyHuKgaPPzXS0ovweU=" + "xml-js": { + "version": "1.6.11", + "resolved": "https://registry.npmjs.org/xml-js/-/xml-js-1.6.11.tgz", + "integrity": "sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==", + "requires": { + "sax": "^1.2.4" + } }, "y18n": { "version": "3.2.1", diff --git a/package.json b/package.json index 41a1a92..9f7857f 100644 --- a/package.json +++ b/package.json @@ -12,12 +12,12 @@ "license": "AGPL-3.0-only", "dependencies": { "better-sqlite3": "^5.4.3", + "feed": "github:cloudrac3r/feed#dbd55889e9c7135a8710eaa4d4c415ffeee7fc27", "mixin-deep": "^2.0.1", "node-dir": "^0.1.17", "node-fetch": "^2.6.0", "pinski": "github:cloudrac3r/pinski#9eb56d90fdd00357451dd5a546dbcca1f9bf114a", "pug": "^2.0.4", - "rss": "^1.2.2", "semver": "^7.1.2", "sharp": "^0.24.0", "socks-proxy-agent": "github:cloudrac3r/node-socks-proxy-agent#6a26d274b12098dfef6cc2faafd25b0c051f2467" diff --git a/src/lib/collectors.js b/src/lib/collectors.js index 1efd267..95eb61a 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -13,6 +13,10 @@ const userRequestCache = new UserRequestCache(constants.caching.resource_cache_t const timelineEntryCache = new TtlCache(constants.caching.resource_cache_time) const history = new RequestHistory(["user", "timeline", "post", "reel"]) +/** + * @param {string} username + * @param {boolean} isRSS + */ async function fetchUser(username, isRSS) { let mode = constants.allow_user_from_reel if (mode === "preferForRSS") { @@ -38,6 +42,10 @@ async function fetchUser(username, isRSS) { } } +/** + * @param {string} username + * @returns {Promise} + */ function fetchUserFromHTML(username) { return userRequestCache.getOrFetch("user/"+username, false, true, () => { return switcher.request("user_html", `https://www.instagram.com/${username}/`, async res => { @@ -72,6 +80,11 @@ function fetchUserFromHTML(username) { }) } +/** + * @param {string} userID + * @param {string} username + * @returns {Promise} + */ function fetchUserFromCombined(userID, username) { // Fetch basic user information const p = new URLSearchParams() diff --git a/src/lib/constants.js b/src/lib/constants.js index f02f9fe..ae50ba5 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -6,7 +6,7 @@ let constants = { // Things that server owners _should_ change! - website_origin: "http://localhost:10407", + website_origin: "http://localhost:10407", // Protocol and domain that this instance is hosted on. Do NOT include a trailing slash. has_privacy_policy: false, // You MUST read /src/site/pug/privacy.pug.template before changing this! // Things that server owners _could_ change if they want to. diff --git a/src/lib/structures/ReelUser.js b/src/lib/structures/ReelUser.js index 48aa559..d236cff 100644 --- a/src/lib/structures/ReelUser.js +++ b/src/lib/structures/ReelUser.js @@ -13,11 +13,16 @@ class ReelUser { this.following = 0 this.followedBy = 0 this.posts = 0 + /** @type {import("./Timeline")} */ this.timeline = new Timeline(this) this.cachedAt = Date.now() this.proxyProfilePicture = proxyImage(this.data.profile_pic_url) } + getStructuredBio() { + return null + } + getTtl(scale = 1) { const expiresAt = this.cachedAt + constants.caching.resource_cache_time const ttl = expiresAt - Date.now() diff --git a/src/lib/structures/Timeline.js b/src/lib/structures/Timeline.js index 1ff7f98..3b353ee 100644 --- a/src/lib/structures/Timeline.js +++ b/src/lib/structures/Timeline.js @@ -1,4 +1,4 @@ -const RSS = require("rss") +const {Feed} = require("feed") const constants = require("../constants") const config = require("../../../config") const TimelineEntry = require("./TimelineEntry") @@ -54,18 +54,27 @@ class Timeline { } async fetchFeed() { - const feed = new RSS({ - title: `@${this.user.data.username}`, - feed_url: `${config.website_origin}/u/${this.user.data.username}/rss.xml`, - site_url: config.website_origin, + // we likely cannot use full_name here - reel fallback would make the feed title inconsistent, leading to confusing experience + const usedName = `@${this.user.data.username}` + const feed = new Feed({ + title: usedName, description: this.user.data.biography, - image_url: config.website_origin+this.user.proxyProfilePicture, - pubDate: new Date(this.user.cachedAt), - ttl: this.user.getTtl(1000*60) // scale to minute + id: `bibliogram:user/${this.user.data.username}`, + link: `${constants.website_origin}/u/${this.user.data.username}`, + feedLinks: { + rss: `${constants.website_origin}/u/${this.user.data.username}/rss.xml`, + atom: `${constants.website_origin}/u/${this.user.data.username}/atom.xml` + }, + image: constants.website_origin+this.user.proxyProfilePicture, + updated: new Date(this.user.cachedAt), + author: { + name: usedName, + link: `${constants.website_origin}/u/${this.user.data.username}` + } }) const page = this.pages[0] // only get posts from first page await Promise.all(page.map(item => - item.fetchFeedData().then(feedData => feed.item(feedData)) + item.fetchFeedData().then(feedData => feed.addItem(feedData)) )) return feed } diff --git a/src/lib/structures/TimelineEntry.js b/src/lib/structures/TimelineEntry.js index e311599..9c426d9 100644 --- a/src/lib/structures/TimelineEntry.js +++ b/src/lib/structures/TimelineEntry.js @@ -217,6 +217,9 @@ class TimelineEntry extends TimelineBaseMethods { else return this.update().then(() => this.getVideoUrlP()) } + /** + * @returns {Promise} + */ async fetchFeedData() { const children = await this.fetchChildren() return { @@ -230,10 +233,10 @@ class TimelineEntry extends TimelineBaseMethods { height: child.data.dimensions.height })) }), - author: this.data.owner.username, - url: `${constants.website_origin}/p/${this.data.shortcode}`, - guid: `${constants.website_origin}/p/${this.data.shortcode}`, // Is it wise to keep the origin in here? The same post would have a different ID from different servers. - date: new Date(this.data.taken_at_timestamp*1000) + link: `${constants.website_origin}/p/${this.data.shortcode}`, + id: `bibliogram:post/${this.data.shortcode}`, // Is it wise to keep the origin in here? The same post would have a different ID from different servers. + published: new Date(this.data.taken_at_timestamp*1000), // first published date + date: new Date(this.data.taken_at_timestamp*1000) // last modified date /* Readers should display the description as HTML rather than using the media enclosure. enclosure: { diff --git a/src/lib/utils/request.js b/src/lib/utils/request.js index b98b476..1df5a46 100644 --- a/src/lib/utils/request.js +++ b/src/lib/utils/request.js @@ -3,7 +3,7 @@ const fetch = require("node-fetch").default function request(url, options = {}, settings = {}) { if (settings.statusLine === undefined) settings.statusLine = "OUT" if (settings.log === undefined) settings.log = true - if (settings.log) console.log(`-> [${settings.statusLine}] ${url}`) // todo: make more like pinski? + if (settings.log) console.log(` -> [${settings.statusLine}] ${url}`) // todo: make more like pinski? // @ts-ignore return fetch(url, Object.assign({ headers: { diff --git a/src/site/api/feed.js b/src/site/api/feed.js index e5a2e34..853d4f3 100644 --- a/src/site/api/feed.js +++ b/src/site/api/feed.js @@ -4,15 +4,29 @@ const {render} = require("pinski/plugins") const {pugCache} = require("../passthrough") module.exports = [ - {route: `/u/(${constants.external.username_regex})/rss.xml`, methods: ["GET"], code: ({fill}) => { + {route: `/u/(${constants.external.username_regex})/(rss|atom)\\.xml`, methods: ["GET"], code: ({fill}) => { if (constants.settings.rss_enabled) { + const kind = fill[1] return fetchUser(fill[0], true).then(async user => { - const content = await user.timeline.fetchFeed() - const xml = content.xml() + const feed = await user.timeline.fetchFeed() + if (kind === "rss") { + var data = { + contentType: "application/rss+xml", // see https://stackoverflow.com/questions/595616/what-is-the-correct-mime-type-to-use-for-an-rss-feed, + content: feed.rss2() + } + } else if (kind === "atom") { + var data = { + contentType: "application/atom+xml", // see https://en.wikipedia.org/wiki/Atom_(standard)#Including_in_HTML + content: feed.atom1() + } + } return { statusCode: 200, - contentType: "application/rss+xml", // see https://stackoverflow.com/questions/595616/what-is-the-correct-mime-type-to-use-for-an-rss-feed - content: xml + contentType: data.contentType, + headers: { + "Cache-Control": `max-age=${userRequestCache.getTtl("user/"+user.data.username, 1000)}` + }, + content: data.content } }).catch(error => { if (error === constants.symbols.NOT_FOUND || error === constants.symbols.ENDPOINT_OVERRIDDEN) { @@ -40,8 +54,8 @@ module.exports = [ } else { return Promise.resolve(render(403, "pug/friendlyerror.pug", { statusCode: 403, - title: "RSS disabled", - message: "RSS is disabled on this instance.", + title: "Feeds disabled", + message: "Feeds are disabled on this instance.", withInstancesLink: true })) } diff --git a/src/site/api/routes.js b/src/site/api/routes.js index 85daf66..c1be33d 100644 --- a/src/site/api/routes.js +++ b/src/site/api/routes.js @@ -58,7 +58,7 @@ module.exports = [ if (typeof page === "number" && !isNaN(page) && page >= 1) { await user.timeline.fetchUpToPage(page - 1) } - return render(200, "pug/user.pug", {url, user, constants}) + return render(200, "pug/user.pug", {url, user, constants, website_origin: constants.website_origin}) }).catch(error => { if (error === constants.symbols.NOT_FOUND || error === constants.symbols.ENDPOINT_OVERRIDDEN) { return render(404, "pug/friendlyerror.pug", { diff --git a/src/site/pug/home.pug b/src/site/pug/home.pug index 4e88271..fd78450 100644 --- a/src/site/pug/home.pug +++ b/src/site/pug/home.pug @@ -8,7 +8,7 @@ html body.homepage header h1.banner - img.banner-image(src="/static/img/banner-min.svg") + img.banner-image(src="/static/img/banner-min.svg" alt="Bibliogram") .go-sections-container .go-sections section diff --git a/src/site/pug/includes/feed_link.pug b/src/site/pug/includes/feed_link.pug new file mode 100644 index 0000000..b30eeb2 --- /dev/null +++ b/src/site/pug/includes/feed_link.pug @@ -0,0 +1,9 @@ +mixin feed_link(name, urlPart, username, contentType) + span + a(rel="alternate" type=contentType href=`/u/${username}/${urlPart}.xml`) + = name + sup.validate-feed + - + let params = new URLSearchParams() + params.set("url", `${website_origin}/u/${username}/${urlPart}.xml`) + a(href="https://validator.w3.org/feed/check.cgi?"+params.toString() title="Validate this feed") v! diff --git a/src/site/pug/user.pug b/src/site/pug/user.pug index 5f0d0de..5bad081 100644 --- a/src/site/pug/user.pug +++ b/src/site/pug/user.pug @@ -1,8 +1,9 @@ -//- Needs user, url, constants +//- Needs user, url, constants, website_origin include includes/timeline_page.pug include includes/next_page_button.pug include includes/display_structured +include includes/feed_link - const numberFormat = new Intl.NumberFormat().format @@ -19,7 +20,7 @@ html .main-divider header.profile-overview .profile-sticky - img(src=user.proxyProfilePicture width="150px" height="150px" alt=`${user.data.full_name || user.data.username}'s profile picture.`).pfp + img(src=user.proxyProfilePicture width=150 height=150 alt=`${user.data.full_name || user.data.username}'s profile picture.`).pfp //- Instagram only uses the above URL, but an HD version is also available. The alt text is pathetic, I know. I don't have much to work with. @@ -28,20 +29,23 @@ html h2.username= `@${user.data.username}` else h1.full-name= `@${user.data.username}` - if !user.fromReel - p.structured-text.bio - - const bio = user.getStructuredBio() - if bio - +display_structured(bio) - if user.data.external_url - p.website - a(href=user.data.external_url)= user.data.external_url + p.structured-text.bio + - const bio = user.getStructuredBio() + if bio + +display_structured(bio) + if user.data.external_url + p.website + a(href=user.data.external_url)= user.data.external_url + if user.posts != undefined div.profile-counter #[span(data-numberformat=user.posts).count #{numberFormat(user.posts)}] posts + if user.following != undefined div.profile-counter #[span(data-numberformat=user.following).count #{numberFormat(user.following)}] following + if user.followedBy != undefined div.profile-counter #[span(data-numberformat=user.followedBy).count #{numberFormat(user.followedBy)}] followed by div.links if constants.settings.rss_enabled - a(rel="alternate" type="application/rss+xml" href=`/u/${user.data.username}/rss.xml`) RSS + +feed_link("RSS", "rss", user.data.username, "application/rss+xml") + +feed_link("Atom", "atom", user.data.username, "application/atom+xml") a(rel="noreferrer noopener" href=`https://www.instagram.com/${user.data.username}`) instagram.com - const hasPosts = !user.data.is_private && user.timeline.pages.length && user.timeline.pages[0].length diff --git a/src/site/sass/main.sass b/src/site/sass/main.sass index 357f295..100a87e 100644 --- a/src/site/sass/main.sass +++ b/src/site/sass/main.sass @@ -102,6 +102,9 @@ body flex-wrap: wrap justify-content: center + .validate-feed + margin-left: 2px + a, a:visited color: $main-theme-link-color diff --git a/src/site/server.js b/src/site/server.js index c3c70e1..99f54f2 100644 --- a/src/site/server.js +++ b/src/site/server.js @@ -21,7 +21,6 @@ subdirs("pug", async (err, dirs) => { pinski.addPugDir("pug", dirs) pinski.addAPIDir("html/static/js/templates/api") pinski.addSassDir("sass") - pinski.addAPIDir("api") pinski.muteLogsStartingWith("/imageproxy") pinski.muteLogsStartingWith("/videoproxy") pinski.muteLogsStartingWith("/static") @@ -30,6 +29,7 @@ subdirs("pug", async (err, dirs) => { await require("../lib/utils/tor") // make sure tor state is known before going further } + pinski.addAPIDir("api") pinski.startServer() pinski.enableWS()