From fff2d74fe3721ec372544cb02669018c78f8a8f8 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Tue, 14 Apr 2020 03:46:23 +1200 Subject: [PATCH] Provide error page for age gated profiles --- src/lib/collectors.js | 59 +++--- src/lib/constants.js | 6 +- src/lib/utils/body.js | 27 ++- src/lib/utils/parser/parser.js | 3 +- src/site/api/feed.js | 2 + src/site/api/routes.js | 2 + src/site/pug/age_gated.pug | 11 ++ src/site/pug/blocked.pug | 2 - test/body.js | 21 ++- test/files/page-age-gated.html | 321 +++++++++++++++++++++++++++++++++ 10 files changed, 418 insertions(+), 36 deletions(-) create mode 100644 src/site/pug/age_gated.pug create mode 100644 test/files/page-age-gated.html diff --git a/src/lib/collectors.js b/src/lib/collectors.js index 34ce5a5..6df6a47 100644 --- a/src/lib/collectors.js +++ b/src/lib/collectors.js @@ -101,32 +101,41 @@ function fetchUserFromHTML(username) { // require down here or have to deal with require loop. require cache will take care of it anyway. // User -> Timeline -> TimelineEntry -> collectors -/> User const User = require("./structures/User") - const sharedData = extractSharedData(text) - const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user) - history.report("user", true) - if (constants.caching.db_user_id) { - const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username) - db.prepare( - "REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES " - +"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)" - ).run({ - username: user.data.username, - user_id: user.data.id, - created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(), - updated: Date.now(), - updated_version: constants.database_version, - biography: user.data.biography || null, - post_count: user.posts || 0, - following_count: user.following || 0, - followed_by_count: user.followedBy || 0, - external_url: user.data.external_url || null, - full_name: user.data.full_name || null, - is_private: +user.data.is_private, - is_verified: +user.data.is_verified, - profile_pic_url: user.data.profile_pic_url - }) + const result = extractSharedData(text) + if (result.status === constants.symbols.extractor_results.SUCCESS) { + const sharedData = result.value + const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user) + history.report("user", true) + if (constants.caching.db_user_id) { + const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username) + db.prepare( + "REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES " + +"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)" + ).run({ + username: user.data.username, + user_id: user.data.id, + created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(), + updated: Date.now(), + updated_version: constants.database_version, + biography: user.data.biography || null, + post_count: user.posts || 0, + following_count: user.following || 0, + followed_by_count: user.followedBy || 0, + external_url: user.data.external_url || null, + full_name: user.data.full_name || null, + is_private: +user.data.is_private, + is_verified: +user.data.is_verified, + profile_pic_url: user.data.profile_pic_url + }) + } + return user + } else if (result.status === constants.symbols.extractor_results.AGE_RESTRICTED) { + // I don't like this code. + history.report("user", true) + throw constants.symbols.extractor_results.AGE_RESTRICTED + } else { + throw result.status } - return user } }).catch(error => { if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) { diff --git a/src/lib/constants.js b/src/lib/constants.js index 3907fd2..346e799 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -100,11 +100,15 @@ let constants = { TYPE_GALLERY_IMAGE: Symbol("TYPE_GALLERY_IMAGE"), TYPE_GALLERY_VIDEO: Symbol("TYPE_GALLERY_VIDEO"), NOT_FOUND: Symbol("NOT_FOUND"), - NO_SHARED_DATA: Symbol("NO_SHARED_DATA"), INSTAGRAM_DEMANDS_LOGIN: Symbol("INSTAGRAM_DEMANDS_LOGIN"), RATE_LIMITED: Symbol("RATE_LIMITED"), ENDPOINT_OVERRIDDEN: Symbol("ENDPOINT_OVERRIDDEN"), NO_ASSISTANTS_AVAILABLE: Symbol("NO_ASSISTANTS_AVAILABLE"), + extractor_results: { + SUCCESS: Symbol("SUCCESS"), + AGE_RESTRICTED: Symbol("AGE_RESTRICTED"), + NO_SHARED_DATA: Symbol("NO_SHARED_DATA") + }, assistant_statuses: { OFFLINE: Symbol("OFFLINE"), BLOCKED: Symbol("BLOCKED"), diff --git a/src/lib/utils/body.js b/src/lib/utils/body.js index 9ae880f..264e949 100644 --- a/src/lib/utils/body.js +++ b/src/lib/utils/body.js @@ -3,17 +3,40 @@ const {Parser} = require("./parser/parser") /** * @param {string} text + * @returns {{status: symbol, value: any}} */ function extractSharedData(text) { const parser = new Parser(text) const index = parser.seek("window._sharedData = ", {moveToMatch: true, useEnd: true}) - if (index === -1) throw constants.symbols.NO_SHARED_DATA + if (index === -1) { + // Maybe the profile is age restricted? + const age = getRestrictedAge(text) + if (age !== null) { // Correct. + return {status: constants.symbols.extractor_results.AGE_RESTRICTED, value: age} + } + return {status: constants.symbols.extractor_results.NO_SHARED_DATA, value: null} + } parser.store() const end = parser.seek(";") parser.restore() const sharedDataString = parser.slice(end - parser.cursor) const sharedData = JSON.parse(sharedDataString) - return sharedData + return {status: constants.symbols.extractor_results.SUCCESS, value: sharedData} +} + +/** + * @param {string} text + */ +function getRestrictedAge(text) { + const parser = new Parser(text) + let index = parser.seek("

Restricted profile

", {moveToMatch: true, useEnd: true}) + if (index === -1) return null + index = parser.seek("

", {moveToMatch: true, useEnd: true}) + if (index === -1) return null + const explanation = parser.get({split: "

"}).trim() + const match = explanation.match(/You must be (\d+?) years? old or over to see this profile/) + if (!match) return null + return +match[1] // the age } module.exports.extractSharedData = extractSharedData diff --git a/src/lib/utils/parser/parser.js b/src/lib/utils/parser/parser.js index a9ab7f6..b5f7902 100644 --- a/src/lib/utils/parser/parser.js +++ b/src/lib/utils/parser/parser.js @@ -37,6 +37,7 @@ class Parser { } /** + * Get the next element from the buffer, either up to a token or between two tokens, and update the cursor. * @param {GetOptions} [options] * @returns {String} */ @@ -123,7 +124,7 @@ class Parser { } /** - * Seek past the next occurance of the string. + * Seek to or past the next occurance of the string. * @param {string} toFind * @param {{moveToMatch?: boolean, useEnd?: boolean}} options both default to false */ diff --git a/src/site/api/feed.js b/src/site/api/feed.js index a5fdb28..c01262e 100644 --- a/src/site/api/feed.js +++ b/src/site/api/feed.js @@ -47,6 +47,8 @@ module.exports = [ expiresMinutes: userRequestCache.getTtl("user/"+fill[0], 1000*60) }) } + } else if (error === constants.symbols.extractor_results.AGE_RESTRICTED) { + return render(403, "pug/age_gated.pug") } else { throw error } diff --git a/src/site/api/routes.js b/src/site/api/routes.js index 53dc596..d5ceb6e 100644 --- a/src/site/api/routes.js +++ b/src/site/api/routes.js @@ -89,6 +89,8 @@ module.exports = [ expiresMinutes: userRequestCache.getTtl("user/"+fill[0], 1000*60) }) } + } else if (error === constants.symbols.extractor_results.AGE_RESTRICTED) { + return render(403, "pug/age_gated.pug") } else { throw error } diff --git a/src/site/pug/age_gated.pug b/src/site/pug/age_gated.pug new file mode 100644 index 0000000..b9e10e1 --- /dev/null +++ b/src/site/pug/age_gated.pug @@ -0,0 +1,11 @@ +include includes/error.pug + +doctype html +html + head + title= `Restricted profile | Bibliogram` + include includes/head + body.error-page + +error(403, "Restricted profile", false) + | This profile is age restricted. + | You must log in to Instagram to view this profile. diff --git a/src/site/pug/blocked.pug b/src/site/pug/blocked.pug index f51c7c5..ba5ee61 100644 --- a/src/site/pug/blocked.pug +++ b/src/site/pug/blocked.pug @@ -2,8 +2,6 @@ include includes/error.pug -- const numberFormat = new Intl.NumberFormat().format - doctype html html head diff --git a/test/body.js b/test/body.js index b6ace7b..ea52028 100644 --- a/test/body.js +++ b/test/body.js @@ -4,16 +4,27 @@ const {extractSharedData} = require("../src/lib/utils/body") const fs = require("fs").promises tap.test("extract shared data", async childTest => { - childTest.throws(() => extractSharedData(""), constants.symbols.NO_SHARED_DATA, "not found in blank") + { + const result = extractSharedData("") + childTest.equal(result.status, constants.symbols.extractor_results.NO_SHARED_DATA, "not found in blank") + } { const page = await fs.readFile("test/files/page-user-instagram.html", "utf8") - const sharedData = extractSharedData(page) - childTest.equal(sharedData.entry_data.ProfilePage[0].graphql.user.username, "instagram", "can extract user page") + const result = extractSharedData(page) + childTest.equal(result.status, constants.symbols.extractor_results.SUCCESS, "extractor status success") + childTest.equal(result.value.entry_data.ProfilePage[0].graphql.user.username, "instagram", "can extract user page") } { const page = await fs.readFile("test/files/page-login.html", "utf8") - const sharedData = extractSharedData(page) - childTest.true(sharedData.entry_data.LoginAndSignupPage[0], "can extract login page") + const result = extractSharedData(page) + childTest.equal(result.status, constants.symbols.extractor_results.SUCCESS, "extractor status success") + childTest.true(result.value.entry_data.LoginAndSignupPage[0], "can extract login page") + } + { + const page = await fs.readFile("test/files/page-age-gated.html", "utf8") + const result = extractSharedData(page) + childTest.equal(result.status, constants.symbols.extractor_results.AGE_RESTRICTED, "extractor detects age restricted") + childTest.equal(result.value, 21, "correct age is extracted") } childTest.end() }) diff --git a/test/files/page-age-gated.html b/test/files/page-age-gated.html new file mode 100644 index 0000000..744fc22 --- /dev/null +++ b/test/files/page-age-gated.html @@ -0,0 +1,321 @@ + + + + + + + + Restricted profile • Instagram + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+
+ + +
+
    +
  • + +
  • +
+
+ + + + +
+
+ + + + +
+ +
+ + + +
+ +
+ + +

Restricted profile

+ +

+ You must be 21 years old or over to see this profile +

+ + + +
+ +
+ + +
+ + + + + +
+
+ + + + + + + + + + + + +