diff --git a/scripts/process_log.js b/scripts/process_log.js new file mode 100644 index 0000000..12c8a3c --- /dev/null +++ b/scripts/process_log.js @@ -0,0 +1,206 @@ +// @ts-check + +const path = require("path") +const readline = require("readline") +const fs = require("fs") + +const logPath = process.argv[2] +if (!logPath) { + console.log("Specify the path to the nginx log file as a command line argument.") + process.exit(1) +} + +const outPath = "./dates.csv" + +const reader = readline.createInterface({ + input: fs.createReadStream(logPath), +}) + +class MapOfNumber { + constructor() { + this.backing = new Map() + } + + add(value) { + if (this.backing.has(value)) { + this.backing.set(value, this.backing.get(value) + 1) + } else { + this.backing.set(value, 1) + } + } + + get(value) { + return this.backing.has(value) ? this.backing.get(value) : 0 + } +} + +/** + * @template K,V + */ +class MapOfArray { + constructor() { + /** @type {Map} */ + this.backing = new Map() + } + + add(key, value) { + if (this.backing.has(key)) { + this.backing.get(key).push(key, value) + } else { + this.backing.set(key, [value]) + } + } + + get(key) { + return this.backing.has(key) ? this.backing.get(key) : [] + } +} + +class DateCollection { + constructor(startTime, interval) { + this.startTime = startTime + this.interval = interval + /** @type {Map} */ + this.backing = new Map() + } + + timestampToIndex(timestamp) { + return Math.floor((timestamp - this.startTime) / this.interval) + } + + add(key, timestamp) { + const index = this.timestampToIndex(timestamp) + let row + if (!this.backing.has(key)) { + row = [] + this.backing.set(key, []) + } else { + row = this.backing.get(key) + } + while (row.length < index+1) { + row.push(0) + } + row[index]++ + } +} + +// ip, date, method, path, status, bytes, userAgent +const regex = /^([^ ]+) - - \[([^\]]+)\] "([A-Z]+) ([^"]+) HTTP\/(?:1.0|1.1|2.0)" ([0-9]+) ([0-9]+) "([^"]+)"$/ + +function parseLine(line) { + const result = line.match(regex) + return { + ip: result[1], + date: result[2], + method: result[3], + path: result[4], + status: result[5], + bytes: +result[6], + userAgent: result[7] + } +} + +const additionalStatic = ["/android-chrome-512x512.png", "/safari-pinned-tab.svg", "/robots.txt", "/site.webmanifest", "/apple-touch-icon.png", "/favicon-32x32.png", "/favicon-16x16.png", "/favicon.ico", "/android-chrome-192x192.png"] + +let total = 0 +let ipv4c = 0 +let ipv6c = 0 +const ipSet = new Set() +let kinds = { + proxied: 0, + feed: 0, + users: 0, + posts: 0, + static: 0, + userFragments: 0, + postFragments: 0, + fpredirects: 0, + home: 0, + api: 0 +} +const statuses = new MapOfNumber() +/** @type {DateCollection} */ +let dateCollection = null + +reader.on("line", line => { + const parsed = parseLine(line) + const dateObject = new Date(parsed.date.replace(":", " ")) + + //console.log(parsed) + + if (!dateCollection) { + dateCollection = new DateCollection(dateObject.getTime(), 10*60*1000) + } + + total++ + ipSet.add(parsed.ip) + + if (parsed.ip.includes(":")) ipv6c++ + else ipv4c++ + + statuses.add(parsed.status) + + let kind = null + if (parsed.path === "/") { + kind = "home"; + } else if (parsed.path.startsWith("/imageproxy") || parsed.path.startsWith("/videoproxy")) { + kind = "proxied"; + } else if (parsed.path.endsWith(".xml")) { + kind = "feed"; + } else if (parsed.path.startsWith("/static/") || additionalStatic.includes(parsed.path)) { + kind = "static"; + } else if (parsed.path.startsWith("/u/")) { + kind = "users"; + } else if (parsed.path.startsWith("/p/")) { + kind = "posts"; + } else if (parsed.path.startsWith("/fragment/user")) { + kind = "userFragments"; + } else if (parsed.path.startsWith("/fragment/post")) { + kind = "postFragments"; + } else if (parsed.path.startsWith("/u") || parsed.path.startsWith("/p")) { + kind = "fpredirects"; + } else if (parsed.path.startsWith("/api/") || parsed.path === "/.well-known/nodeinfo") { + kind = "api"; + } else if (parsed.status !== "404" && parsed.status !== "301") { + //console.log(parsed) + } + if (kind) { + kinds[kind]++ + dateCollection.add(kind, dateObject.getTime()) + } +}) + +function numberSummary(part, total, padSize = 6) { + return `${part.toString().padStart(padSize, " ")} (${(part/total*100).toFixed(1).toString().padStart(4, " ")}%)` +} + +reader.on("close", () => { + console.log(`${total} total requests`) + console.log(`${numberSummary(ipv4c, total)} requests over IPv4`) + console.log(`${numberSummary(ipv6c, total)} requests over IPv6`) + console.log() + console.log(`${ipSet.size} unique IPs after anonymisation (#[a(href="https://bibliogram.art/privacy") see here])`) + console.log() + console.log(`${numberSummary(statuses.get("200") + statuses.get("206"), total)} requests resulted in 200 OK`) + console.log(`${numberSummary(statuses.get("301") + statuses.get("302") + statuses.get("303"), total)} requests resulted in 3XX redirect`) + console.log(`${numberSummary(statuses.get("503"), total)} requests resulted in 503 Service Unavailable (blocked)`) + console.log(`${numberSummary(statuses.get("404"), total)} requests resulted in 404 Not Found`) + console.log(`${numberSummary(statuses.get("502"), total)} requests resulted in 502 Bad Gateway`) + console.log() + console.log(`${numberSummary(kinds.static, total)} requests were for static content or static files`) + console.log(`${numberSummary(kinds.proxied, total)} requests were for proxied images/videos`) + console.log(`${numberSummary(kinds.feed, total)} requests were for feeds (!)`) + console.log(`${numberSummary(kinds.users, total)} requests were for users`) + console.log(`${numberSummary(kinds.userFragments, total)} requests were for user fragments (timeline continuation ajax)`) + console.log(`${numberSummary(kinds.postFragments, total)} requests were for post fragments (post overlay ajax)`) + console.log(`${numberSummary(kinds.posts, total)} requests were for posts`) + console.log(`${numberSummary(kinds.api, total)} requests were for the API`) + console.log(`${numberSummary(kinds.home, total)} requests were for the home page`) + console.log(`${numberSummary(kinds.fpredirects, total)} requests were seeking a user or post from the home page`) + console.log() + const out = fs.createWriteStream(outPath) + for (const entry of dateCollection.backing) { + out.write(`${entry[0]};${entry[1].join(";")}\n`) + } + console.log(`Overwrote ${outPath}`) +})