2020-04-24 15:15:54 +00:00
|
|
|
// @ts-check
|
|
|
|
|
|
|
|
const path = require("path")
|
|
|
|
const readline = require("readline")
|
|
|
|
const fs = require("fs")
|
|
|
|
|
|
|
|
// Path of the nginx log file to analyse, taken from the first command line argument.
const logPath = process.argv[2]
if (!logPath) {
	console.log("Specify the path to the nginx log file as a command line argument.")
	process.exit(1)
}

// CSV file that receives the per-interval request counts once the log has been read.
const outPath = "./dates.csv"

// Emits one "line" event per line of the log file, then "close" at the end of the stream.
const reader = readline.createInterface({
	input: fs.createReadStream(logPath),
})
|
|
|
|
|
|
|
|
/**
 * Counter keyed by arbitrary values: each key maps to the number of times
 * it has been passed to `add`.
 */
class MapOfNumber {
	constructor() {
		/** @type {Map<any, number>} */
		this.backing = new Map()
	}

	/**
	 * Increment the count stored for `key`. An unseen key starts at 0.
	 * @param {any} key
	 */
	add(key) {
		this.backing.set(key, this.get(key) + 1)
	}

	/**
	 * @param {any} key
	 * @returns {number} the count for `key`, or 0 if it was never added
	 */
	get(key) {
		return this.backing.has(key) ? this.backing.get(key) : 0
	}

	/**
	 * @returns {[any, number][]} every [key, count] pair, highest count first
	 */
	sort() {
		return [...this.backing.entries()].sort((a, b) => b[1] - a[1])
	}
}
|
|
|
|
|
|
|
|
/**
 * Map in which every key owns a list of values; `add` appends to that list.
 * @template K,V
 */
class MapOfArray {
	constructor() {
		/** @type {Map<K, V[]>} */
		this.backing = new Map()
	}

	/**
	 * Append `value` to the list stored under `key`, creating the list on first use.
	 * @param {K} key
	 * @param {V} value
	 */
	add(key, value) {
		const existing = this.backing.get(key)
		if (existing) {
			// Only the value belongs in the list; the key must not be appended with it.
			existing.push(value)
		} else {
			this.backing.set(key, [value])
		}
	}

	/**
	 * @param {K} key
	 * @returns {V[]} the stored list, or a new empty array (not stored) for an unknown key
	 */
	get(key) {
		return this.backing.has(key) ? this.backing.get(key) : []
	}
}
|
|
|
|
|
|
|
|
/**
 * Per-key histogram over time. Each key owns a row of counters, and counter `i`
 * covers the half-open range [startTime + i*interval, startTime + (i+1)*interval).
 */
class DateCollection {
	/**
	 * @param {number} startTime timestamp that maps to index 0
	 * @param {number} interval width of one bucket, in the same unit as `startTime`
	 */
	constructor(startTime, interval) {
		this.startTime = startTime
		this.interval = interval
		/** @type {Map<string, number[]>} */
		this.backing = new Map()
	}

	/**
	 * @param {number} timestamp
	 * @returns {number} index of the bucket that `timestamp` falls into
	 */
	timestampToIndex(timestamp) {
		return Math.floor((timestamp - this.startTime) / this.interval)
	}

	/**
	 * Count one occurrence of `key` in the bucket that contains `timestamp`.
	 * Timestamps that do not map to a valid bucket (before `startTime`, NaN,
	 * or non-finite) are not counted.
	 * @param {string} key
	 * @param {number} timestamp
	 */
	add(key, timestamp) {
		let row = this.backing.get(key)
		if (!row) {
			row = []
			// Store the very array that is incremented below, so the first sample is kept.
			this.backing.set(key, row)
		}

		const index = this.timestampToIndex(timestamp)
		// A negative or NaN index would create a stray property instead of a bucket,
		// and an infinite one would never finish padding.
		if (!Number.isInteger(index) || index < 0) return

		// Zero-fill any buckets between the end of the row and the target bucket.
		while (row.length <= index) {
			row.push(0)
		}
		row[index]++
	}
}
|
|
|
|
|
|
|
|
// Capture groups, in order: ip, date, method, path, status, bytes, userAgent
const regex = /^([^ ]+) - - \[([^\]]+)\] "([A-Z]+) ([^"]+) HTTP\/(?:1\.0|1\.1|2\.0)" ([0-9]+) ([0-9]+) "([^"]*)"$/

/**
 * Split one access log line into its fields.
 * @param {string} line a single line of the log file
 * @returns {{ip: string, date: string, method: string, path: string, status: string, bytes: number, userAgent: string} | null}
 *   the parsed fields, or null when the line does not match the expected format.
 *   `status` stays a string; only `bytes` is converted to a number.
 */
function parseLine(line) {
	const result = line.match(regex)
	// Lines in any other format are skipped silently by returning null.
	if (!result) return null

	const [, ip, date, method, requestPath, status, bytes, userAgent] = result
	return {
		ip,
		date,
		method,
		path: requestPath,
		status,
		bytes: +bytes,
		userAgent
	}
}
|
|
|
|
|
2020-07-13 13:23:01 +00:00
|
|
|
// Exact paths that are counted as static files although they are not under /static/.
const additionalStatic = ["/android-chrome-512x512.png", "/safari-pinned-tab.svg", "/robots.txt", "/bibliogram.webmanifest", "/apple-touch-icon.png", "/favicon-32x32.png", "/favicon-16x16.png", "/favicon.ico", "/android-chrome-192x192.png"]

// Number of log lines that were parsed successfully.
let total = 0
// Requests whose IP contains no ":" / contains a ":".
let ipv4c = 0
let ipv6c = 0
// Every distinct IP seen in a parsed line.
const ipSet = new Set()
// Request count per category; the "line" handler increments the matching key.
let kinds = {
	proxied: 0,
	feed: 0,
	users: 0,
	posts: 0,
	static: 0,
	userFragments: 0,
	postFragments: 0,
	fpredirects: 0,
	home: 0,
	api: 0
}
// Request count per HTTP status; statuses are kept as strings, e.g. "200".
const statuses = new MapOfNumber()
// Request count per IP, counting only requests that fell into one of the categories above.
const ips = new MapOfNumber()
// Created on the first parsed line, so that line's timestamp becomes bucket 0.
/** @type {DateCollection} */
let dateCollection = null
|
|
|
|
|
|
|
|
/**
 * Decide which request category a path belongs to. The checks run in order
 * and the first match wins, so the order below is significant.
 * @param {string} requestPath path taken from the request line
 * @returns {string | null} a key of `kinds`, or null when the path fits no category
 */
function classifyPath(requestPath) {
	if (requestPath === "/") return "home"
	if (requestPath.startsWith("/imageproxy") || requestPath.startsWith("/videoproxy")) return "proxied"
	if (requestPath.endsWith(".xml")) return "feed"
	if (requestPath.startsWith("/static/") || additionalStatic.includes(requestPath)) return "static"
	if (requestPath.startsWith("/u/")) return "users"
	if (requestPath.startsWith("/p/")) return "posts"
	if (requestPath.startsWith("/fragment/user")) return "userFragments"
	if (requestPath.startsWith("/fragment/post")) return "postFragments"
	// NOTE(review): this matches every remaining path beginning with "/u" or "/p",
	// not only the redirect endpoints - confirm that is intended.
	if (requestPath.startsWith("/u") || requestPath.startsWith("/p")) return "fpredirects"
	if (requestPath.startsWith("/api/") || requestPath === "/.well-known/nodeinfo") return "api"
	return null
}

// Update every counter for one line of the log. Lines that do not parse are ignored.
reader.on("line", line => {
	const parsed = parseLine(line)
	if (!parsed) return

	// The first ":" of the logged date is swapped for a space before handing it to Date.
	// Presumably the date looks like "24/Apr/2020:15:15:54 +0000" - TODO confirm.
	const timestamp = new Date(parsed.date.replace(":", " ")).getTime()

	if (!dateCollection) {
		// One bucket per hour, starting at the first parsed request.
		dateCollection = new DateCollection(timestamp, 60*60*1000)
	}

	total++
	ipSet.add(parsed.ip)

	if (parsed.ip.includes(":")) ipv6c++
	else ipv4c++

	statuses.add(parsed.status)

	const kind = classifyPath(parsed.path)
	if (kind) {
		kinds[kind]++
		dateCollection.add(kind, timestamp)
		ips.add(parsed.ip)
	}
})
|
|
|
|
|
|
|
|
/**
 * Format a count together with its share of a total, e.g. "     5 (50.0%)".
 * @param {number} part the count to display
 * @param {number} total the value that corresponds to 100%
 * @param {number} [padSize] minimum width of the count, left-padded with spaces
 * @returns {string}
 */
function numberSummary(part, total, padSize = 6) {
	// With an empty total the division would yield NaN, so report 0% instead.
	const percent = total > 0 ? part / total * 100 : 0
	const count = part.toString().padStart(padSize, " ")
	return `${count} (${percent.toFixed(1).padStart(4, " ")}%)`
}
|
|
|
|
|
|
|
|
// Runs once the whole log has been read: prints the summary, writes the CSV
// of per-interval counts, then prints the busiest IPs.
reader.on("close", () => {
	console.log(`${total} total requests`)
	console.log(`${numberSummary(ipv4c, total)} requests over IPv4`)
	console.log(`${numberSummary(ipv6c, total)} requests over IPv6`)
	console.log()
	console.log(`${ipSet.size} unique IPs after anonymisation (#[a(href="https://bibliogram.art/privacy") see here])`)
	console.log()
	console.log(`${numberSummary(statuses.get("200") + statuses.get("206"), total)} requests resulted in 200 OK`)
	console.log(`${numberSummary(statuses.get("301") + statuses.get("302") + statuses.get("303"), total)} requests resulted in 3XX redirect`)
	console.log(`${numberSummary(statuses.get("503"), total)} requests resulted in 503 Service Unavailable (blocked)`)
	console.log(`${numberSummary(statuses.get("404"), total)} requests resulted in 404 Not Found`)
	console.log(`${numberSummary(statuses.get("502"), total)} requests resulted in 502 Bad Gateway`)
	console.log()
	console.log(`${numberSummary(kinds.static, total)} requests were for static content or static files`)
	console.log(`${numberSummary(kinds.proxied, total)} requests were for proxied images/videos`)
	console.log(`${numberSummary(kinds.feed, total)} requests were for feeds (!)`)
	console.log(`${numberSummary(kinds.users, total)} requests were for users`)
	console.log(`${numberSummary(kinds.userFragments, total)} requests were for user fragments (timeline continuation ajax)`)
	console.log(`${numberSummary(kinds.postFragments, total)} requests were for post fragments (post overlay ajax)`)
	console.log(`${numberSummary(kinds.posts, total)} requests were for posts`)
	console.log(`${numberSummary(kinds.api, total)} requests were for the API`)
	console.log(`${numberSummary(kinds.home, total)} requests were for the home page`)
	console.log(`${numberSummary(kinds.fpredirects, total)} requests were seeking a user or post from the home page`)
	console.log()

	// One CSV row per category: the category name followed by its count per interval.
	const out = fs.createWriteStream(outPath)
	// dateCollection is still null when no line of the log could be parsed;
	// the file is then left empty instead of crashing here.
	if (dateCollection) {
		for (const entry of dateCollection.backing) {
			out.write(`${entry[0]};${entry[1].join(";")}\n`)
		}
	}
	// Signal that nothing more will be written so the stream can flush and close.
	out.end()
	console.log(`Overwrote ${outPath}`)
	console.log()

	// `sorted` is ordered by descending request count, so the entry 2% of the way
	// in from the top is the 98th percentile.
	const sorted = ips.sort()
	const percentile = 98
	console.log("Top 10 IPs:")
	console.log(sorted.slice(0, 10))
	console.log(`${percentile}th percentile:`)
	console.log(sorted[Math.floor(sorted.length / 100 * (100 - percentile))])
})
|