#!/bin/bash
#
# Specialized archive downloader for mailman v3.x
#
# Features
#
# * Downloads all gz and html files and attachments
# * Filters: skip_gz, skip_attachment, skip_list, skip_email
# * Doesn't download the same file twice
# * Stores the files of a list in its own folder
#
# Usage
#
# * Install gawk and curl
# * Run this script
# * Answer the questions
#   * Mailman URL: the "root" URL of the mailman software. No CGI page, no trailing slash
#   * List name: the list ID, i.e. what appears after xxx.cgi/{list_name} in the mailman URL
#   * Admin password: the admin password for the list
#
# Flags and options
#
# You can pass flags and options by using the export command. For example:
# If you want to skip gz files and attachments
#   export skip_gz=1
# If you want to provide the url, list name, and password with no questions asked
#   export url=https://mailman.example.com
#   export nom=list-example.com
#   export password=thisbemysecurepassword
#
# Notes
#
# * When gz is mentioned, they aren't actual compressed files... Beats me.
# * This script could use parallel. It's quite long and tedious.
#
# @author alban
# @since 2020-02-20

# Helper function: cURL wrapper with cookies and target file management.
#
# Arguments:
#   $1 - value sent in the "Cookie:" request header
#   $2 - URL to download
#   $3 - local path the response body is written to
# Outputs:
#   Prints the target path on stdout once curl has written it.
# Returns:
#   0 without fetching anything when the target already exists and is
#   non-empty; otherwise the status of dirname/mkdir/curl.
http(){
  local cookie="${1}"
  local url="${2}"
  local file="${3}"
  local dir

  # Never download the same file twice: a non-empty target counts as done.
  [[ -s "$file" ]] && return 0

  # Declared separately from the assignment so a dirname failure is not masked.
  dir=$(dirname -- "$file") || return
  [[ -d "$dir" ]] || mkdir -p -- "$dir" || return

  # Quoted so that list names / paths containing spaces stay one argument.
  curl -s -H "Cookie: $cookie" "$url" -o "$file" && printf '%s\n' "$file"
}

# Helper function: exit + message.
# Prints all arguments as a single line on stderr, then terminates the
# script with exit status 1.
# (A space is required after "{" for bash to parse the function body.)
panic() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Some dependencies are mandatory
# `command -v` is a shell builtin, so the check itself does not depend on
# the external `which` utility being installed.
command -v gawk >/dev/null 2>&1 || panic "Please install gawk"
command -v curl >/dev/null 2>&1 || panic "Please install curl"

# Read user provided options
# Every value can be pre-seeded through the environment (export url=...,
# export nom=..., export password=...); only missing values are asked for.

# Accept `name` as an alias for `nom` so that `export name=...` works as well.
nom="${nom:-${name:-}}"

[[ -z "$url" ]] && read -r -p "Mailman URL (ex: https://list.example.com): " url
[[ -z "$nom" ]] && read -r -p "List name (ex: list-example.com): " nom
if [[ -z "$password" ]]; then
  # -s keeps the password from being echoed on the terminal.
  read -r -s -p "Mot de passe admin: " password
  printf '\n' >&2
fi

# The URLs built below append "/xxx.cgi/...": drop one trailing slash if given.
url="${url%/}"

# get the cookie
# Posts the admin password to the list's admindb page, dumps the response
# headers to a temporary file and extracts the cookie(s) from them.
cookie_file=$(mktemp) || panic "Unable to create a temporary file"

# --data-urlencode keeps passwords containing spaces, '&', '+' or '%' intact.
# NOTE(review): the password is still visible in the process list while curl runs.
curl -s -o /dev/null -D "${cookie_file}" \
  -d admlogin=whatever \
  --data-urlencode "adminpw=${password}" \
  "${url}/admindb.cgi/${nom}"

# Header names are case-insensitive (HTTP/2 responses show them lowercase).
# Only the name=value part before the first ';' is kept; several cookies are
# joined with "; " so the result is a valid single-line Cookie header value.
cookie=$(gawk '
  BEGIN { IGNORECASE = 1 }
  match($0, /^Set-Cookie:[ \t]*([^;\r]*)/, m) {
    jar = (jar == "" ? "" : jar "; ") m[1]
  }
  END { if (jar != "") print jar }
' "${cookie_file}")
rm -f -- "${cookie_file}"

if [[ -z "$cookie" ]]; then
  # Not fatal: the downloads are still attempted, as before.
  echo "Warning: no session cookie received; check the URL, list name and password" >&2
fi

# Get the main html
# The private archive index of the list is stored as <list>/index.html; the
# path is then printed as an export statement.
response_file="${nom}/index.html"
http "${cookie}" "${url}/private.cgi/${nom}" "${response_file}" ""
printf '%s\n' "export response_file='${response_file}'"

# Download the GZ files and attachments
# Attachments are discovered inside the downloaded archives, so skip_gz
# skips them as well; skip_attachment skips only the attachments.
prefix="${url}/private.cgi/${nom}"
if [[ -z "$skip_gz" ]] ; then
  # The index links each archive like:
  #   href="2009-June.txt.gz">[ Text 2 KB Gzipped ]</a></td>
  gawk 'match($0, /([0-9]{4}-[^"<> \t]*\.txt\.gz)/, a) {print a[1]}' "$response_file" |
  while IFS= read -r gz; do
    http "$cookie" "${prefix}/${gz}" "${nom}/${gz}"

    # Nothing to scan if the archive could not be downloaded.
    [[ -s "${nom}/${gz}" ]] || continue

    # Download attachments
    if [[ -z "$skip_attachment" ]] ; then
      # URL: <http://lists.domain.tld/private.cgi/list-domain.tld/attachments/20190503/225f91a0/attachment.html>
      # gawk regexes have no lazy quantifier: [^>]* stops at the first '>'.
      gawk 'match($0, /URL: <([^>]*)>/, a) {print a[1]}' "${nom}/${gz}" |
      while IFS= read -r attachment_url; do
        # Keep only the path below the list, whatever scheme/host the archive
        # recorded (it may differ from the URL given to this script).
        relative_path="${attachment_url#*"/private.cgi/${nom}/"}"

        # The URL comes from archive content: refuse paths that would climb
        # out of the list folder.
        case "/${relative_path}/" in
          */../*) continue ;;
        esac
        # NOTE(review): the session cookie is sent to whatever host the URL
        # names; consider restricting downloads to the Mailman host.

        output_file="${nom}/${relative_path}"
        http "$cookie" "$attachment_url" "$output_file"
      done
    fi
  done
fi

# Download the HTML Files, lists ordered by date / thread / etc. first
# skip_list skips this whole section; skip_email keeps the list pages but
# skips the individual messages they link to.
if [[ -z "$skip_list" ]] ; then

  # The index links to pages such as 2008-March/<type>.html
  # [^"]* keeps the match inside one href even if a line holds several links.
  gawk 'match($0, /href="([^"]*\.html)"/, a) {print a[1]}' "$response_file" |
  while IFS= read -r list_url; do
    list_dir=$(dirname -- "$list_url")
    http "$cookie" "${url}/private.cgi/${nom}/${list_url}" "${nom}/${list_url}"

    # Nothing to scan if the list page could not be downloaded.
    [[ -s "${nom}/${list_url}" ]] || continue

    # Download single emails
    if [[ -z "$skip_email" ]] ; then
      # <LI><A HREF="020532.html">[List] Regardez la lumière mes jolis
      gawk 'match($0, /(href|HREF)="([0-9]+\.html)"/, a) {print a[2]}' "${nom}/${list_url}" |
      while IFS= read -r email_url; do
        output_file="${nom}/${list_dir}/${email_url}"
        http "$cookie" "${url}/private.cgi/${nom}/${list_dir}/${email_url}" "$output_file"
      done
    fi
  done

fi