450 lines
17 KiB
Python
Executable File
450 lines
17 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# vi: set et sw=4 st=4:
|
|
#
|
|
# 2004-08-27 Jim Tittsler <jwt@starship.python.net>
|
|
# 2004-10-03 jwt change authentication
|
|
# 2004-10-04 jwt remove dependency on ClientCookie
|
|
# 2004-10-07 jwt use getopt to retrieve host, list, password from command
|
|
# 2004-10-10 jwt return to using ClientCookie
|
|
# 2004-10-13 jwt add --fullnames option
|
|
# 2005-02-15 jwt switch on RFC2965 cookie support when newer version
|
|
# of ClientCookie is detected
|
|
# 2005-02-16 jwt use Python 2.4's cookielib if it is available
|
|
# 2005-02-27 jwt only visit the roster page for letters that exist
|
|
# 2005-06-04 mas add --nomail option (Mark Sapiro <mark@msapiro.net>)
|
|
# 2005-06-14 jwt handle chunks of email addresses starting [0-9]*
|
|
# 2006-01-27 mas Retry urllib2.URLError exceptions in main loop.
|
|
# Modify parser to get most of the member attributes on the
|
|
# page (I don't get nomail reason because I haven't yet
|
|
# figured out how, and I don't get the language option).
|
|
# This provides a foundation for adding options to deal
|
|
# with any of these attributes.
|
|
# 2006-01-28 mas Add --regular and --digest options.
|
|
# 2006-01-29 mas Get the nomail reason (I figured out how)
|
|
# Add the --csv option intended to produce a file that can
|
|
# be imported into a local spreadsheet. Mostly useful for
|
|
# larger lists when multiple sublists are desired and where
|
|
# multiple passes are expensive.
|
|
# 2006-04-10 mas Add some error checking for invalid URL (hostname),
|
|
# listname and password.
|
|
# 2006-04-11 mas Correct test on find(). Success is '>= 0', not 'True'.
|
|
# 2006-08-24 mas Catch more exceptions on invalid URLs.
|
|
# Add some more explanation of hostname and when
|
|
# member_url might need changing.
|
|
# 2006-09-20 Ed Lally <elally@jersey.net>
|
|
# 2006-09-20 ejl Add config variable for admin path (/mailman/admin/) for
|
|
# sites that don't use default URLs.
|
|
# 2006-09-21 mas Make Ed's change a command line option.
|
|
# 2007-05-07 mas Accommodate possible urllib.quote()ed email addresses.
|
|
# 2008-02-03 mas Clarify that script works with Membership list through
|
|
# 2.1.10.
|
|
# Fix broken --url_path option.
|
|
# 2008-10-06 mas Works with 2.1.11.
|
|
# Handle chunks starting with other than [0-9A-Z].
|
|
# Print verbose output to stderr.
|
|
# 2008-10-07 mas Added -U/--unhide option
|
|
# 2008-10-09 mas Forgot to make the unhide '.' prints conditional on
|
|
# verbose. Also, csv printed "on" for members changed to
|
|
# unhidden. Fixed.
|
|
# 2011-10-24 mas Added type to nomail selection.
|
|
# 2012-10-20 mas Encode real name as iso-8859-1 to avoid Unicode error
|
|
# with non-ascii.
|
|
# 2012-11-14 jak Added support to use HTTPS (james@jameskinnaird.ca)
|
|
# 2013-01-25 mas Revised the help for -u.
|
|
# 2014-11-26 mas Tested with 2.1.18 and Python 2.7.
|
|
# Updated for '401' status return for invalid password
|
|
# in recent Mailman versions.
|
|
# 2015-08-09 mas Changed the real name encoding to make more robust.
|
|
# 2015-08-11 mas More changes for encodings.
|
|
# 2015-12-04 mas Changed error message for bad login result page.
|
|
# 2018-10-17 mas Doc changes - --unhide doesn't work with recent Mailman.
|
|
#
|
|
|
|
"""List the email addresses subscribed to a mailing list, fetched from web.
|
|
|
|
Usage: %(PROGRAM)s [options] hostname listname password
|
|
|
|
Where:
|
|
--output file
|
|
-o file
|
|
Write output to specified file instead of standard out.
|
|
|
|
--regular
|
|
-r
|
|
List only the regular (non-digest) members.
|
|
|
|
--digest={any|mime|plain}
|
|
-d {any|mime|plain}
|
|
List only the digest members. One of 'any', 'mime' or 'plain'
|
|
is required.
|
|
'any' lists all the digest members.
|
|
'mime' lists only the mime digest members.
|
|
'plain' lists only the plain digest members.
|
|
|
|
--fullnames
|
|
-f
|
|
Include the full names in the output.
|
|
|
|
--nomail={any|admin|bounce|user|unknown|enabled}
|
|
-n {any|admin|bounce|user|unknown|enabled}
|
|
List members based on their nomail status. One of 'any', 'admin',
|
|
'bounce', 'user', 'unknown' or 'enabled' is required.
|
|
'any' lists members with delivery disabled for any reason.
|
|
'admin' lists members with delivery disabled by admin.
|
|
'bounce' lists members with delivery disabled by bounce.
|
|
'user' lists members with delivery disabled by the member.
|
|
'unknown' lists members with delivery disabled by mailman 2.0
|
|
'enabled' lists members with delivery enabled.
|
|
|
|
--csv
|
|
-c
|
|
This option overrides the above four selection options and lists
|
|
all members, one per line, with comma separated, quoted values as
|
|
follows:
|
|
"full name" if available, else "","email address","mod",
|
|
"hide","nomail" ("off" or "[A]" or "[B]" or "[U]" or "[?]"),
|
|
"ack","not metoo","nodupes","digest","plain"
|
|
analogous to the admin membership list (the values of the 'checkbox'
|
|
fields are either "off" or "on"). A title line with the above names
|
|
is listed before the member lines.
|
|
|
|
--url_path path
|
|
-u path
|
|
If the list admin pages are accessed at your site via a URL of form
|
|
different from http://hostname/mailman/admin/listname, you need to
|
|
specify the path portion of the URL that is between hostname and
|
|
/listname with this option. For example, a URL such as
|
|
http://hostname/admin.cgi/listname requires the option
|
|
--url_path /admin.cgi
|
|
or
|
|
-u /admin.cgi
|
|
and a URL like http://hostname/cgi-bin/mailman/admin/listname
|
|
requires the option
|
|
--url_path /cgi-bin/mailman/admin
|
|
or
|
|
-u /cgi-bin/mailman/admin
|
|
Default value is /mailman/admin.
|
|
|
|
--unhide
|
|
-U
|
|
Set the 'hidden' flag off for all list members including those not
|
|
selected for output. This will take a long time if there are a lot
|
|
of hidden members. The -v option prints '.' after every 100 unhides.
|
|
This option is only effective with Mailman versions up to 2.1.22
|
|
because it doesn't account for CSRF checks introduced in 2.1.23.
|
|
|
|
--ssl
|
|
-s
|
|
Use https instead of http for accessing the list.
|
|
|
|
--verbose
|
|
-v
|
|
Include extra progress output.
|
|
|
|
--help
|
|
-h
|
|
Print this help message and exit
|
|
|
|
hostname is the name used in the URL of the list's web interface
|
|
listname is the name of the mailing list
|
|
password is the list's admin password
|
|
|
|
The list of subscribers is fetched from the web administrative
|
|
interface. Using the bin/list_members program from a shell
|
|
account is preferable, but not always available.
|
|
|
|
Tested with the Mailman 2.1.5 - 2.1.29 Membership list layout, but the
|
|
--unhide option only works up to 2.1.22.
|
|
|
|
If Python 2.4's cookielib is available, use it. Otherwise require
|
|
ClientCookie http://wwwsearch.sourceforge.net/ClientCookie/
|
|
|
|
This script runs on your workstation and requires that you have Python
|
|
<http://www.python.org> installed. It works best with Python 2.4.x
|
|
through Python 2.7.x. See mailman-subscribers3.py for a Python 3 version.
|
|
"""
|
|
|
|
import sys
|
|
import re
|
|
import string
|
|
import urllib
|
|
import getopt
|
|
import httplib
|
|
import urllib2
|
|
from time import sleep
|
|
from HTMLParser import HTMLParser
|
|
# Build the cookie-aware URL opener used for every request.  The standard
# library's cookielib (Python 2.4+) is preferred; when it cannot be
# imported, fall back to the third-party ClientCookie package.
try:
    import cookielib
    policy = cookielib.DefaultCookiePolicy(rfc2965=True)
    cookiejar = cookielib.CookieJar(policy)
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cookiejar)
    ).open
except ImportError:
    import ClientCookie
    cookiejar = ClientCookie.CookieJar()
    try:
        # A newer ClientCookie needs RFC 2965 cookies switched on
        # explicitly, and an opener installed that honours that policy.
        cookiejar.set_policy(
            ClientCookie.DefaultCookiePolicy(rfc2965=True))
        opener = ClientCookie.build_opener(
            ClientCookie.HTTPCookieProcessor(cookiejar))
        ClientCookie.install_opener(opener)
    except AttributeError:
        # An old ClientCookie lacks these attributes; per the original
        # author it already accepts RFC 2965 cookies, so nothing to do.
        pass
    # In either ClientCookie case, requests go through the module-level
    # urlopen (which uses the installed opener when there is one).
    opener = ClientCookie.urlopen
|
|
|
|
# Name the script was invoked as; substituted for %(PROGRAM)s when the
# module docstring is printed as the usage message.
PROGRAM = sys.argv[0]

# Fallback for interpreters where the names True and False are not
# defined: referencing them raises NameError, in which case integer
# stand-ins are created.
try:
    True, False
except NameError:
    True = 1
    False = 0
|
|
|
|
def usage(code, msg=''):
    """Print the help text (and optionally *msg*), then exit with *code*.

    Output goes to stderr when *code* is non-zero and to stdout when it
    is zero.  The help text is the module docstring with its %(name)s
    placeholders filled in from the module globals.  This function never
    returns: it always ends in sys.exit(code).
    """
    # Start from the success stream and switch only for error exits.
    fd = sys.stdout
    if code:
        fd = sys.stderr
    help_text = __doc__ % globals()
    print >> fd, help_text
    if msg:
        print >> fd, msg
    sys.exit(code)
|
|
|
|
# Shared state between MailmanHTMLParser and main().

# Maps the address part of each member's form-field names to a dict of
# that member's attributes, keyed by the suffixes listed in vnames.
subscribers = {}
# Form-field name suffixes the parser recognises on <input> tags; the
# text before the suffix is taken as the member's address.
vnames = ['_realname', '_mod', '_hide', '_nomail', '_ack', '_notmetoo',
          '_nodupes', '_digest', '_plain']
# Highest chunk number seen in links while parsing; main() resets it to 0
# before walking the chunks of each letter.
maxchunk = 0
# Letters still to be fetched; the parser appends letters it finds in
# page links, and main() consumes them from the front.
letters = ['0']
# Letters main() has already started on, so they are not queued again.
processed_letters = []
# Set by the parser when a '_nomail' input with value "on" was just seen;
# the next text data is then stored as that member's '_nomail' value.
gotnomail = False
|
|
|
|
class MailmanHTMLParser(HTMLParser):
    '''cheap way to find email addresses and pages with multiple
    chunks from Mailman 2.1.5 membership pages

    The parser communicates through module globals rather than instance
    state: member attributes are stored in ``subscribers``, newly seen
    letters are appended to ``letters`` and the highest chunk number
    seen is kept in ``maxchunk``.'''

    def handle_starttag(self, tag, attrs):
        '''Record member attributes from <input> tags and paging
        information from <a> tags.

        tag   -- lower-cased tag name supplied by HTMLParser
        attrs -- list of (name, value) pairs; value is None for an
                 attribute written without a value
        '''
        global maxchunk, letters, gotnomail, subemail, url_path
        if tag == 'input':
            # Collect the name and value once; they do not depend on
            # which suffix is being tested.  Defaults keep a tag without
            # a value attribute from raising UnboundLocalError, and
            # 'v or' guards against valueless attributes (None).
            field_name = ''
            field_value = ''
            for a, v in attrs:
                if a == 'name':
                    field_name = v or ''
                elif a == 'value':
                    field_value = v or ''
            for vname in vnames:
                if not field_name.endswith(vname):
                    continue
                # Everything before the suffix identifies the member.
                subemail = field_name[:-len(vname)]
                if subemail not in subscribers:
                    subscribers[subemail] = {}
                if vname == '_nomail' and field_value == "on":
                    # The nomail reason is the text that follows this
                    # tag; handle_data() stores it.
                    gotnomail = True
                else:
                    subval = field_value
                    # Normalise to the output charset: decode from the
                    # page charset unless already unicode, then encode.
                    if not isinstance(subval, unicode):
                        subval = subval.decode(page_cset, 'replace')
                    subscribers[subemail][vname] = subval.encode(
                        my_cset, 'replace')
        elif tag == 'a':
            for a, v in attrs:
                # Only links back into the admin pages are of interest.
                if a == 'href' and v and v.find("%s/" % (url_path)) >= 0:
                    m = re.search(r'chunk=(?P<chunkno>\d+)', v, re.I)
                    if m:
                        chunkno = int(m.group('chunkno'))
                        if chunkno > maxchunk:
                            maxchunk = chunkno
                    m = re.search(r'letter=(?P<letter>.)', v, re.I)
                    if m:
                        letter = m.group('letter')
                        # Queue each letter at most once.
                        if letter not in letters + processed_letters:
                            letters.append(letter)

    def handle_data(self, data):
        '''Store the text following a '_nomail' input whose value was
        "on" as that member's '_nomail' value.'''
        global gotnomail, subemail
        if gotnomail:
            gotnomail = False
            subscribers[subemail]['_nomail'] = data
|
|
|
|
def main():
|
|
global maxchunk, letters, url_path, my_cset, page_cset
|
|
try:
|
|
opts, args = getopt.getopt(sys.argv[1:], "ho:rd:fn:cu:Uvs",
|
|
["help", "output=", "regular", "digest=", "fullnames",
|
|
"nomail=", "csv", "url_path=", "unhide", "verbose",
|
|
"ssl"])
|
|
except:
|
|
usage(2)
|
|
fp = sys.stdout
|
|
fullnames = False
|
|
nomail = None
|
|
verbose = False
|
|
regular = False
|
|
digest = None
|
|
csv = False
|
|
unhide = False
|
|
protocol = 'http'
|
|
url_path = '/mailman/admin'
|
|
for o,a in opts:
|
|
if o in ("-v", "--verbose"):
|
|
verbose = True
|
|
if o in ("-h", "--help"):
|
|
usage(0)
|
|
if o in ("-o", "--output"):
|
|
fp = open(a, "wt")
|
|
if o in ("-f", "--fullnames"):
|
|
fullnames = True
|
|
if o in ("-n", "--nomail"):
|
|
nomail = a.lower()
|
|
if o in ("-r", "--regular"):
|
|
regular = True
|
|
if o in ("-d", "--digest"):
|
|
digest = a.lower()
|
|
if o in ("-c", "--csv"):
|
|
csv = True
|
|
if o in ("-u", "--url_path"):
|
|
url_path = a
|
|
if o in ("-U", "--unhide"):
|
|
unhide = True
|
|
if o in ("-s", "--ssl"):
|
|
protocol = 'https'
|
|
if regular and digest:
|
|
usage(2, "Both 'regular' and 'digest' will produce an empty list.")
|
|
if digest not in [None, 'any', 'mime', 'plain']:
|
|
usage(2, "Digest type %s unrecognized" % digest)
|
|
if nomail not in [None, 'any', 'admin', 'bounce', 'user', 'unknown',
|
|
'enabled']:
|
|
usage(2, "Nomail type %s unrecognized" % nomail)
|
|
if len(args) != 3:
|
|
usage(2)
|
|
|
|
member_url = '%s://%s%s/%s/members' % (protocol, args[0], url_path,
|
|
args[1])
|
|
options_url = '%s://%s%s/%s' % (protocol, args[0],
|
|
re.sub('admin', 'options', url_path),
|
|
args[1])
|
|
p = {'adminpw':args[2]}
|
|
|
|
def_cset = sys.getdefaultencoding()
|
|
if def_cset.lower().endswith('ascii'):
|
|
def_cset = 'iso-8859-1'
|
|
my_cset = sys.stdout.encoding or def_cset
|
|
|
|
# login, picking up the cookie
|
|
try:
|
|
page = opener(member_url, urllib.urlencode(p))
|
|
except (urllib2.URLError, httplib.InvalidURL), e:
|
|
if isinstance(e, urllib2.HTTPError) and e.code == 401:
|
|
usage(1, 'Invalid password.')
|
|
else:
|
|
usage(1, """Error accessing %s
|
|
Supplied host or listname may be incorrect,
|
|
or you may need to specify --url_path.
|
|
""" % (member_url))
|
|
|
|
# Get the charset of the page, but use iso-8859-1 for ascii or None.
|
|
page_cset = page.info().getparam('charset') or 'iso-8859-1'
|
|
if page_cset.lower().endswith('ascii'):
|
|
page_cset = 'iso-8859-1'
|
|
|
|
lines = page.read()
|
|
page.close()
|
|
p = {}
|
|
# Try to recognize the returned page independent of the list language
|
|
if re.search(r'INPUT\s+type="SUBMIT"\s+name="admlogin"', lines,
|
|
re.M + re.I):
|
|
# login page - invalid password
|
|
usage(1,
|
|
'Login invalid - possibly incorrect password or missing -s option.')
|
|
if not re.search(r'<form\s+action=', lines, re.M + re.I):
|
|
# no <form> tag - admin overview page
|
|
usage(1, """Non-existent list: %s.
|
|
If the provided list name is valid, the supplied host may be incorrect
|
|
or you may need to specify --url_path.
|
|
""" % args[1])
|
|
|
|
# loop through the letters, and all chunks of each
|
|
while len(letters) > 0:
|
|
letter = letters[0]
|
|
letters = letters[1:]
|
|
processed_letters.append(letter)
|
|
chunk = 0
|
|
maxchunk = 0
|
|
while chunk <= maxchunk:
|
|
if verbose:
|
|
print >> sys.stderr, "%c(%d)" % (letter, chunk)
|
|
while True:
|
|
try:
|
|
page = opener(member_url + "?letter=%s&chunk=%d" %
|
|
(letter, chunk))
|
|
lines = page.read()
|
|
page.close()
|
|
except urllib2.URLError:
|
|
if verbose:
|
|
print >> sys.stderr,\
|
|
'Error encountered in accessing web page.',\
|
|
'Retrying.'
|
|
sleep(2)
|
|
else:
|
|
break
|
|
|
|
parser = MailmanHTMLParser()
|
|
parser.feed(lines)
|
|
parser.close()
|
|
chunk += 1
|
|
|
|
subscriberlist = subscribers.items()
|
|
subscriberlist.sort()
|
|
|
|
# print the subscribers list
|
|
if csv:
|
|
print >>fp, '"Full name","email address","mod","hide",\
|
|
"nomail","ack","not metoo","nodupes","digest","plain"'
|
|
|
|
nunhide = 0
|
|
for (email, d) in subscriberlist:
|
|
if unhide and d['_hide'] == "on":
|
|
params = urllib.urlencode({'conceal':0,
|
|
'options-submit':1})
|
|
u = opener("%s/%s" % (options_url, email), params)
|
|
u.close()
|
|
d['_hide'] = "off"
|
|
nunhide += 1
|
|
if verbose and nunhide % 100 == 0:
|
|
print >>sys.stderr, '.',
|
|
email = urllib.unquote(email)
|
|
if csv:
|
|
print >>fp,\
|
|
'"%s","%s","%s","%s","%s","%s","%s","%s","%s","%s"'\
|
|
% (d['_realname'], email, d['_mod'], d['_hide'],
|
|
d['_nomail'], d['_ack'], d['_notmetoo'],
|
|
d['_nodupes'], d['_digest'], d['_plain'])
|
|
continue
|
|
if nomail == 'enabled' and d['_nomail'] <> "off":
|
|
continue
|
|
if nomail == 'any' and d['_nomail'] == "off":
|
|
continue
|
|
if nomail == 'admin' and d['_nomail'] <> "[A]":
|
|
continue
|
|
if nomail == 'bounce' and d['_nomail'] <> "[B]":
|
|
continue
|
|
if nomail == 'user' and d['_nomail'] <> "[U]":
|
|
continue
|
|
if nomail == 'unknown' and d['_nomail'] <> "[?]":
|
|
continue
|
|
if regular and d['_digest'] == "on":
|
|
continue
|
|
if digest and d['_digest'] == "off":
|
|
continue
|
|
if digest == "mime" and d['_plain'] == "on":
|
|
continue
|
|
if digest == "plain" and d['_plain'] == "off":
|
|
continue
|
|
if not fullnames or d['_realname'] == "":
|
|
print >>fp, email
|
|
else:
|
|
print >>fp, '%s <%s>' % (d['_realname'], email)
|
|
|
|
fp.close()
|
|
|
|
|
|
# Run main() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|