Attachment 'generate_nice_graph.py'

Download

   1 #!/usr/bin/env python
   2 
   3 # Copyright 2013 Elena Grandi
   4 # 
   5 # This work is free. You can redistribute it and/or modify it under the
   6 # terms of the Do What The Fuck You Want To Public License, Version 2,
   7 # as published by Sam Hocevar. See http://www.wtfpl.net/ 
   8 # for more details.
   9 
  10 """
  11 This script regenerates the graph shown on the `Debian Women Statistics 
  12 page <https://wiki.debian.org/DebianWomen/Projects/Statistics>`_.
  13 
   14 It parses the raw text version of the page, looking for the titles
   15 "Women that have at some point maintained packages",
   16 "Women with DD account", "Women with DM keys" and
   17 "Latest uploads from DD accounts" (the last one only to stop parsing),
   18 and takes data points from the lines starting with " *"
   19 followed by a date.
  20 Lines that include the words "locked" or "resigned" are counted 
  21 as a negative value.
  22 
  23 It requires python-matplotlib >= 1.3, for the plt.xkcd() command.
  24 
  25 To use it, just run the script and then upload the resulting png file.
  26 """
  27 
  28 import datetime
  29 import urllib2
  30 
  31 import matplotlib.pyplot as plt
  32 import matplotlib.font_manager
  33 
  34 URL = "https://wiki.debian.org/DebianWomen/Projects/Statistics?action=raw"
  35 
  36 FNAME = "dwstats.png"
  37 
  38 class DWParser:
  39 
  40     def parse(self,url):
  41         self._reset_values()
  42         self.status = 'START'
  43         up = urllib2.urlopen(url)
  44         for line in up.readlines():
  45             if 'Women that have at some point maintained packages' in line:
  46                 self.status = 'UPLOADS'
  47                 continue
  48             elif 'Women with DD account' in line:
  49                 self.status = 'DD'
  50                 continue
  51             elif 'Women with DM keys' in line:
  52                 self.status = 'DM'
  53                 continue
  54             elif 'Latest uploads from DD accounts' in line:
  55                 self.status = 'DONE'
  56                 break
  57             if line.startswith(' *'):
  58                 try:
  59                     date = datetime.datetime.strptime( line.split()[1],
  60                             '%Y-%m-%d').date()
  61                 except ValueError:
  62                     continue
  63                 if 'locked' in line or 'resigned' in line:
  64                     v = -1
  65                 else:
  66                     v = 1
  67                 if self.status == 'UPLOADS':
  68                     self.uploads.append((date,v))
  69                 elif self.status == 'DD':
  70                     self.dd.append((date,v))
  71                 elif self.status == 'DM':
  72                     self.dm.append((date,v))
  73         self.uploads.sort()
  74         self.dd.sort()
  75         self.dm.sort()
  76 
  77     def _reset_values(self):
  78         self.uploads = []
  79         self.dd = []
  80         self.dm = []
  81 
  82 
  83 class GraphGenerator:
  84     
  85     def __init__(self):
  86         plt.xkcd()
  87         self.fig = plt.figure(figsize=(8.38,6.24))
  88         plt.xlim(datetime.date(1996,01,01), datetime.date.today())
  89         self.font = matplotlib.font_manager.FontProperties(
  90                 family='Sans',
  91                 weight='normal')
  92 
  93     def add_line(self,dates,label):
  94         x = [d[0] for d in dates] + [datetime.date.today()]
  95         y_st = [d[1] for d in dates] + [0]
  96         y = [sum(y_st[:i+1]) for i in xrange(len(y_st))]
  97         plt.plot(x, y, label = label)
  98 
  99     def save(self,fname):
 100         plt.legend(loc='upper left', prop=self.font)
 101         ax2 = plt.twinx()
 102         ax2.set_ylim(self.fig.axes[0].get_ylim())
 103         for ax in self.fig.axes:
 104             for label in ax.get_xticklabels():
 105                 label.set_fontproperties(self.font)
 106             for label in ax.get_yticklabels():
 107                 label.set_fontproperties(self.font)
 108         plt.savefig(fname)
 109 
 110 
 111 def main():
 112     p = DWParser()
 113     p.parse(URL)
 114     g = GraphGenerator()
 115     g.add_line(p.uploads,"First uploads")
 116     g.add_line(p.dd,"DD Account")
 117     g.add_line(p.dm,"DM key")
 118     g.save(FNAME)
 119 
 120 if __name__ == '__main__': main()

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.