Attachment 'checori.py'

Download

   1 #! /usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # checori.py
   5 #
   6 # version 0.2 2007-11-30
   7 #
   8 # Copyright © 2007 Jan Beyer <jan@beathovn.de>
   9 #
  10 # This program is free software; you can redistribute it and/or modify
  11 # it under the terms of the GNU General Public License as published by
  12 # the Free Software Foundation; either version 2 of the License, or
  13 # (at your option) any later version.
  14 #
  15 # This program is distributed in the hope that it will be useful,
  16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 # GNU General Public License for more details.
  19 #
  20 # On Debian systems, the complete text of the GNU General
  21 # Public License can be found in `/usr/share/common-licenses/GPL'.
  22 #
  23 # This program searches through all files in the current directory (and recursively
  24 # all subdirectories) for the occurrence of copyright information.
  25 # It extracts this and tries to fit this into the proposed new debian/copyright
  26 # format, see http://wiki.debian.org/Proposals/CopyrightFormat.
  27 # YOU STILL HAVE TO MANUALLY EDIT THE OUTPUT OF THIS PROGRAM TO GET A VALID 
  28 # debian/copyright FILE!!! It may, however, be helpful in gathering the
  29 # needed information.
  30 #
  31 #
  32 
  33 import os
  34 import sys
  35 import re
  36 
  37 def reduce_path(p):
  38   """Strip the path to the directory.
  39     
  40   Takes a filename as an argument and strips our starting directory away.
  41   The stripped filename is returned.
  42   """
  43   	
  44   if p.startswith(basedir):
  45     return p.replace(basedir+'/','')
  46   else:
  47     return p
  48 
  49 def check_copyright(file):
  50   """Check the given file for copyright information.
  51   
  52   The given file is search for the occurence of the regex "checkfor". Its 
  53   filename,   two copyright-lines (as one string) and two paragraphs of license
  54   information (as one string) are returned.
  55   If the license is either GPL or LGPL, no license information is returned, as
  56   you are supposed to have the standard pointer to '/usr/share/common-licenses'
  57   in your debian/copyright file.
  58   If the copyright matches the main copyright of your package (regex 
  59   "maincopyright") then three empty strings are returned, as we don't need to
  60   list these files.
  61   """
  62   
  63   f=open(file,'r')
  64   fn = ''
  65   line = f.readline().strip('/*# ')
  66   while not len(line) == 0:
  67     if (checkfor.match(line) != None) and (maincopyright.match(line.strip()) == None):
  68       fn = reduce_path(file)
  69       copylic = [line.strip()]
  70       for i in range(maxlinesaftercopyright):
  71         copylic.append(f.readline().strip('/*# ').strip()) # stripping usual commentary signs and newlines.
  72       # The following you probably want to check for your package.
  73       # My upstreams copyright information always starts with 'Copyright (C)',
  74       # so I strip this and add © instead.
  75       # A second line is added, because there are often the email addresses.
  76       cr = '© '+copylic[0].lstrip('Copyright (C)').strip()+copylic[1].strip()
  77       i = 2
  78       lic = ''
  79       for j in range(i,maxlinesaftercopyright): # look for (L)GPL info
  80         if (copylic[j].find('GNU General') != -1):
  81           lic = 'GPL'
  82         if (copylic[j].find('GNU Lesser General') != -1) or (copylic[j].find('GNU Library General') != -1):
  83           lic = 'LGPL'
  84         if (copylic[j].lower().find('version 2') != -1):
  85           lic += '-2'
  86         if (copylic[j].find('any later version') != -1):
  87           lic += '+'
  88         if (copylic[j].lower().find('public domain') != -1):
  89           lic = 'PD'
  90       if lic == '':
  91         lic = 'other'
  92       lic += '\n'
  93       # The following I do not like, but it works, mostly...
  94       # I'm just appending two paragraphs, but max "maxlinesaftercopyright"
  95       # to lic to have an idea of the real license already there. If the
  96       # license is not GPL and not LGPL. These should be covered by the standard
  97       # reference to '/usr/share/common-licenses'.
  98       # For funny licenses, one really has to go to the source files and check
  99       # carefully and add the correct info to debian/copyright manually.
 100       paragraphs = 0
 101       while (i <= maxlinesaftercopyright-1) and (paragraphs < 2) and (not lic.startswith('GPL')) and (not lic.startswith('LGPL')):
 102         lic += ' '+copylic[i]+'\n'
 103         i += 1
 104         if len(copylic[i]) == 0:
 105           paragraphs += 1
 106       line = f.readline().strip('/*# ')
 107     else:
 108       line = f.readline().strip('/*# ')
 109   f.close()
 110   if fn != '':
 111     return fn, cr, lic
 112   else:
 113     return '', '', ''
 114 
 115 def checkdir(dire):
 116   """Check a directory (recursively) for copyright information in its files.
 117 
 118   For all the files in the directory, check_copyright(filename) is called. The
 119   returned string triplet is added to three lists: files, copyrights, licenses.
 120   If the copyright and license matches an already existing entry in the latter
 121   two lists, only the filename is added to the corresponding files[i], otherwise
 122   the complete triplet is added to the three lists.
 123   """
 124   
 125   filelist = os.listdir(dire)
 126   filelist.sort()
 127 
 128   for item in filelist:
 129     if os.path.isfile(os.path.join(dire,item)):
 130       fn, cr, lic = check_copyright(os.path.join(dire,item))
 131       if (fn != ''):
 132         done = 0
 133         for i in range(len(copyrights)):
 134           if (cr == copyrights[i]) and (lic == licenses[i]):
 135             files[i] += ', '+fn
 136             done = 1
 137           else:
 138             done = 0
 139         if not done:
 140           files.append(fn)
 141           copyrights.append(cr)
 142           licenses.append(lic)
 143     if os.path.isdir(os.path.join(dire,item)):
 144       checkdir(os.path.join(dire,item))
 145   
 146 
 147 if __name__=="__main__":
 148   """Compile copyright info from a source tree.
 149   
 150   Call this script in the following way
 151   $ checori > copyright-skeleton
 152   
 153   This script looks for copyright information in all files in a source tree,
 154   starting from the current directory. It outputs filenames, their copyright
 155   holders and license information in a format resembling the proposal for
 156   machine-interpretable debian/copyright files on
 157   http://wiki.debian.org/Proposals/CopyrightFormat in the version from Nov 2007.
 158   
 159   IT DOES NOT PRODUCE VALID debian/copyright FILES!!!
 160   """
 161   
 162   print('This file is NO VALID debian/copyright FILE!!!')
 163 
 164   # the files are checked for "checkfor". Starting from there, all the rest of
 165   # the copyright and license information is looked for.
 166   checkfor = re.compile('Copyright')
 167 
 168   # "maincopyright" holds the copyright of the package as a whole. This has to
 169   # be added manually to your final file in an appropriate way. All
 170   # files containing this information will NOT be reported by this script!!!
 171   # This means also files, which have a differing license, than the main license
 172   # but the same copyright line! BEWARE!
 173   maincopyright = re.compile('Copyright \(C\) [0-9, -]* David Necas \(Yeti\), Petr Klapetek.')
 174   
 175   # "maxlinesaftercopyright" is the maximum number of lines, the license
 176   # information will contain. But max two paragraphs are added.
 177   maxlinesaftercopyright = 10
 178 
 179   basedir = os.getcwd()
 180   
 181   # Here I initialize the three lists, which will hold all our wanted info.
 182   # These variables will be directly manipulated in the functions.
 183   # Yes, I know, this is not the correct way - but it is a quick way... ;-)
 184   files = []
 185   copyrights = []
 186   licenses = []
 187   
 188   # Let's  start the magic...
 189   checkdir(basedir)
 190   
 191   # Now output the result to STDOUT.
 192   for i in range(len(copyrights)):
 193     print 'Files: '+files[i]
 194     print 'Copyright: '+copyrights[i]
 195     print 'License: '+licenses[i]

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2007-11-30 21:52:14, 7.3 KB) [[attachment:checori.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.