Attachment 'checori.py'
Download 1 #! /usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # checori.py
5 #
6 # version 0.2 2007-11-30
7 #
8 # Copyright © 2007 Jan Beyer <jan@beathovn.de>
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # On Debian systems, the complete text of the GNU General
21 # Public License can be found in `/usr/share/common-licenses/GPL'.
22 #
23 # This program searches through all files in the current directory (and recursively
24 # all subdirectories) for the occurrence of copyright information.
25 # It extracts this and tries to fit this into the proposed new debian/copyright
26 # format, see http://wiki.debian.org/Proposals/CopyrightFormat.
27 # YOU STILL HAVE TO MANUALLY EDIT THE OUTPUT OF THIS PROGRAM TO GET A VALID
28 # debian/copyright FILE!!! It may, however, be helpful in gathering the
29 # needed information.
30 #
31 #
32
33 import os
34 import sys
35 import re
36
def reduce_path(p, base=None):
    """Strip the starting directory from the front of a path.

    p    -- the filename to shorten.
    base -- the directory to strip; defaults to the module-level "basedir".

    Returns p relative to base if p lies below base, otherwise p unchanged.
    Only the leading occurrence of base is removed, so a path that happens to
    contain the base directory a second time further down is left intact.
    """

    if base is None:
        base = basedir
    # Compare against "base/" so that '/src/foo' is not taken for a prefix
    # of '/src/foobar/x'.
    if base.endswith('/'):
        prefix = base
    else:
        prefix = base + '/'
    if p.startswith(prefix):
        return p[len(prefix):]
    return p
48
# Characters stripped from both ends of every line: the usual comment markers
# and blanks.  Newlines are not part of the set.
_COMMENT_CHARS = '/*# '

# A leading "Copyright", optionally followed by "(C)", "(c)" or "©".
# A real prefix match is needed here: str.lstrip('Copyright (C)') treats its
# argument as a set of characters and would also eat the first letters of a
# holder name such as "Christoph".
_COPYRIGHT_PREFIX = re.compile(r'^Copyright\b\s*(?:\([Cc]\)|©)?\s*')


def _format_holder(first, second):
    """Return '© ' + the copyright text of `first`, followed by `second`."""
    parts = [_COPYRIGHT_PREFIX.sub('', first).strip(), second.strip()]
    return '© ' + ' '.join([part for part in parts if part])


def _license_tag(lines):
    """Guess a short license tag ('GPL-2+', 'LGPL', 'PD', 'other') from lines."""
    family = ''
    version = ''
    or_later = ''
    for text in lines:
        lowered = text.lower()
        # The last license family mentioned wins; version and "or later" are
        # remembered separately so that a repeated mention of the license name
        # further down does not throw them away.
        if 'GNU General' in text:
            family = 'GPL'
        if 'GNU Lesser General' in text or 'GNU Library General' in text:
            family = 'LGPL'
        if 'public domain' in lowered:
            family = 'PD'
        if 'version 2' in lowered:
            version = '-2'
        if 'any later version' in text:
            or_later = '+'
    if family in ('GPL', 'LGPL'):
        return family + version + or_later
    return family or 'other'


def _license_excerpt(copylic, limit):
    """Return up to two paragraphs of copylic[2:limit], one indented line each."""
    excerpt = ''
    paragraphs = 0
    i = 2
    while i < limit and paragraphs < 2:
        excerpt += ' ' + copylic[i] + '\n'
        i += 1
        # copylic holds limit+1 entries, so index i is always valid here.
        # An empty entry marks the end of a paragraph.
        if not copylic[i]:
            paragraphs += 1
    return excerpt


def check_copyright(file):
    """Check the given file for copyright information.

    The file is searched for lines matching the regex "checkfor" (after the
    usual comment characters have been stripped). Lines that also match the
    package's main copyright (regex "maincopyright") are skipped, as those
    files need not be listed.

    Returns a triplet of strings (filename, copyright, license):
      filename  -- the path, made relative by reduce_path().
      copyright -- '© ' followed by the copyright line and the line after it,
                   joined by a single space.
      license   -- a short tag ('GPL', 'LGPL', optionally with '-2' and '+',
                   'PD' or 'other') terminated by a newline. Unless the tag is
                   GPL or LGPL, it is followed by up to two paragraphs (at most
                   "maxlinesaftercopyright" lines) of the text after the
                   copyright, as a hint to the real license. GPL and LGPL are
                   expected to be covered by the standard pointer to
                   '/usr/share/common-licenses' in debian/copyright.

    If the file contains several matching lines, the last one determines the
    result. If no line qualifies, three empty strings are returned.
    """

    fn = cr = lic = ''
    with open(file, 'r') as f:
        # readline() is used instead of iterating over f, because the
        # look-ahead reads below must continue exactly where this loop is.
        for raw in iter(f.readline, ''):
            line = raw.strip(_COMMENT_CHARS)
            if checkfor.match(line) is None:
                continue
            if maincopyright.match(line.strip()) is not None:
                continue

            fn = reduce_path(file)
            # copylic[0] is the copyright line itself, followed by the next
            # "maxlinesaftercopyright" lines (empty strings past end of file).
            copylic = [line.strip()]
            for _ in range(maxlinesaftercopyright):
                copylic.append(f.readline().strip(_COMMENT_CHARS).strip())

            # The second line is added because it often holds email addresses.
            if len(copylic) > 1:
                second = copylic[1]
            else:
                second = ''
            cr = _format_holder(copylic[0], second)

            tag = _license_tag(copylic[2:maxlinesaftercopyright])
            lic = tag + '\n'
            # For anything but (L)GPL, append some of the original text. For
            # unusual licenses one still has to read the source files and
            # complete debian/copyright by hand.
            if not tag.startswith(('GPL', 'LGPL')):
                lic += _license_excerpt(copylic, maxlinesaftercopyright)

    if fn != '':
        return fn, cr, lic
    return '', '', ''
114
def checkdir(dire):
    """Check a directory (recursively) for copyright information in its files.

    For every regular file in dire (in sorted order), check_copyright() is
    called. Results are collected in the three module-level lists "files",
    "copyrights" and "licenses", which are kept in step with each other:
    if the copyright and license of a file equal an existing entry, only the
    filename is appended (comma-separated) to the corresponding files[i];
    otherwise a new entry is added to all three lists.
    Subdirectories are processed by calling checkdir() on them.
    """

    for item in sorted(os.listdir(dire)):
        path = os.path.join(dire, item)
        if os.path.isfile(path):
            fn, cr, lic = check_copyright(path)
            if fn == '':
                continue
            for i, known in enumerate(zip(copyrights, licenses)):
                if known == (cr, lic):
                    files[i] += ', ' + fn
                    # Stop at the first match; continuing the scan must not
                    # undo the fact that the file has already been recorded.
                    break
            else:
                # No existing entry has this copyright/license combination.
                files.append(fn)
                copyrights.append(cr)
                licenses.append(lic)
        elif os.path.isdir(path):
            checkdir(path)
145
146
if __name__ == "__main__":
    # Compile copyright info from a source tree.
    #
    # Call this script in the following way
    #   $ checori > copyright-skeleton
    #
    # This script looks for copyright information in all files in a source
    # tree, starting from the current directory. It outputs filenames, their
    # copyright holders and license information in a format resembling the
    # proposal for machine-interpretable debian/copyright files on
    # http://wiki.debian.org/Proposals/CopyrightFormat in the version from
    # Nov 2007.
    #
    # IT DOES NOT PRODUCE VALID debian/copyright FILES!!!

    print('This file is NO VALID debian/copyright FILE!!!')

    # The files are checked for "checkfor". Starting from there, all the rest
    # of the copyright and license information is looked for.
    checkfor = re.compile('Copyright')

    # "maincopyright" holds the copyright of the package as a whole. This has
    # to be added manually to your final file in an appropriate way. All
    # files containing this information will NOT be reported by this script!!!
    # This also means files which have a license differing from the main
    # license but the same copyright line! BEWARE!
    maincopyright = re.compile(r'Copyright \(C\) [0-9, -]* David Necas \(Yeti\), Petr Klapetek.')

    # "maxlinesaftercopyright" is the maximum number of lines the license
    # information will contain. But at most two paragraphs are added.
    maxlinesaftercopyright = 10

    basedir = os.getcwd()

    # The three lists which will hold all the wanted info. They are module
    # globals and are filled directly by checkdir().
    files = []
    copyrights = []
    licenses = []

    # Let's start the magic...
    checkdir(basedir)

    # Now output the result to STDOUT. print() with a single string argument
    # behaves the same under Python 2 and Python 3.
    for names, holder, license_text in zip(files, copyrights, licenses):
        print('Files: ' + names)
        print('Copyright: ' + holder)
        print('License: ' + license_text)
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.
