Changeset - 050a94de8e7f
[Not reviewed]
default
0 1 1
Thomas De Schampheleire - 7 years ago 2019-03-15 21:21:12
thomas.de_schampheleire@nokia.com
scripts: extract contributor data out of update-copyrights.py

... to allow re-use in other scripts, like shortlog.py.
2 files changed with 98 insertions and 99 deletions:
0 comments (0 inline, 0 general)
scripts/contributor_data.py
Show inline comments
 
new file 100644
 
# -*- coding: utf-8 -*-
# Shared contributor metadata: pure data, no logic.  update-copyrights.py
# reads these names through the ``contributor_data`` module.

# Committer strings that are so broken that they do not point at any
# contributor at all.
total_ignore = {
    '*** failed to import extension hggit: No module named hggit',
    '<>',
}

# Canonical "Name <email>" for people who have committed under several names
# or email addresses.  Keys are committer strings as they appear in history;
# update-copyrights.py looks up the full author string first and then the part
# before the last '<'.
name_fixes = {
    'Andrew Shadura': "Andrej Shadura <andrew@shadura.me>",
    'aparkar': "Aparkar <aparkar@icloud.com>",
    'Aras Pranckevicius': "Aras Pranckevičius <aras@unity3d.com>",
    'Augosto Hermann': "Augusto Herrmann <augusto.herrmann@planejamento.gov.br>",
    '"Bradley M. Kuhn" <bkuhn@ebb.org>': "Bradley M. Kuhn <bkuhn@sfconservancy.org>",
    'dmitri.kuznetsov': "Dmitri Kuznetsov",
    'Dmitri Kuznetsov': "Dmitri Kuznetsov",
    'domruf': "Dominik Ruf <dominikruf@gmail.com>",
    'Ingo von borstel': "Ingo von Borstel <kallithea@planetmaker.de>",
    'Jan Heylen': "Jan Heylen <heyleke@gmail.com>",
    'Jason F. Harris': "Jason Harris <jason@jasonfharris.com>",
    'Jelmer Vernooij': "Jelmer Vernooij <jelmer@samba.org>",
    'jfh <jason@jasonfharris.com>': "Jason Harris <jason@jasonfharris.com>",
    'Leonardo Carneiro<leonardo@unity3d.com>': "Leonardo Carneiro <leonardo@unity3d.com>",
    'leonardo': "Leonardo Carneiro <leonardo@unity3d.com>",
    'Leonardo <leo@unity3d.com>': "Leonardo Carneiro <leonardo@unity3d.com>",
    'Les Peabody': "Les Peabody <lpeabody@gmail.com>",
    '"Lorenzo M. Catucci" <lorenzo@sancho.ccd.uniroma2.it>': "Lorenzo M. Catucci <lorenzo@sancho.ccd.uniroma2.it>",
    'Lukasz Balcerzak': "Łukasz Balcerzak <lukaszbalcerzak@gmail.com>",
    'mao <mao@lins.fju.edu.tw>': "Ching-Chen Mao <mao@lins.fju.edu.tw>",
    'marcink': "Marcin Kuźmiński <marcin@python-works.com>",
    'Marcin Kuzminski': "Marcin Kuźmiński <marcin@python-works.com>",
    'nansenat16@null.tw': "nansenat16 <nansenat16@null.tw>",
    'Peter Vitt': "Peter Vitt <petervitt@web.de>",
    'philip.j@hostdime.com': "Philip Jameson <philip.j@hostdime.com>",
    'Søren Løvborg': "Søren Løvborg <sorenl@unity3d.com>",
    'Thomas De Schampheleire': "Thomas De Schampheleire <thomas.de_schampheleire@nokia.com>",
    # '<>' is listed in total_ignore above.
    'Weblate': "<>",
    'xpol': "xpol <xpolife@gmail.com>",
    'Lars <devel@sumpfralle.de>': "Lars Kruse <devel@sumpfralle.de>",
}

# Email domains of committers for which another legal entity might hold some
# of the copyright as well (domain -> entity name).
domain_extra = {
    'unity3d.com': "Unity Technologies",
    'rhodecode.com': "RhodeCode GmbH",
}

# Repository history shows some old contributions that traditionally have not
# been listed in about.html - keep it that way.  This is a new set: everything
# from total_ignore plus (name, year) pairs; total_ignore itself is untouched.
no_about = total_ignore | {
    # Traditionally not listed in about.html, and it seems unclear whether the
    # copyright is personal or belongs to a company:
    ('Thayne Harbaugh <thayne@fusionio.com>', '2011'),
    ('Dies Koper <diesk@fast.au.fujitsu.com>', '2012'),
    ('Erwin Kroon <e.kroon@smartmetersolutions.nl>', '2012'),
    ('Vincent Caron <vcaron@bearstech.com>', '2012'),
    # Contributions that might be too small to be copyrightable:
    # NOTE(review): name_fixes rewrites 'philip.j@hostdime.com' to a full
    # "Name <email>" form - verify that this raw form is still what gets
    # compared where no_about is consulted.
    ('philip.j@hostdime.com', '2012'),
    ('Stefan Engel <mail@engel-stefan.de>', '2012'),
    ('Ton Plomp <tcplomp@gmail.com>', '2013'),
    # Was reworked and contributed later, and is shadowed by other
    # contributions:
    ('Sean Farley <sean.michael.farley@gmail.com>', '2013'),
}

# (year, name) entries that are listed in about.html but do not appear in the
# repository history.
other_about = [
    ("2011", "Aparkar <aparkar@icloud.com>"),
    ("2010", "RhodeCode GmbH"),
    ("2011", "RhodeCode GmbH"),
    ("2012", "RhodeCode GmbH"),
    ("2013", "RhodeCode GmbH"),
]

# (year, name) entries that are listed in CONTRIBUTORS but do not appear in the
# repository history; the year is left empty for all of them.
other_contributors = [
    ("", "Andrew Kesterson <andrew@aklabs.net>"),
    ("", "cejones"),
    ("", "David A. Sjøen <david.sjoen@westcon.no>"),
    ("", "James Rhodes <jrhodes@redpointsoftware.com.au>"),
    ("", "Jonas Oberschweiber <jonas.oberschweiber@d-velop.de>"),
    ("", "larikale"),
    ("", "RhodeCode GmbH"),
    ("", "Sebastian Kreutzberger <sebastian@rhodecode.com>"),
    ("", "Steve Romanow <slestak989@gmail.com>"),
    ("", "SteveCohen"),
    ("", "Thomas <thomas@rhodecode.com>"),
    ("", "Thomas Waldmann <tw-public@gmx.de>"),
]
scripts/update-copyrights.py
Show inline comments
 
@@ -29,118 +29,28 @@ Both make a best effort of listing all c
 
history might be a better and more definitive source.
 

	
 
Contributors are sorted "fairly" by copyright year and amount of
 
contribution.
 

	
 
New contributors are listed, without considering if the contribution contains
 
copyrightable work.
 

	
 
When the copyright might belong to a different legal entity than the
 
contributor, the legal entity is given credit too.
 
"""
 

	
 

	
 
# NOTE(review): this changeset also adds these same definitions as
# scripts/contributor_data.py, and main() below reads them through
# contributor_data.*.  This copy looks like the removed side of the diff -
# confirm before relying on it.

# Committer strings that are so broken that they do not point at any
# contributor at all:
total_ignore = set()
total_ignore.add('*** failed to import extension hggit: No module named hggit')
total_ignore.add('<>')

# Normalize some committer names where people have contributed under different
# names or email addresses (committer string -> canonical "Name <email>"):
name_fixes = {}
name_fixes['Andrew Shadura'] = "Andrej Shadura <andrew@shadura.me>"
name_fixes['aparkar'] = "Aparkar <aparkar@icloud.com>"
name_fixes['Aras Pranckevicius'] = "Aras Pranckevičius <aras@unity3d.com>"
name_fixes['Augosto Hermann'] = "Augusto Herrmann <augusto.herrmann@planejamento.gov.br>"
name_fixes['"Bradley M. Kuhn" <bkuhn@ebb.org>'] = "Bradley M. Kuhn <bkuhn@sfconservancy.org>"
name_fixes['dmitri.kuznetsov'] = "Dmitri Kuznetsov"
name_fixes['Dmitri Kuznetsov'] = "Dmitri Kuznetsov"
name_fixes['domruf'] = "Dominik Ruf <dominikruf@gmail.com>"
name_fixes['Ingo von borstel'] = "Ingo von Borstel <kallithea@planetmaker.de>"
name_fixes['Jan Heylen'] = "Jan Heylen <heyleke@gmail.com>"
name_fixes['Jason F. Harris'] = "Jason Harris <jason@jasonfharris.com>"
name_fixes['Jelmer Vernooij'] = "Jelmer Vernooij <jelmer@samba.org>"
name_fixes['jfh <jason@jasonfharris.com>'] = "Jason Harris <jason@jasonfharris.com>"
name_fixes['Leonardo Carneiro<leonardo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
name_fixes['leonardo'] = "Leonardo Carneiro <leonardo@unity3d.com>"
name_fixes['Leonardo <leo@unity3d.com>'] = "Leonardo Carneiro <leonardo@unity3d.com>"
name_fixes['Les Peabody'] = "Les Peabody <lpeabody@gmail.com>"
name_fixes['"Lorenzo M. Catucci" <lorenzo@sancho.ccd.uniroma2.it>'] = "Lorenzo M. Catucci <lorenzo@sancho.ccd.uniroma2.it>"
name_fixes['Lukasz Balcerzak'] = "Łukasz Balcerzak <lukaszbalcerzak@gmail.com>"
name_fixes['mao <mao@lins.fju.edu.tw>'] = "Ching-Chen Mao <mao@lins.fju.edu.tw>"
name_fixes['marcink'] = "Marcin Kuźmiński <marcin@python-works.com>"
name_fixes['Marcin Kuzminski'] = "Marcin Kuźmiński <marcin@python-works.com>"
name_fixes['nansenat16@null.tw'] = "nansenat16 <nansenat16@null.tw>"
name_fixes['Peter Vitt'] = "Peter Vitt <petervitt@web.de>"
name_fixes['philip.j@hostdime.com'] = "Philip Jameson <philip.j@hostdime.com>"
name_fixes['Søren Løvborg'] = "Søren Løvborg <sorenl@unity3d.com>"
name_fixes['Thomas De Schampheleire'] = "Thomas De Schampheleire <thomas.de_schampheleire@nokia.com>"
# '<>' is one of the total_ignore entries above.
name_fixes['Weblate'] = "<>"
name_fixes['xpol'] = "xpol <xpolife@gmail.com>"
name_fixes['Lars <devel@sumpfralle.de>'] = "Lars Kruse <devel@sumpfralle.de>"

# Some committer email address domains that indicate that another entity might
# hold some copyright too (domain -> entity name):
domain_extra = {}
domain_extra['unity3d.com'] = "Unity Technologies"
domain_extra['rhodecode.com'] = "RhodeCode GmbH"

# Repository history shows some old contributions that traditionally have not
# been listed in about.html - preserve that.  Starts as a copy of total_ignore
# and then gains (name, year) tuples:
no_about = set(total_ignore)
# The following contributors were traditionally not listed in about.html and it
# seems unclear if the copyright is personal or belongs to a company.
no_about.add(('Thayne Harbaugh <thayne@fusionio.com>', '2011'))
no_about.add(('Dies Koper <diesk@fast.au.fujitsu.com>', '2012'))
no_about.add(('Erwin Kroon <e.kroon@smartmetersolutions.nl>', '2012'))
no_about.add(('Vincent Caron <vcaron@bearstech.com>', '2012'))
# These contributors' contributions might be too small to be copyrightable:
# NOTE(review): name_fixes rewrites 'philip.j@hostdime.com' to a full
# "Name <email>" form - verify that this raw form is still what gets compared
# where no_about is consulted.
no_about.add(('philip.j@hostdime.com', '2012'))
no_about.add(('Stefan Engel <mail@engel-stefan.de>', '2012'))
no_about.add(('Ton Plomp <tcplomp@gmail.com>', '2013'))
# Was reworked and contributed later and shadowed by other contributions:
no_about.add(('Sean Farley <sean.michael.farley@gmail.com>', '2013'))

# Preserve contributors listed in about.html but not appearing in repository
# history, as (year, name) tuples:
other_about = [
    ("2011", "Aparkar <aparkar@icloud.com>"),
    ("2010", "RhodeCode GmbH"),
    ("2011", "RhodeCode GmbH"),
    ("2012", "RhodeCode GmbH"),
    ("2013", "RhodeCode GmbH"),
]

# Preserve contributors listed in CONTRIBUTORS but not appearing in repository
# history, as (year, name) tuples with an empty year:
other_contributors = [
    ("", "Andrew Kesterson <andrew@aklabs.net>"),
    ("", "cejones"),
    ("", "David A. Sjøen <david.sjoen@westcon.no>"),
    ("", "James Rhodes <jrhodes@redpointsoftware.com.au>"),
    ("", "Jonas Oberschweiber <jonas.oberschweiber@d-velop.de>"),
    ("", "larikale"),
    ("", "RhodeCode GmbH"),
    ("", "Sebastian Kreutzberger <sebastian@rhodecode.com>"),
    ("", "Steve Romanow <slestak989@gmail.com>"),
    ("", "SteveCohen"),
    ("", "Thomas <thomas@rhodecode.com>"),
    ("", "Thomas Waldmann <tw-public@gmx.de>"),
]
 

	
 

	
 
import os
 
import re
 
from collections import defaultdict
 
import contributor_data
 

	
 

	
 
def sortkey(x):
 
    """Return key for sorting contributors "fairly":
 
    * latest contribution
 
    * first contribution
 
    * number of contribution years
 
    * name (with some unicode normalization)
 
    The entries must be 2-tuples of a list of string years and the unicode name"""
 
    return (x[0] and -int(x[0][-1]),
 
            x[0] and int(x[0][0]),
 
            -len(x[0]),
 
@@ -200,54 +110,54 @@ def insert_entries(
 

	
 
    with open(filename) as f:
 
        pre, post = re.split(split_re, f.read())
 

	
 
    with open(filename, 'w') as f:
 
        f.write(pre +
 
                ''.join(format_f(years, name) for years, name in l) +
 
                post)
 

	
 

	
 
def main():
    """Regenerate the contributor listings from Mercurial history.

    Runs ``hg log`` on the revset ``::.`` with a template that prints the
    commit year and the author, one changeset per line, and turns each line
    into a ``(year, name)`` entry.  Each author is normalized through
    ``contributor_data.name_fixes``: the full author string is looked up
    first, then the part before the last ``<``; if neither yields a
    replacement the author string is kept as is.

    The entries are then passed to ``insert_entries`` (defined earlier in this
    file) three times, to update:

    * ``kallithea/templates/about.html`` - history plus
      ``contributor_data.other_about``, skipping ``contributor_data.no_about``,
      one ``<li>Copyright ...</li>`` line per name;
    * ``CONTRIBUTORS`` - history plus ``contributor_data.other_contributors``,
      skipping ``contributor_data.total_ignore``, one indented line per name;
    * ``kallithea/templates/base/base.html`` - every name is normalized to
      ``''`` so all years collapse into the single span before
      "by various authors".

    All contributor data is read from the ``contributor_data`` module; this
    script no longer defines ``name_fixes`` and friends itself.
    """
    # Read the whole log first and close the pipe explicitly instead of
    # leaving it to garbage collection.
    with os.popen("""hg log -r '::.' -T '{date(date,"%Y")} {author}\n'""") as hg_log:
        log_lines = hg_log.readlines()

    name_fixes = contributor_data.name_fixes
    repo_entries = [
        (year, name_fixes.get(name) or name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)
        for year, name in
        (line.strip().split(' ', 1) for line in log_lines)
        ]

    insert_entries(
        filename='kallithea/templates/about.html',
        all_entries=repo_entries + contributor_data.other_about,
        no_entries=contributor_data.no_about,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?:  <li>Copyright &copy; [^\n]*</li>\n)*',
        # Drop the email address: only the bare name is shown.
        normalize_name=lambda name: name.split('<', 1)[0].strip(),
        format_f=lambda years, name: '  <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name),
        )

    insert_entries(
        filename='CONTRIBUTORS',
        all_entries=repo_entries + contributor_data.other_contributors,
        no_entries=contributor_data.total_ignore,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?:    [^\n]*\n)*',
        normalize_name=lambda name: name,
        format_f=lambda years, name: ('    %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),
        )

    insert_entries(
        filename='kallithea/templates/base/base.html',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r'(?<=&copy;) .* (?=by various authors)',
        # A single shared key, so only the combined year range is emitted.
        normalize_name=lambda name: '',
        format_f=lambda years, name: ' ' + nice_years(years, '&ndash;', ', ') + ' ',
        )
 

	
 

	
 
# Only regenerate the files when executed as a script, not when imported.
if __name__ == '__main__':
    main()
 

	
 

	
 
# To list new contributors since last tagging:
0 comments (0 inline, 0 general)