Files @ 75b0d3fd6303
Branch filter:

Location: kallithea/scripts/update-copyrights.py

Mads Kiilerich
ssh: handle IPv6 ssh connections

Performing ssh actions towards Kallithea via an IPv6 connection gave the
following error:

$ hg incoming ssh://kallithea@example.com/repo
remote: Traceback (most recent call last):
remote: File ".../bin/kallithea-cli", line 11, in <module>
remote: load_entry_point('Kallithea', 'console_scripts', 'kallithea-cli')()
remote: File ".../python2.7/site-packages/click/core.py", line 764, in __call__
remote: return self.main(*args, **kwargs)
remote: File ".../python2.7/site-packages/click/core.py", line 717, in main
remote: rv = self.invoke(ctx)
remote: File ".../python2.7/site-packages/click/core.py", line 1137, in invoke
remote: return _process_result(sub_ctx.command.invoke(sub_ctx))
remote: File ".../python2.7/site-packages/click/core.py", line 956, in invoke
remote: return ctx.invoke(self.callback, **ctx.params)
remote: File ".../python2.7/site-packages/click/core.py", line 555, in invoke
remote: return callback(*args, **kwargs)
remote: File ".../kallithea/bin/kallithea_cli_base.py", line 79, in runtime_wrapper
remote: return annotated(*args, **kwargs)
remote: File ".../kallithea/bin/kallithea_cli_ssh.py", line 74, in ssh_serve
remote: vcs_handler.serve(user_id, key_id, client_ip)
remote: File ".../kallithea/lib/vcs/backends/ssh.py", line 65, in serve
remote: self.authuser = AuthUser.make(dbuser=dbuser, ip_addr=client_ip)
remote: File ".../kallithea/lib/auth.py", line 407, in make
remote: if not check_ip_access(source_ip=ip_addr, allowed_ips=allowed_ips):
remote: File ".../kallithea/lib/auth.py", line 860, in check_ip_access
remote: if ipaddr.IPAddress(source_ip) in ipaddr.IPNetwork(ip):
remote: File ".../kallithea/lib/ipaddr.py", line 76, in IPAddress
remote:
remote: ValueError: '2' does not appear to be an IPv4 or IPv6 address
abort: no suitable response from remote hg!


This was caused by IPv4-exclusive parsing of the SSH_CONNECTION variable.
With an IPv6 address starting with '2a02:1810:', only the first '2' would
survive.

According to 'man 1 ssh':

SSH_CONNECTION Identifies the client and server ends of the connection.
The variable contains four space-separated values: client IP address,
client port number, server IP address, and server port number.


So, the client IP address will be the first space-separated word, regardless
of IPv4 or IPv6. Use that knowledge without further parsing.

(commit message by Thomas De Schampheleire)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

"""
Kallithea script for maintaining contributor lists from version control
history.

This script and the data in it are a best effort attempt at reverse engineering
previous attributions and correlating them with version control history while
preserving all existing copyright statements and attribution. This script is
processing and summarizing information found elsewhere - it is not by itself
making any claims. Comments in the script are an attempt at reverse engineering
possible explanations - they are not showing any intent or confirming it is
correct.

Four files are generated / modified by this script:

kallithea/templates/about.html claims to show copyright holders, and the GPL
license requires such existing "legal notices" to be preserved. We also try to
keep it updated with copyright holders, but do not claim it is a correct list.

CONTRIBUTORS has the purpose of giving credit where credit is due and listing
all the contributor names in the source.

kallithea/templates/base/base.html contains the copyright years in the page
footer.

docs/conf.py contains the copyright years in its copyright string.

about.html and CONTRIBUTORS both make a best effort of listing all copyright
holders, but revision control history might be a better and more definitive
source.

Contributors are sorted "fairly" by copyright year and amount of
contribution.

New contributors are listed, without considering if the contribution contains
copyrightable work.

When the copyright might belong to a different legal entity than the
contributor, the legal entity is given credit too.
"""

import os
import re
from collections import defaultdict

import contributor_data


def sortkey(x):
    """Build the sort key that orders contributors "fairly".

    x is a 2-tuple of (list of year strings, utf-8 encoded name). Entries are
    ordered by:
    * most recent contribution year first
    * then earliest first contribution year
    * then the highest number of contribution years
    * then name, lower-cased and with a couple of accented letters folded

    An empty year list is placed in the key unchanged instead of a number.
    """
    years, name = x[0], x[1]

    # Mirror `years and ...`: a falsy (empty) year list is passed through as-is.
    latest = -int(years[-1]) if years else years
    earliest = int(years[0]) if years else years

    folded = name.decode('utf-8').lower()
    for accented, plain in ((u'\xe9', u'e'), (u'\u0142', u'l')):
        folded = folded.replace(accented, plain)

    return (latest, earliest, -len(years), folded)


def nice_years(l, dash='-', join=' '):
    """Collapse a sorted list of years into a brief string of ranges.

    Consecutive years are merged into 'first<dash>last'; isolated years are
    kept as they are. The pieces are glued together with join, so with the
    defaults ['1900', '1901', '1921'] becomes '1900-1901 1921'.
    An empty list gives an empty string.
    """
    if not l:
        return ''
    first = last = int(l[0])
    pieces = []
    # The trailing 0 is a sentinel: it does not extend a range of real
    # (positive) years, so it forces the final range to be emitted.
    for raw in l[1:] + [0]:
        current = int(raw)
        if current == last + 1:
            # Still inside a run of consecutive years.
            last = current
        else:
            if first == last:
                pieces.append('%s' % first)
            else:
                pieces.append('%s%s%s' % (first, dash, last))
            first = last = current
    # Only the sentinel should be left over once the loop is done.
    assert first == 0 and last == 0, (first, last)
    return join.join(pieces)


def insert_entries(
        filename,
        all_entries,
        no_entries,
        domain_extra,
        split_re,
        normalize_name,
        format_f):
    """Update file with contributor information.

    The part of the file matched by split_re is replaced with one formatted
    entry per (normalized) contributor name, ordered with sortkey.

    all_entries: list of tuples with year and name
    no_entries: set of names or name and year tuples to ignore
    domain_extra: map domain name to extra credit name
    split_re: regexp matching the part of file to rewrite
    normalize_name: function to normalize names for grouping and display
    format_f: function formatting year list and name to a string

    Raises ValueError if splitting the file content on split_re does not give
    exactly a leading and a trailing part. The new content is built completely
    before the file is opened for writing, so a failure while reading,
    splitting or formatting leaves the file untouched.
    """
    name_years = defaultdict(set)

    for year, name in all_entries:
        if name in no_entries or (name, year) in no_entries:
            continue
        # Text after the first '@', without a trailing '>'. For a name without
        # '@' this is the whole name.
        domain = name.split('@', 1)[-1].rstrip('>')
        if domain in domain_extra:
            # Also credit the extra name registered for this domain.
            name_years[domain_extra[domain]].add(year)
        name_years[normalize_name(name)].add(year)

    # Falsy years are left out of the year list, but the name is still listed.
    entries = sorted(
        ((sorted(year for year in years if year), name)
         for name, years in name_years.items()),
        key=sortkey)

    with open(filename) as f:
        parts = re.split(split_re, f.read())
    if len(parts) != 2:
        raise ValueError(
            '%s: expected the content to split into 2 parts on %r, got %d'
            % (filename, split_re, len(parts)))
    pre, post = parts

    # Build everything before truncating the file with mode 'w'.
    content = (pre +
               ''.join(format_f(years, name) for years, name in entries) +
               post)

    with open(filename, 'w') as f:
        f.write(content)


def main():
    """Regenerate contributor and copyright information from version control.

    Runs `hg log` for the '::.' revset to collect (year, author) pairs, applies
    contributor_data.name_fixes to the author names, and rewrites the
    contributor / copyright sections of about.html, CONTRIBUTORS, base.html
    and docs/conf.py through insert_entries.

    Raises subprocess.CalledProcessError if hg exits with an error (and OSError
    if it cannot be started), so the files are not rewritten from an empty or
    partial history.
    """
    # Imported here because only this function needs it.
    import subprocess

    # An argument list avoids shell quoting, and check_output checks the exit
    # status of hg and cleans up the pipe.
    hg_log = subprocess.check_output(
        ['hg', 'log', '-r', '::.', '-T', '{date(date,"%Y")} {author}\n'])

    # Each output line is '<year> <author>'. Look up a replacement for the full
    # author string first, then for the part before the last '<', and
    # otherwise keep the author unchanged.
    repo_entries = [
        (year, contributor_data.name_fixes.get(name) or contributor_data.name_fixes.get(name.rsplit('<', 1)[0].strip()) or name)
        for year, name in
        (line.strip().split(' ', 1)
         for line in hg_log.splitlines())
        ]

    insert_entries(
        filename='kallithea/templates/about.html',
        all_entries=repo_entries + contributor_data.other_about + contributor_data.other,
        no_entries=contributor_data.no_about,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?:  <li>Copyright &copy; [^\n]*</li>\n)*',
        normalize_name=lambda name: name.split('<', 1)[0].strip(),
        format_f=lambda years, name: '  <li>Copyright &copy; %s, %s</li>\n' % (nice_years(years, '&ndash;', ', '), name),
        )

    insert_entries(
        filename='CONTRIBUTORS',
        all_entries=repo_entries + contributor_data.other_contributors + contributor_data.other,
        no_entries=contributor_data.total_ignore,
        domain_extra=contributor_data.domain_extra,
        split_re=r'(?:    [^\n]*\n)*',
        normalize_name=lambda name: name,
        format_f=lambda years, name: ('    %s%s%s\n' % (name, ' ' if years else '', nice_years(years))),
        )

    # Normalizing every name to '' merges all years into a single entry, so
    # only one combined year range is written.
    insert_entries(
        filename='kallithea/templates/base/base.html',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r'(?<=&copy;) .* (?=by various authors)',
        normalize_name=lambda name: '',
        format_f=lambda years, name: ' ' + nice_years(years, '&ndash;', ', ') + ' ',
        )

    #docs/conf.py:copyright = u'2010-2016 by various authors, licensed as GPLv3.'
    insert_entries(
        filename='docs/conf.py',
        all_entries=repo_entries,
        no_entries=contributor_data.total_ignore,
        domain_extra={},
        split_re=r"(?<=copyright = u').*(?= by various authors)",
        normalize_name=lambda name: '',
        format_f=lambda years, name: nice_years(years, '-', ', '),
        )


# Only run the update when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()


# To list new contributors since last tagging:
# { hg log -r '::tagged()' -T '    {author}\n    {author}\n'; hg log -r '::.' -T '    {author}\n' | sort | uniq; } | sort | uniq -u