]>
Commit | Line | Data |
---|---|---|
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0+
#
# Copyright (C) 2014, Masahiro Yamada <[email protected]>

'''
A tool to create/update the mailmap file

The command 'git shortlog' summarizes git log output in a format suitable
for inclusion in release announcements. Each commit will be grouped by
author and title.

One problem is that the authors' name and/or email address is sometimes
spelled differently. The .mailmap feature can be used to coalesce together
commits by the same person.
(See 'man git-shortlog' for further information of this feature.)

This tool helps to create/update the mailmap file.

It runs 'git shortlog' internally and searches differently spelled author
names which share the same email address. The author name with the most
commits is assumed to be a canonical real name. If the number of commits
from the canonical name is equal to or greater than 'MIN_COMMITS',
the entry for the canonical name will be output. ('MIN_COMMITS' is used
here because we do not want to create a fat mailmap by adding every author
with only a few commits.)

If there exists a mailmap file specified by the mailmap.file configuration
option or '.mailmap' at the toplevel of the repository, it is used as
a base file. (The mailmap.file configuration takes precedence over the
'.mailmap' file if both exist.)

The base file and the newly added entries are merged together and sorted
alphabetically (but the comment block is kept untouched), and then printed
to standard output.

Usage
-----

    scripts/mailmapper

prints the mailmapping to standard output.

    scripts/mailmapper > tmp; mv tmp .mailmap

will be useful for updating '.mailmap' file.
'''

import sys
import os
import subprocess

# Entries are output only for canonical names with MIN_COMMITS or more commits.
# This limitation is used so as not to create a too big mailmap file.
MIN_COMMITS = 50


def git_output(*args):
    '''Run 'git' with the given arguments and return its raw output (bytes).

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    '''
    return subprocess.check_output(['git'] + list(args))


def decode_text(raw):
    '''Decode git output as UTF-8 text.

    Undecodable bytes are replaced rather than aborting the whole run.
    '''
    return raw.decode('utf-8', errors='replace')


def with_newline(line):
    '''Return 'line' terminated by a newline (added only if missing).'''
    return line if line.endswith('\n') else line + '\n'


def parse_commit_counts(shortlog):
    '''Build the 'author name' vs 'number of commits' database.

    shortlog -- text output of 'git shortlog -s -n'

    Returns a dict mapping each author name to its commit count. Lines with
    an empty author name (or no leading commit count) are ignored.
    '''
    commits_per_name = {}

    for line in shortlog.split('\n'):
        fields = line.split(None, 1)
        if len(fields) != 2 or not fields[0].isdigit():
            # ignore lines with an empty author name
            continue
        commits, name = fields
        commits_per_name[name.rstrip()] = int(commits)

    return commits_per_name


def select_canonical_names(shortlog, commits_per_name,
                           min_commits=MIN_COMMITS):
    '''Coalesce the author names which share the same email address.

    shortlog         -- text output of 'git shortlog -s -n -e'
    commits_per_name -- dict as returned by parse_commit_counts()
    min_commits      -- minimum number of commits of the canonical name
                        required for an entry to be output

    Returns a dict mapping '<email>' to the canonical name. Only addresses
    used with more than one spelling of the name are included.
    '''
    mail_vs_name = {}
    output = {}

    for line in shortlog.split('\n'):
        # tmp, mail = line.rsplit(None, 1) is not safe
        # because weird email addresses might include whitespaces.
        # Lines that do not contain exactly one '<' cannot be split into
        # name and address reliably, so they are skipped.
        if line.count('<') != 1:
            continue
        tmp, mail = line.split('<')
        mail = '<' + mail.rstrip()
        fields = tmp.split(None, 1)
        # fields[0] is the commit count; a missing second field means
        # the author name is empty.
        name = fields[1].rstrip() if len(fields) == 2 else ''

        if mail not in mail_vs_name:
            mail_vs_name[mail] = name
            continue

        # another name for the same email address
        prev_name = mail_vs_name[mail]
        # Take the name with more commits (the later one wins a tie).
        # Empty or unknown names count as zero commits.
        prev_commits = commits_per_name.get(prev_name, 0)
        commits = commits_per_name.get(name, 0)
        if commits >= prev_commits:
            major_name, major_commits = name, commits
        else:
            major_name, major_commits = prev_name, prev_commits

        mail_vs_name[mail] = major_name
        if major_name and major_commits >= min_commits:
            output[mail] = major_name

    return output


def read_base_mailmap():
    '''Return the lines of the mailmap file used as a base.

    [1] If there exists a mailmap file at the location pointed to
        by the mailmap.file configuration option, use it.
    [2] If the file .mailmap exists in the current directory, use it.
    [3] Otherwise, return an empty list (a new mailmap is created).
    '''
    mailmap_files = []

    try:
        config_mailmap = os.fsdecode(git_output('config', 'mailmap.file'))
    except subprocess.CalledProcessError:
        config_mailmap = ''

    config_mailmap = config_mailmap.rstrip()
    if config_mailmap:
        mailmap_files.append(config_mailmap)

    mailmap_files.append('.mailmap')

    for map_file in mailmap_files:
        try:
            with open(map_file, encoding='utf-8') as infile:
                return infile.readlines()
        except OSError:
            # Failed to open. Try next.
            continue

    return []


def split_comment_block(lines):
    '''Split 'lines' into the leading comment block and the rest.

    The comment block is the run of lines at the top of the file that start
    with '#' or are blank. Returns a (comment_block, other_lines) tuple.
    '''
    for index, line in enumerate(lines):
        if not line.startswith(('#', '\n')):
            return lines[:index], lines[index:]
    return list(lines), []


def merge_entries(base_lines, entries):
    '''Merge new mailmap entries into the base lines and sort the result.

    base_lines -- newline-terminated lines taken from the base file
    entries    -- dict mapping '<email>' to the canonical name

    Entries already present in 'base_lines' are not added again, so running
    the tool repeatedly does not duplicate lines.
    '''
    merged = list(base_lines)
    known = set(merged)

    for mail, name in entries.items():
        entry = name + ' ' + mail + '\n'
        if entry not in known:
            known.add(entry)
            merged.append(entry)

    merged.sort()
    return merged


def main():
    '''Print the updated mailmap to standard output.'''
    try:
        toplevel = git_output('rev-parse', '--show-toplevel')
    except subprocess.CalledProcessError:
        sys.exit('Please run in a git repository.')

    # Change the current working directory to the toplevel of the repository
    # for our easier life. Only the line terminator is stripped so that
    # unusual directory names survive.
    os.chdir(os.fsdecode(toplevel.rstrip(b'\r\n')))

    # First, create 'author name' vs 'number of commits' database.
    # We assume the name with the most commits as the canonical real name.
    commits_per_name = parse_commit_counts(
        decode_text(git_output('shortlog', '-s', '-n')))

    # Next, coalesce the author names with the same email address
    output = select_canonical_names(
        decode_text(git_output('shortlog', '-s', '-n', '-e')),
        commits_per_name)

    # A last line without a newline must not be glued to the next entry.
    base_lines = [with_newline(line) for line in read_base_mailmap()]
    comment_block, output_lines = split_comment_block(base_lines)
    output_lines = merge_entries(output_lines, output)

    sys.stdout.write(''.join(comment_block + output_lines))


if __name__ == '__main__':
    main()