from __future__ import print_function
import argparse
import subprocess
import pickle
import re
import os
from pathlib import PurePath
from os import path
from datetime import date, timedelta
from collections import namedtuple, defaultdict
# One commit parsed from the git log output. `dirs` maps a directory path to
# the (additions, deletions) totals of the files the commit changed in it.
Commit = namedtuple('Commit', 'hash author commit_date dirs')

# directory -> {author: (commit_count, additions, deletions)}.
DIRECTORY_AUTHORS = defaultdict(dict)

# directory -> set of (owner, level) pairs, memoizing OWNERS file lookups.
OWNERS_CACHE = dict()

# Name of the pickle file (opened relative to the current working directory)
# that stores the processed commit statistics between runs.
CACHE_FILENAME = 'suggest_owners.cache'
def _RunGitCommand(options, cmd_args, pipe_output=False):
    """Run git against the repository named by `options.repo_path`.

    The complete command line is echoed to stdout before git is started.

    Args:
        options: Parsed arguments; `repo_path` is joined with '.git' and
            handed to git as `--git-dir`.
        cmd_args: List of git arguments appended after `--git-dir`.
        pipe_output: When false, wait for git to finish and return all of
            its output as one string. When true, start git and return the
            stdout pipe so the caller can read the output incrementally.

    Returns:
        The decoded output as a str, or a text-mode pipe object.

    Raises:
        subprocess.CalledProcessError: In the non-piped mode, if git exits
            with a non-zero status.
    """
    git_dir = path.join(options.repo_path, '.git')
    command = ['git', '--git-dir', git_dir] + cmd_args
    print('>', ' '.join(command))
    if pipe_output:
        # Only the pipe is handed back; the Popen object is not retained.
        process = subprocess.Popen(command, encoding='utf-8',
                                   stdout=subprocess.PIPE)
        return process.stdout
    return subprocess.check_output(command, encoding='utf-8')
def _ValidAuthor(author):
    """Tell whether commits by `author` should be counted.

    An author counts when the address ends in '@chromium.org' or
    '@google.com' and does not contain the substring 'roller'.
    """
    accepted_suffixes = ('@chromium.org', '@google.com')
    has_accepted_suffix = author.endswith(accepted_suffixes)
    return has_accepted_suffix and 'roller' not in author
def getEditsForDirectory(commit, directory):
    """Sum the line changes a commit made in `directory` or below it.

    Args:
        commit: Object whose `dirs` maps a directory to an
            (additions, deletions) pair.
        directory: Directory whose subtree is being totalled.

    Returns:
        An (additions, deletions) tuple.
    """
    total_added = 0
    total_removed = 0
    for touched_dir, (added, removed) in commit.dirs.items():
        if not isSubDirectory(directory, touched_dir):
            continue
        total_added += added
        total_removed += removed
    return total_added, total_removed
def _PropagateCommit(options, commit):
    """Credit a commit to every directory it touched and to their ancestors.

    Each affected directory gets the author's commit count increased by one
    and the author's additions/deletions increased by the commit's edits
    under that directory. Results are stored in DIRECTORY_AUTHORS.

    Args:
        options: Unused by this function.
        commit: The commit to record; reads `author` and `dirs`.
    """
    # Collect each touched directory plus all of its parents, walking up
    # until the path renders as '.'.
    affected = set()
    for start_dir in commit.dirs:
        current = start_dir
        while str(current) != '.':
            affected.add(str(current))
            current = PurePath(current).parent

    for directory in affected:
        per_author = DIRECTORY_AUTHORS[directory]
        prev_commits, prev_added, prev_removed = per_author.get(
            commit.author, (0, 0, 0))
        added, removed = getEditsForDirectory(commit, directory)
        per_author[commit.author] = (
            prev_commits + 1, prev_added + added, prev_removed + removed)
def isSubDirectory(parent_directory, child_directory):
    """Return True if `child_directory` equals or lies below the parent.

    Both arguments are compared as pure paths; the filesystem is not
    consulted.
    """
    ancestor = PurePath(parent_directory)
    candidate = PurePath(child_directory)
    return candidate.is_relative_to(ancestor)
def _GetGitLogCmd(options):
    """Build the `git log` argument list for the requested history window.

    Args:
        options: Parsed arguments; reads `days_ago` and `subdirectory`.

    Returns:
        A list of arguments starting with 'log'. When a subdirectory is
        set, the log is restricted to it with a trailing '-- <path>'.
    """
    since = (date.today() - timedelta(days=options.days_ago)).isoformat()
    # One header line per commit: short hash, author email, committer date.
    pretty = '--pretty=format:' + '%h,%ae,%cI'
    git_args = ['log', '--since', since, '--numstat', pretty]
    if options.subdirectory:
        git_args.extend(['--', options.subdirectory])
    return git_args
def _ParseCommitLine(line):
    """Turn a 'hash,author,date' header line into a Commit.

    The returned Commit starts with an empty `dirs` mapping.

    Raises:
        ValueError: If the line does not split into exactly three
            comma-separated fields.
    """
    fields = line.split(",")
    commit_hash, author, commit_date = fields
    return Commit(commit_hash, author, commit_date, {})
def _ParseFileStatsLine(current_commit, line):
    """Fold one `git log --numstat` line into `current_commit.dirs`.

    A numstat line has the form '<additions>\\t<deletions>\\t<path>'. The
    counts are added to the entry for the directory containing the file.
    For renames, the destination path is the one that gets credited.

    Args:
        current_commit: Object with a `dirs` dict mapping a directory to an
            (additions, deletions) tuple; updated in place.
        line: One line of git output without its trailing newline.

    Returns:
        False if the line is not three tab-separated fields (so it is not a
        numstat line), True otherwise, including when the line is skipped
        because it reports no added or deleted lines.

    Raises:
        ValueError: If a count is neither '-' nor an integer.
    """
    try:
        additions, deletions, filepath = line.split('\t')
    except ValueError:
        return False

    # A '-' count (git's marker for binary files) is treated as zero.
    additions = 0 if additions == '-' else int(additions)
    deletions = 0 if deletions == '-' else int(deletions)
    if additions == 0 and deletions == 0:
        return True

    if '=>' in filepath:
        # Resolve rename notation on the whole path BEFORE taking dirname.
        # The brace group may itself contain '/', e.g.
        # 'a/{old/x.cc => new/y.cc}', and dirname would cut it in half.
        filepath = re.sub(r'\{[^}]* => ([^}]*)\}', r'\1', filepath)
        if ' => ' in filepath:
            # Brace-less form 'old/path => new/path': keep the new path.
            filepath = filepath.split(' => ', 1)[1]
        # An empty side such as 'a/{b => }/f' leaves 'a//f'; a leading
        # '{b => }/f' leaves '/f'. Normalize both back to relative paths.
        filepath = filepath.replace('//', '/').lstrip('/')

    dir_path = path.dirname(filepath)
    commit_additions, commit_deletions = current_commit.dirs.get(
        dir_path, (0, 0))
    current_commit.dirs[dir_path] = (
        additions + commit_additions, deletions + commit_deletions)
    return True
def _RecordCommit(options, commit):
    """Propagate `commit` if there is one and its author is eligible."""
    if commit is not None and _ValidAuthor(commit.author):
        _PropagateCommit(options, commit)


def processAllCommits(options):
    """Stream `git log` and accumulate per-directory author statistics.

    The log is read line by line. A header line starts a commit, numstat
    lines add to it, and a blank line or the next header line ends it.
    Every finished commit by an eligible author is propagated into the
    directory statistics.

    Args:
        options: Parsed arguments; reads `subdirectory` and `days_ago` and
            is passed on to the git helpers.
    """
    if not options.subdirectory and options.days_ago > 100:
        print('git log for your query might take > 5 minutes, limit by a '
              'subdirectory or reduce the number of days of history to low double '
              'digits to make this faster. There is no progress indicator, it is '
              'all waiting for single git log to finish.')
    output_pipe = _RunGitCommand(options,
                                 _GetGitLogCmd(options),
                                 pipe_output=True)
    current_commit = None
    # Close the pipe once the log has been consumed (or on error).
    with output_pipe:
        for line in output_pipe:
            line = line.rstrip('\n')
            if line == '':
                # Blank separator: finish the pending commit, if any. A
                # stray blank line with no pending commit is ignored.
                _RecordCommit(options, current_commit)
                current_commit = None
            elif current_commit is None:
                current_commit = _ParseCommitLine(line)
            elif not _ParseFileStatsLine(current_commit, line):
                # A header directly after the previous commit's lines:
                # record what was collected before starting the next one.
                _RecordCommit(options, current_commit)
                current_commit = _ParseCommitLine(line)
    # The last commit has no trailing separator. With an empty log there is
    # no commit at all, which _RecordCommit tolerates.
    _RecordCommit(options, current_commit)
    print('Done parsing commit log.')
def _CountCommits(directory):
    """Return the number of commits recorded for `directory`, all authors.

    Note that looking up an unknown directory in DIRECTORY_AUTHORS (a
    defaultdict) creates an empty entry for it.
    """
    total = 0
    for commit_count, _added, _removed in DIRECTORY_AUTHORS[directory].values():
        total += commit_count
    return total
def _GetOwnerLevel(options, author, directory):
    """Return the closest OWNERS level at which `author` owns `directory`.

    Returns:
        The smallest level among the (owner, level) pairs matching
        `author`, or -1 when the author is not an owner at any level.
    """
    matching_levels = [
        level
        for owner, level in _GetOwners(options, directory)
        if owner == author
    ]
    if not matching_levels:
        return -1
    return min(matching_levels)
def _GetOwners(options, directory_path):
    """Collect the owners of a directory from it and its ancestors.

    Starting at `directory_path` and walking up one directory at a time,
    every OWNERS file found contributes its owners tagged with a level.
    The level starts at 0 and increases by one for each OWNERS file found.
    The walk stops early at a file that sets noparent. The walk also stops
    once the path is empty, so an OWNERS file directly in the repository
    root is not consulted. Results are memoized in OWNERS_CACHE.

    Args:
        options: Parsed arguments; reads `repo_path`.
        directory_path: Directory relative to the repository root.

    Returns:
        A set of (owner, level) tuples.
    """
    if directory_path in OWNERS_CACHE:
        return OWNERS_CACHE[directory_path]
    owners = set()
    parent_dir = directory_path
    owner_level = 0
    while parent_dir != '':
        # Always resolve against the repository, not the current working
        # directory, for the starting directory and every ancestor alike.
        owners_path = path.join(options.repo_path, parent_dir, 'OWNERS')
        if path.isfile(owners_path):
            parsed_owners, noparent = _ParseOwnersFile(options, owners_path)
            owners.update((owner, owner_level) for owner in parsed_owners)
            owner_level += 1
            if noparent:
                break
        next_dir = path.dirname(parent_dir)
        if next_dir == parent_dir:
            # dirname no longer shrinks the path (e.g. '/'); stop instead
            # of looping forever.
            break
        parent_dir = next_dir
    OWNERS_CACHE[directory_path] = set(owners)
    return owners
def _ParseOwnersFile(options, filepath, _visited=None):
    """Parse an OWNERS file into a set of owners and a noparent flag.

    Blank lines, lines starting with '#', and 'per-file' lines are ignored.
    A 'file://<path>' line includes the owners of another OWNERS file,
    resolved against `options.repo_path`. Includes that do not exist or
    that were already visited in this parse are skipped. A 'set noparent'
    line sets the flag; the flag of included files is ignored. Any other
    line containing '@chromium.org' contributes the text up to and
    including that suffix as an owner.

    Args:
        options: Parsed arguments; reads `repo_path`.
        filepath: Path of the OWNERS file to read.
        _visited: Internal. Absolute paths already being parsed, used to
            break include cycles. Callers should leave it unset.

    Returns:
        An (owners, noparent) tuple: a set of e-mail strings and a bool.
    """
    if _visited is None:
        _visited = set()
    _visited.add(path.abspath(filepath))

    owners = set()
    noparent = False
    suffix = '@chromium.org'
    # Undecodable bytes are replaced rather than aborting the whole run.
    with open(filepath, encoding='utf-8', errors='replace') as f:
        for line in f:
            line = line.strip()
            if line == '' or line[0] == '#' or line.startswith('per-file'):
                continue
            if line.startswith('file://'):
                # Drop a trailing '# comment' after the include target.
                relpath = line[len('file://'):].split('#', 1)[0].strip()
                included = path.join(options.repo_path, relpath)
                already_seen = path.abspath(included) in _visited
                if not already_seen and path.isfile(included):
                    parsed_owners, _ = _ParseOwnersFile(
                        options, included, _visited)
                    owners.update(parsed_owners)
                continue
            if line == 'set noparent':
                noparent = True
                continue
            index = line.find(suffix)
            if index > -1:
                owners.add(line[:index + len(suffix)])
    return owners, noparent
def _IsTrivialDirectory(options, repo_subdir):
    """Tell whether a repository directory contains exactly one entry.

    Returns:
        True when listing the directory yields a single entry. False
        otherwise, including when the directory cannot be listed.
    """
    full_path = path.join(options.repo_path, repo_subdir)
    try:
        entries = os.listdir(full_path)
    except OSError:
        return False
    return len(entries) == 1
def computeSuggestions(options):
    """Pick candidate owners for every directory worth reporting.

    A directory is skipped when it holds a single entry on disk, when it
    has fewer than `options.dir_commit_limit` commits, or when a
    subdirectory filter is set and the directory lies outside it. For the
    remaining directories, authors are ordered by descending commit count
    and kept unless they are in `options.ignore_authors` or have fewer
    than `options.author_cl_limit` commits.

    Returns:
        A list of (directory, suggestions) tuples in sorted directory
        order, where suggestions is a list of
        (author, (commits, additions, deletions)) tuples.
    """
    results = []
    for directory, authors in sorted(DIRECTORY_AUTHORS.items()):
        if (_IsTrivialDirectory(options, directory)
                or _CountCommits(directory) < options.dir_commit_limit
                or (options.subdirectory and not isSubDirectory(
                    options.subdirectory, directory))):
            continue
        ranked = sorted(authors.items(), key=lambda item: -item[1][0])
        suggestions = []
        for author, stats in ranked:
            if author in options.ignore_authors:
                continue
            if stats[0] < options.author_cl_limit:
                continue
            suggestions.append((author, stats))
        results.append((directory, suggestions))
    return results
def _PrintSettings(options):
    """Print the thresholds in effect and a legend for the report.

    The legend uses '(owner+N)', the same spelling as the marker printed
    next to each author in the report rows.

    Args:
        options: Parsed arguments; reads `dir_commit_limit`, `days_ago`,
            `max_suggestions` and `author_cl_limit`.
    """
    print('Showing directories with at least ({}) commits in the last ({}) '
          'days.'.format(options.dir_commit_limit, options.days_ago))
    print('Showing top ({}) committers who have committed at least ({}) '
          'commits to the directory in the last ({}) days.'.format(
              options.max_suggestions, options.author_cl_limit,
              options.days_ago))
    print('(owner+N) represents distance through OWNERS files for said owner\n')
def printSuggestions(options, directory_suggestions):
    """Print the report: settings first, then one section per directory.

    Each section lists authors with their commit, addition and deletion
    counts. Authors who are already owners are marked with ' (owner+N)'.
    Listing for a directory stops once `options.max_suggestions` authors
    who are not owners have been printed.

    Args:
        options: Parsed arguments; reads `days_ago` and `max_suggestions`.
        directory_suggestions: List of (directory, suggestions) tuples as
            produced by computeSuggestions.
    """
    print('\nCommit stats:')
    _PrintSettings(options)
    for directory, suggestions in directory_suggestions:
        header = '{}: {} commits in the last {} days'.format(
            directory, _CountCommits(directory), options.days_ago)
        print(header)
        shown_non_owners = 0
        for author, stats in suggestions:
            commit_count, additions, deletions = stats
            owner_level = _GetOwnerLevel(options, author, directory)
            is_owner = owner_level > -1
            owner_string = ' (owner+{})'.format(owner_level) if is_owner else ''
            if not is_owner:
                shown_non_owners += 1
            print('{}{}, commits: {}, additions:{}, deletions: {}'.format(
                author, owner_string, commit_count, additions, deletions))
            if shown_non_owners >= options.max_suggestions:
                break
        print()
def _GetHeadCommitHash(options):
    """Return the output of `git rev-parse HEAD` with whitespace stripped."""
    raw_output = _RunGitCommand(options, ['rev-parse', 'HEAD'])
    return raw_output.strip()
def _GetCacheMetadata(options):
    """Return the (head hash, days_ago, subdirectory) tuple stored with
    the cache to describe the query that produced it."""
    head_hash = _GetHeadCommitHash(options)
    return (head_hash, options.days_ago, options.subdirectory)
def _IsCacheValid(options, metadata):
    """Decide whether cached statistics can serve the current query.

    The cache is usable when it was built at the current HEAD, over the
    same number of days, and either without a recorded subdirectory (None)
    or for a subdirectory that contains the one requested now.

    Args:
        options: Parsed arguments; reads `days_ago` and `subdirectory`.
        metadata: The (head hash, days_ago, subdirectory) tuple stored
            with the cache.
    """
    head_hash, days_ago, cached_subdirectory = metadata
    if head_hash != _GetHeadCommitHash(options):
        return False
    if days_ago != options.days_ago:
        return False
    if cached_subdirectory is None:
        return True
    return isSubDirectory(cached_subdirectory, options.subdirectory)
def cacheProcessedCommits(options):
    """Pickle the query metadata and DIRECTORY_AUTHORS to CACHE_FILENAME."""
    payload = (_GetCacheMetadata(options), DIRECTORY_AUTHORS)
    with open(CACHE_FILENAME, 'wb') as cache_file:
        pickle.dump(payload, cache_file)
def maybeRestoreProcessedCommits(options):
    """Load DIRECTORY_AUTHORS from the cache file when it is usable.

    Args:
        options: Parsed arguments, used to validate the cache metadata.

    Returns:
        True if the cached statistics were loaded into DIRECTORY_AUTHORS.
        False if there is no cache file, it cannot be unpickled, or it does
        not match the current query.
    """
    global DIRECTORY_AUTHORS
    if not path.exists(CACHE_FILENAME):
        return False
    try:
        # NOTE: unpickling can execute arbitrary code. This assumes the
        # file at CACHE_FILENAME was written by this tool, not supplied by
        # an untrusted party.
        with open(CACHE_FILENAME, 'rb') as f:
            stored_metadata, cached_directory_authors = pickle.load(f)
    except (pickle.UnpicklingError, EOFError, AttributeError, ImportError,
            IndexError, TypeError, ValueError):
        # A truncated or corrupt cache (e.g. from an interrupted write) is
        # treated like a stale one instead of aborting the run.
        print('Cache is stale or invalid, must rerun `git log`')
        return False
    if _IsCacheValid(options, stored_metadata):
        print('Loading from cache')
        DIRECTORY_AUTHORS = cached_directory_authors
        return True
    print('Cache is stale or invalid, must rerun `git log`')
    return False
def do(options):
    """Run the tool: obtain commit statistics, then print suggestions.

    Statistics come from the cache unless `options.skip_cache` is set or
    the cache cannot be restored. In that case the git log is processed
    and the result is written back to the cache.
    """
    restored = False
    if not options.skip_cache:
        restored = maybeRestoreProcessedCommits(options)
    if not restored:
        processAllCommits(options)
        cacheProcessedCommits(options)
    printSuggestions(options, computeSuggestions(options))
def _BuildArgumentParser():
    """Create the command-line parser for this tool."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('repo_path')
    parser.add_argument(
        '--days-ago', metavar='DAYS_AGO', type=int, default=365,
        help='Number of days of history to search through.')
    parser.add_argument(
        '--subdirectory', default='',
        help='Limit suggestions to this subdirectory')
    parser.add_argument(
        '--ignore-authors',
        help='Ignore this comma separated list of authors')
    parser.add_argument(
        '--max-suggestions', type=int, default=5,
        help='Maximum number of suggested authors per directory.')
    parser.add_argument(
        '--author-cl-limit', type=int, default=10,
        help='Do not suggest authors who have commited less than '
             'this to the directory in the last DAYS_AGO days.')
    parser.add_argument(
        '--dir-commit-limit', type=int, default=100,
        help='Skip directories with less than this number of '
             'commits in the last DAYS_AGO days.')
    parser.add_argument(
        '--skip-cache', action='store_true', default=False,
        help='Do not read from cache.')
    return parser


def main():
    """Parse the command line and run the tool.

    `--ignore-authors` is converted from a comma separated string into a
    set of stripped names (an empty set when the flag is absent or empty).
    """
    options = _BuildArgumentParser().parse_args()
    raw_ignored = options.ignore_authors
    if raw_ignored:
        options.ignore_authors = {
            name.strip() for name in raw_ignored.split(',')}
    else:
        options.ignore_authors = set()
    do(options)
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()