"""This script will search through the target folder specified and try to find
duplicate includes from h and cc files, and remove them from the cc files. The
current/working directory needs to be chromium_checkout/src/ when this tool is
run.
Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
"""
from __future__ import print_function
import argparse
import collections
import logging
import os
import re
import sys
# File extensions used to classify the files found while walking the targets.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# Matches a line consisting solely of an #include directive (optionally
# surrounded by whitespace). Group 1 is the directive text without the
# surrounding whitespace, group 2 is the included path without its quotes or
# angle brackets. A raw string is used so that `\s` reaches the regex engine
# verbatim instead of being an invalid string escape sequence.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
def HasSuffix(file_name, suffix):
    """Returns True if the extension of |file_name| is exactly |suffix|.

    The extension is whatever os.path.splitext() reports, so |suffix| is
    expected to include the leading dot (e.g. '.h').
    """
    _, extension = os.path.splitext(file_name)
    return extension == suffix
def IsEmpty(line):
    """Returns True if |line| has no content once whitespace is stripped."""
    stripped = line.strip()
    return len(stripped) == 0
def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
    """Looks up the include set that belongs to the given .cc file.

    Only the first #include line of |input_lines| is considered. Its path is
    joined onto the current working directory and used as the key into
    |h_path_to_include_set|.

    Returns the mapped include set, or None when the file has no #include line
    or when the first include does not name a known .h file (in which case a
    message mentioning |cc_file_name| is printed).
    """
    candidates = (INCLUDE_REGEX.search(line) for line in input_lines)
    first_include = next((found for found in candidates if found), None)
    if first_include is None:
        return None

    h_file_path = os.path.join(os.getcwd(), first_include.group(2))
    if h_file_path in h_path_to_include_set:
        return h_path_to_include_set[h_file_path]

    print('First include did not match to a known .h file, skipping ' +
          cc_file_name + ', line: ' + first_include.group(1))
    return None
def WithoutDuplicates(input_lines, include_set, cc_file_name):
    """Filters out #include lines whose path is already in |include_set|.

    When a removal leaves an empty line directly after an empty line that was
    already kept, the second empty line is dropped as well so that no run of
    blank lines is introduced. Each dropped line is reported via print(),
    mentioning |cc_file_name|.

    Returns the list of lines the file should contain afterwards.
    """
    kept_lines = []
    previous_kept_line_blank = False
    previous_line_dropped = False
    for line in input_lines:
        include = INCLUDE_REGEX.search(line)
        if include and include.group(2) in include_set:
            print('Removed ' + include.group(1) + ' from ' + cc_file_name)
            previous_line_dropped = True
            continue

        if previous_kept_line_blank and previous_line_dropped and IsEmpty(line):
            # previous_line_dropped is necessarily still True here, so the
            # next blank line in the same run is dropped too.
            print('Removed empty line from ' + cc_file_name)
            continue

        previous_kept_line_blank = IsEmpty(line)
        previous_line_dropped = False
        kept_lines.append(line)
    return kept_lines
def main():
    """Removes includes from .cc files that their own .h file already has.

    Command-line arguments:
      --dry-run  Report what would be removed without modifying any file.
      targets    One or more folders, relative to the current working
                 directory, that are searched recursively.

    The work happens in two passes: first every .h file under the targets is
    scanned and its included paths recorded, then every .cc file found is
    filtered against the include set of the header named by its first
    #include.

    Returns None, so the process exits with status 0 unless an exception
    propagates.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dry-run', action='store_true',
        help='Does not actually remove lines when specified.')
    parser.add_argument(
        'targets', nargs='+',
        help='Relative path to folders to search for duplicate includes in.')
    args = parser.parse_args()

    # Pass 1: map each header's absolute path to the set of paths it includes
    # and collect the .cc files to process.
    h_path_to_include_set = {}
    cc_file_path_set = set()
    for relative_root in args.targets:
        absolute_root = os.path.join(os.getcwd(), relative_root)
        for dir_path, _, file_name_list in os.walk(absolute_root):
            for file_name in file_name_list:
                file_path = os.path.join(dir_path, file_name)
                if HasSuffix(file_name, H_FILE_SUFFIX):
                    include_set = set()
                    h_path_to_include_set[file_path] = include_set
                    with open(file_path) as fh:
                        for line in fh:
                            match = INCLUDE_REGEX.search(line)
                            if match:
                                include_set.add(match.group(2))
                elif HasSuffix(file_name, CC_FILE_SUFFIX):
                    cc_file_path_set.add(file_path)

    # Pass 2: filter each .cc file. Sorting makes the processing (and thus the
    # printed report) order reproducible between runs.
    for cc_file_path in sorted(cc_file_path_set):
        cc_file_name = os.path.basename(cc_file_path)
        with open(cc_file_path, 'r' if args.dry_run else 'r+') as fh:
            input_lines = fh.readlines()
            include_set = FindIncludeSet(input_lines, h_path_to_include_set,
                                         cc_file_name)
            # None (no usable header) and an empty set (header without
            # includes) both mean there is nothing to remove.
            if not include_set:
                continue
            output_lines = WithoutDuplicates(input_lines, include_set,
                                             cc_file_name)
            # Only touch the file when something was actually removed, so
            # unchanged sources are not needlessly rewritten.
            if args.dry_run or output_lines == input_lines:
                continue
            fh.seek(0)
            fh.writelines(output_lines)
            fh.truncate()
# Run the tool only when executed as a script and use main()'s return value as
# the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)