""" Given a path to a directory of CodeQL logs, this script analyzes the
contents of those logs to determine whether the CodeQL extractor encountered
any errors that affect the integrity of the finalized database.
On success (database is intact), this script returns 0.
On failure (database is corrupt OR has unknown status), this script returns -1
and outputs a list of the specific logfiles that correspond to integrity errors,
plus a list of the specific logfiles containing unknown errors."""
import os
import argparse
# Extractor messages that are treated as harmless. A warning line that matches
# an entry here is skipped by check_if_log_lines_indicate_nontrivial_error
# rather than being recorded against its logfile.
KNOWN_INCONSEQUENTIAL_ERRORS = [
    "Unknown expr kind 30",
    "Unexpected template kind 9",
    "Unknown expr kind 31",
    "Unknown expr kind 34",
    "Unexpected dynamic init kind 1",
    "Unexpected dynamic init kind 2",
    "Unexpected dynamic init kind 3",
    "Unexpected dynamic init kind 6",
    "Unexpected dynamic init kind 7",
    "Unknown kind 5",
    # Adjacent string literals inside the parentheses are concatenated into a
    # single list entry.
    ("In fabricate_destructors_expr: Unsupported expression kind encountered "
    "while fabricating destructors (fabricate_destructors_expr, 31)."),
]
# Extractor messages that mark a logfile as an integrity error: a warning line
# that matches an entry here causes the logfile's path to be recorded in the
# "integrity errors" list. Each parenthesized group is one concatenated string.
KNOWN_PROBLEMATIC_ERRORS = [
    ('In construct_text_message: "../../third_party/dawn/src/dawn/native/'
    'webgpu_absl_format.cpp", line 118: internal error: assertion failed: '
    'cast_node: cast to class type (exprutil.c, line 9462 in cast_node)'),
    # NOTE: unlike the entry above, this one includes the
    # "Warning[extractor-c++]: " prefix as part of the text to match.
    ('Warning[extractor-c++]: In construct_text_message: "../../base/functional'
    '/function_ref.h", line 69: internal error: assertion failed at: '
    '"decls.c", line 21165 in mark_decl_after_first_in_comma_list')
]
# Text that main() looks for in the LAST line of each extractor logfile; only
# logfiles whose final line contains it are analyzed further.
GENERIC_EXTRACTOR_ERROR = ('Warning[extractor-c++]: In main: Extractor exiting '
    'with code 1')
def line_in_list(line, errlist):
    """Report whether a log line contains any of the given error messages.

    Args:
        line: A single line of log output.
        errlist: Iterable of error-message strings to search for.

    Returns:
        True if at least one entry of errlist occurs as a substring of line,
        otherwise False (including when errlist is empty).
    """
    # The error message is the needle and the log line is the haystack. Callers
    # pass lines that carry the "Warning[extractor-c++]:" prefix, which most
    # known messages do not include, so testing `line in error` could never
    # match them.
    return any(error in line for error in errlist)
def check_if_log_lines_indicate_nontrivial_error(
        filename, log_lines, extractor_logs_indicating_integrity_errors,
        extractor_logs_containing_unknown_errors):
    """Classify one logfile by its first non-ignorable extractor warning.

    Only lines containing "Warning[extractor-c++]:" are examined, in order.
    Lines matching KNOWN_INCONSEQUENTIAL_ERRORS are skipped. The first
    remaining warning decides the outcome and ends the scan, so filename is
    appended to at most one of the two lists, and at most once.

    Args:
        filename: Path of the logfile that log_lines came from.
        log_lines: The lines of that logfile.
        extractor_logs_indicating_integrity_errors: List that receives
            filename when the deciding warning matches
            KNOWN_PROBLEMATIC_ERRORS. Mutated in place.
        extractor_logs_containing_unknown_errors: List that receives filename
            when the deciding warning matches neither known list. Mutated in
            place.

    Returns:
        None. Results are reported through the two list arguments.
    """
    # NOTE(review): GENERIC_EXTRACTOR_ERROR itself carries the
    # "Warning[extractor-c++]:" prefix and appears in neither known list, so
    # if the scan reaches such a line it is classified as unknown. Confirm
    # that this is the intended (conservative) behavior.
    extractor_warnings = (
        entry for entry in log_lines if "Warning[extractor-c++]:" in entry)
    for entry in extractor_warnings:
        if line_in_list(entry, KNOWN_INCONSEQUENTIAL_ERRORS):
            continue
        if line_in_list(entry, KNOWN_PROBLEMATIC_ERRORS):
            destination = extractor_logs_indicating_integrity_errors
        else:
            destination = extractor_logs_containing_unknown_errors
        destination.append(filename)
        return
def main():
    """Scan a CodeQL database's extractor logs and report integrity status.

    Reads the log directory from the required --codeql_log_path / -l
    command-line argument, walks its "extractor" subdirectory, and passes
    every logfile whose last line contains GENERIC_EXTRACTOR_ERROR to
    check_if_log_lines_indicate_nontrivial_error for classification.

    Returns:
        0 if no logfile was classified as an integrity error or an unknown
        error. -1 otherwise, after printing the offending logfile paths and
        summary counts. -1 is also returned when the extractor log directory
        does not exist, since the database status is then unknown.
    """
    parser = argparse.ArgumentParser(
        description='Parse arguments for validation of CodeQL databases')
    parser.add_argument(
        '--codeql_log_path',
        '-l',
        type=str,
        required=True,
        help='Absolute path of a CodeQL log directory (e.g. "/codeql_db/logs")')
    args = parser.parse_args()

    logs_directory_path = os.path.abspath(
        os.path.expanduser(args.codeql_log_path))
    extractor_logs_directory_path = os.path.join(logs_directory_path,
                                                 'extractor')

    # os.walk() silently yields nothing for a missing directory, which would
    # otherwise be reported as a clean database. Nothing was validated in that
    # case, so treat it as "unknown status" and fail.
    if not os.path.isdir(extractor_logs_directory_path):
        print("Extractor log directory not found: "
              f"{extractor_logs_directory_path}")
        return -1

    extractor_logs_indicating_integrity_errors = []
    extractor_logs_containing_unknown_errors = []
    num_files_scanned = 0
    num_files_with_errors = 0

    for root, _dirs, files in os.walk(extractor_logs_directory_path):
        for filename in files:
            num_files_scanned += 1
            filename = os.path.join(root, filename)
            # Logs may echo arbitrary bytes from compiled sources; replace
            # undecodable bytes instead of aborting the whole validation.
            with open(filename, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.read().splitlines()
            # An empty logfile has no last line to inspect (and no errors).
            if not lines or GENERIC_EXTRACTOR_ERROR not in lines[-1]:
                continue
            num_files_with_errors += 1
            check_if_log_lines_indicate_nontrivial_error(
                filename, lines, extractor_logs_indicating_integrity_errors,
                extractor_logs_containing_unknown_errors)

    if not (extractor_logs_indicating_integrity_errors
            or extractor_logs_containing_unknown_errors):
        print("Database contains no integrity errors.")
        return 0

    print("A problem was detected with the database.")
    print("Paths of logfiles that indicate integrity errors:")
    for filepath in extractor_logs_indicating_integrity_errors:
        print(filepath)
    print("Paths of logfiles containing unknown errors:")
    for filepath in extractor_logs_containing_unknown_errors:
        print(filepath)
    print(f"{num_files_with_errors}/{num_files_scanned}"
          " files contained errors")
    print(f"{len(extractor_logs_containing_unknown_errors)} unknown errors")
    print(f"{len(extractor_logs_indicating_integrity_errors)} integrity errors")
    return -1
if __name__ == '__main__':
    # Use SystemExit directly instead of exit(): the exit() helper is added by
    # the site module for interactive sessions and is not available when
    # Python runs without site (e.g. `python -S`). The process exit status is
    # main()'s return value.
    raise SystemExit(main())