910e62b5创建于 1月15日历史提交
#!/usr/bin/env python3
# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import os
from collections import defaultdict

from datetime import datetime
from filters import avoid_commit

# FILE_CHANGE_REGEX is written in verbose regex syntax (whitespace and `#`
# comments inside the pattern), so it must always be used with re.VERBOSE.
#
# This regex is for file change details that look like the following:
#  ios/chrome/browser/tab_switcher/ui_bundled/tab_grid/tab_grid_coordinator.mm    | 4 ++--
# or like the following in case that stat=500 is not enough:
# .../browser/ui/tab_switcher/tab_grid/grid/tab_groups/tab_groups_constants.mm    | 2 +
#
# What the pattern accepts:
#  - Leading whitespace is optional (`\s*` matches zero or more characters).
#  - Only paths rooted at 'ios/' or at a literal '.../' prefix match.
#  - Directory and file names may only contain `\w` characters, and a file
#    name may carry at most one '.extension' suffix.
#  - The line must end with a number, one whitespace character and a run of
#    '+'/'-' markers; lines without that suffix do not match.
FILE_CHANGE_REGEX = r"""
    ^ \s*                        # Start of the line with one or more spaces.
    (ios|\.{3})                  # Root folder ('ios' or '...').
    \/                           # Path separator.
    (\w+\/)*                     # Zero or more directory segments.
    (\w+                         # Filename.
      (\.\w+)?                   # Optional extension.
    )
    \s* \| .* \s*                # Pipe separator, e.g., " | 10 ".
    [0-9]+                       # Number of changed lines.
    \s [+-]+                     # +/- indicators.
    $                            # End of the line.
"""


class Commit:
    """A single Git commit parsed from its textual description.

    Parses a raw commit description string to extract key information such as
    author, reviewers, date, per-directory change counts, and the primary
    modified folder.
    """

    def __init__(self,
                 commit_description: str,
                 skip_tests: bool = True) -> None:
        """Initializes a Commit object from a raw commit description.

        The first line of the description is stored verbatim as the hash;
        every following line is handed to `analyse_line`. When
        `filters.avoid_commit` returns a truthy value for the description,
        nothing is parsed and every attribute keeps its default value.

        Args:
            commit_description: A string containing the full text of a Git
                commit, including hash, author, date, and file statistics.
            skip_tests: If True, changes to files that have a path component
                named exactly 'test' will be ignored.
        """
        # Username (email local part) of the commit author.
        self.author = ''
        # Usernames collected from 'Reviewed-by:' lines, in order.
        self.reviewers = []
        # Maps a directory path to the number of changed lines of the files
        # located directly inside it.
        self.files_stats = defaultdict(int)
        # Commit date as a naive datetime; datetime.min until parsed.
        self.date = datetime.min
        # Deepest path holding more than half of the changes, if any.
        self.modified_path = ''
        # Sum of the changed lines over all counted files.
        self.total_change = 0
        self.hash = ''
        self.skip_tests = skip_tests
        if avoid_commit(commit_description):
            return

        lines = commit_description.split('\n')
        self.hash = lines[0]
        for line in lines[1:]:
            self.analyse_line(line)

        # Without any counted change there is no folder to elect.
        if self.total_change == 0:
            return
        self.determine_modified_folder()

    def all_informations(self) -> tuple:
        """Returns all extracted commit information.

        Returns:
            A tuple containing the author, a list of reviewers, a dictionary of
            per-directory stats, the primary modified path, the commit date,
            and the commit hash.
        """
        return (self.author, self.reviewers, self.files_stats,
                self.modified_path, self.date, self.hash)

    def extend_paths(self) -> dict[str, int]:
        """Expands the recorded directories to include all their parents.

        Aggregates the line changes recorded in `files_stats` into every
        parent directory, providing a view of changes at every level of the
        directory tree.

        Returns:
            A dictionary where keys are directory paths and values are the
            sum of line changes within that directory and its subdirectories.
            Entries are ordered from the deepest paths to the shallowest.
        """
        all_paths = defaultdict(int, self.files_stats)
        for path, change_count in self.files_stats.items():
            parent = os.path.dirname(path)
            while parent:
                all_paths[parent] += change_count
                parent = os.path.dirname(parent)

        # Sort by depth (number of '/'-separated components) and flip the
        # result so that the deepest paths come first.
        ordered = sorted(all_paths.items(), key=lambda x: len(x[0].split('/')))
        ordered.reverse()
        return dict(ordered)

    def determine_modified_folder(self) -> None:
        """Identifies the primary folder modified in the commit.

        Sets the `modified_path` instance variable to the deepest path that
        contains more than 50% of the total line changes for the commit.
        `modified_path` is left untouched when no path qualifies or when no
        change has been counted.
        """
        if self.total_change <= 0:
            # Nothing was counted; avoid comparing against an empty total.
            return
        for path, change_count in self.extend_paths().items():
            # Integer form of `change_count / total_change > 50%`.
            if 2 * change_count > self.total_change:
                self.modified_path = path
                return

    def extract_username_from_line(self, line: str) -> str:
        """Extracts a username from a commit metadata line.

        The last whitespace-separated token of the line is taken as the email
        address; surrounding angle brackets are removed when present.

        Args:
            line: A string from the commit description, e.g., "Author: ..."
                or "Reviewed-by: ...".

        Returns:
            The extracted username (the part of the email before the '@'), or
            an empty string when the line contains no token at all.
        """
        tokens = line.split()
        if not tokens:
            return ''
        email = tokens[-1].strip('<>')
        return email.split('@')[0]

    def analyse_line(self, line: str) -> None:
        """Parses a single line of a commit description.

        Updates the instance variables (author, reviewers, date, files_stats,
        total_change) based on the content of the line. Lines that match none
        of the known shapes, such as binary file changes that carry no
        '+'/'-' markers, are ignored.

        Args:
            line: A single line from the commit description text.

        Raises:
            ValueError: If a 'Date:' line does not hold a timestamp in the
                '%a %b %d %H:%M:%S %Y' format.
        """
        if line.startswith('Author:'):
            self.author = self.extract_username_from_line(line)
            return
        if 'Reviewed-by:' in line:
            self.reviewers.append(self.extract_username_from_line(line))
            return
        if line.startswith('Date:'):
            # Drop the leading 'Date:' label and the last token (presumably
            # the UTC offset, e.g. '+0100'), keeping a naive timestamp.
            timestamp = ' '.join(line.split()[1:-1])
            self.date = datetime.strptime(timestamp, '%a %b %d %H:%M:%S %Y')
            return
        if not re.match(FILE_CHANGE_REGEX, line, re.VERBOSE):
            return

        fields = line.split()
        path = fields[0]
        # Git always prints '/' as the separator, whatever the host platform.
        if self.skip_tests and 'test' in path.split('/'):
            return
        # The second to last field is the number of changed lines.
        change_count = int(fields[-2])
        self.total_change += change_count
        self.files_stats[os.path.dirname(path)] += change_count