import dataclasses
import hashlib
import os
import pickle
from typing import Optional, Any, Dict
@dataclasses.dataclass(frozen=True)
class _PresubmitCheckContext:
"""Describes and identifies a context of a specific presubmit check.
This is used as a key to cache results of a presubmit check. The histograms
directory hash is used to lower the probability of changes in one file
impacing health status of presubmit checks in the other. This still doesn't
eliminate the risk of changes from outside of this directory affecting the
health status, but given how PRESUBMIT is triggered, this seems to be inline
with the risk of that happening because of PRESUBMIT not being triggered.
Attributes:
histograms_directory_hash: A sha256 hash of the entire contents of the
histograms directory (combined hash of all the files) that is used to
key the cache and invalidate it when the directory content changes.
check_id: A unique identifier of the check that is being cached. As the
single directory presubmit can have more then one check in it, this id
is used to determine which cache content should be used for which check.
"""
histograms_directory_hash: str
check_id: int
def key(self):
return f"{self.histograms_directory_hash}:{self.check_id}"
# Version stamped into every cache file written by this module. A stored cache
# whose version differs from this value is not loaded.
_CURRENT_CACHE_FILE_SCHEMA_VERSION = 1
@dataclasses.dataclass(frozen=True)
class CacheFileSchema:
    """Top-level object that is pickled into the cache file.

    Attributes:
      version: Schema version the cache was written with.
      data: Cached check results, keyed by the string key of the presubmit
        check context they were stored for.
    """

    version: int
    data: Dict[str, Any]
def _CalculateCombinedDirectoryHash(directory_path):
"""Calculates a sha256 hash of the entire contents of a directory."""
hasher = hashlib.sha256()
for root, _, files in os.walk(directory_path):
for file in sorted(files):
file_path = os.path.join(root, file)
with open(file_path, "rb") as f:
chunk = f.read(4096)
while chunk:
hasher.update(chunk)
chunk = f.read(4096)
return hasher.hexdigest()
class PresubmitCache:
    """Stores and retrieves results of presubmit checks in a pickle file.

    Results are keyed by the check id together with a hash of the observed
    directory's contents, so a result stored for one state of the directory
    is not returned once the directory's contents hash changes.

    The cache is best effort: a missing, empty, corrupt or wrong-version
    storage file is treated as an empty cache rather than as an error.
    """

    _cache_contents: CacheFileSchema
    _storage_file_path: str
    _observed_directory: str

    def __init__(self, storage_file_path: str, observed_directory_path: str):
        """Creates the cache, loading stored contents when they are usable.

        Args:
          storage_file_path: Path of the pickle file backing the cache. It
            does not have to exist yet; it is written on the first store.
          observed_directory_path: Directory whose combined contents hash is
            part of every cache key.
        """
        self._storage_file_path = storage_file_path
        self._observed_directory = observed_directory_path
        self._cache_contents = CacheFileSchema(
            version=_CURRENT_CACHE_FILE_SCHEMA_VERSION,
            data={},
        )
        stored_cache = self._LoadCacheFile()
        if stored_cache is not None:
            self._cache_contents = stored_cache

    def _LoadCacheFile(self) -> Optional[CacheFileSchema]:
        """Returns the cache stored on disk, or None if it is not usable."""
        if (not os.path.exists(self._storage_file_path)
                or os.path.getsize(self._storage_file_path) == 0):
            return None
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. The storage file must only ever be one written by this
        # class; never point it at untrusted input.
        with open(self._storage_file_path, "rb") as f:
            try:
                loaded_cache = pickle.load(f)
            except (pickle.PickleError, EOFError, AttributeError, ImportError,
                    IndexError, KeyError, TypeError, ValueError):
                # Unpickling a truncated or stale file raises more than just
                # PickleError. A damaged cache must not break the presubmit,
                # so fall back to an empty cache.
                return None
        # Guard against files that unpickle cleanly into something that is
        # not shaped like the current schema.
        if (getattr(loaded_cache, "version", None)
                != _CURRENT_CACHE_FILE_SCHEMA_VERSION):
            return None
        if not isinstance(getattr(loaded_cache, "data", None), dict):
            return None
        return loaded_cache

    def _CurrentContext(self, check_id: int) -> _PresubmitCheckContext:
        """Builds the context of `check_id` for the directory's current state.

        The directory hash is recalculated on every call.
        """
        return _PresubmitCheckContext(
            histograms_directory_hash=_CalculateCombinedDirectoryHash(
                self._observed_directory),
            check_id=check_id,
        )

    def _GetForContext(self,
                       context: _PresubmitCheckContext) -> Optional[Any]:
        """Returns the result cached under `context`, or None if absent."""
        return self._cache_contents.data.get(context.key())

    def _StoreForContext(self, context: _PresubmitCheckContext,
                         check_result: Any):
        """Caches `check_result` under `context` and persists the cache."""
        self._cache_contents.data[context.key()] = check_result
        self._SaveCurrentCache()

    def _SaveCurrentCache(self):
        """Overwrites the storage file with the current cache contents."""
        with open(self._storage_file_path, "wb") as f:
            pickle.dump(self._cache_contents, f)

    def InspectCacheForTesting(self) -> CacheFileSchema:
        """Returns the in-memory cache contents (the live object, no copy)."""
        return self._cache_contents

    def RetrieveResultFromCache(self, check_id: int) -> Optional[Any]:
        """Returns the cached result of a check for the current directory.

        Args:
          check_id: Identifier of the check whose result is requested.

        Returns:
          The result stored for `check_id` under the current contents hash of
          the observed directory, or None if there is no such entry.
        """
        return self._GetForContext(self._CurrentContext(check_id))

    def StoreResultInCache(self, check_id: int, check_result: Any):
        """Stores the result of a check for the current directory state.

        The whole cache is written to the storage file right away.

        Args:
          check_id: Identifier of the check the result belongs to.
          check_result: The result to cache. It is pickled, so it has to be
            picklable.
        """
        self._StoreForContext(self._CurrentContext(check_id), check_result)