From 55b7ba0645406b4461b3a953ccf9e707e95644aa Mon Sep 17 00:00:00 2001 From: Anas Sulaiman Date: Thu, 1 Feb 2024 17:13:39 +0000 Subject: Fix non-deterministic python compilation Timestamps in pyc files and zip archives were causing cache misses in RBE. Bug: b/322788229 Test: Ran a few local builds and verified improved cache hit rate for metalava actions. Change-Id: I9375694d26a107f5c0c43bbd685bd8e86349d187 --- python/scripts/precompile_python.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'python/scripts/precompile_python.py') diff --git a/python/scripts/precompile_python.py b/python/scripts/precompile_python.py index e12e7d24c..80e7c76a1 100644 --- a/python/scripts/precompile_python.py +++ b/python/scripts/precompile_python.py @@ -16,6 +16,7 @@ import argparse import py_compile import os +import sys import shutil import tempfile import zipfile @@ -23,22 +24,31 @@ import zipfile # This file needs to support both python 2 and 3. -def process_one_file(name, inf, outzip): - if not name.endswith('.py'): - outzip.writestr(name, inf.read()) +def process_one_file(info, infile, outzip): + if not info.filename.endswith('.py'): + outzip.writestr(info, infile.read()) return # Unfortunately py_compile requires the input/output files to be written # out to disk. with tempfile.NamedTemporaryFile(prefix="Soong_precompile_", delete=False) as tmp: - shutil.copyfileobj(inf, tmp) + shutil.copyfileobj(infile, tmp) in_name = tmp.name with tempfile.NamedTemporaryFile(prefix="Soong_precompile_", delete=False) as tmp: out_name = tmp.name try: - py_compile.compile(in_name, out_name, name, doraise=True) + # Ensure deterministic pyc by using the hash rather than timestamp. + # This is required to improve caching in accelerated builds. + # Only works on Python 3.7+ (see https://docs.python.org/3/library/py_compile.html#py_compile.PycInvalidationMode) + # which should cover most updated branches and developer machines. 
+ if sys.version_info >= (3, 7): + py_compile.compile(in_name, out_name, info.filename, doraise=True, invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH) + else: + py_compile.compile(in_name, out_name, info.filename, doraise=True) with open(out_name, 'rb') as f: - outzip.writestr(name + 'c', f.read()) + info.filename = info.filename + 'c' + # Use ZipInfo rather than str to reuse timestamps for deterministic zip files. + outzip.writestr(info, f.read()) finally: os.remove(in_name) os.remove(out_name) @@ -52,9 +62,9 @@ def main(): with open(args.dst_zip, 'wb') as outf, open(args.src_zip, 'rb') as inf: with zipfile.ZipFile(outf, mode='w') as outzip, zipfile.ZipFile(inf, mode='r') as inzip: - for name in inzip.namelist(): - with inzip.open(name, mode='r') as inzipf: - process_one_file(name, inzipf, outzip) + for info in inzip.infolist(): + with inzip.open(info.filename, mode='r') as inzipf: + process_one_file(info, inzipf, outzip) if __name__ == "__main__": -- cgit v1.2.3 From c755fdbe8e24f3f4f647c88fd65919df66bfe210 Mon Sep 17 00:00:00 2001 From: Anas Sulaiman Date: Tue, 6 Feb 2024 19:09:44 +0000 Subject: Fix non-deterministic python compilation This CL pins timestamps for zip entries to a hardcoded value to ensure deterministic zip archives across checkouts, not just for incremental builds. The value chosen for the fixed date is the same as the one used by soong_zip. Bug: b/322788229 Test: Ran a few builds and verified that the mdate of zipped files is fixed and that the generated zip files have identical hashes. 
Change-Id: I3cd6fcf559d4d83d8813c93319b46e267ae64a2b --- python/scripts/precompile_python.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'python/scripts/precompile_python.py') diff --git a/python/scripts/precompile_python.py b/python/scripts/precompile_python.py index 80e7c76a1..aa1a5df53 100644 --- a/python/scripts/precompile_python.py +++ b/python/scripts/precompile_python.py @@ -24,7 +24,12 @@ import zipfile # This file needs to support both python 2 and 3. -def process_one_file(info, infile, outzip): +def process_one_file(name, infile, outzip): + # Create a ZipInfo instance with a fixed date to ensure a deterministic output. + # Date was chosen to be the same as + # https://cs.android.com/android/platform/superproject/main/+/main:build/soong/jar/jar.go;l=36;drc=2863e4535eb65e15f955dc8ed48fa99b1d2a1db5 + info = zipfile.ZipInfo(filename=name, date_time=(2008, 1, 1, 0, 0, 0)) + if not info.filename.endswith('.py'): outzip.writestr(info, infile.read()) return @@ -37,17 +42,15 @@ def process_one_file(info, infile, outzip): with tempfile.NamedTemporaryFile(prefix="Soong_precompile_", delete=False) as tmp: out_name = tmp.name try: - # Ensure deterministic pyc by using the hash rather than timestamp. - # This is required to improve caching in accelerated builds. - # Only works on Python 3.7+ (see https://docs.python.org/3/library/py_compile.html#py_compile.PycInvalidationMode) - # which should cover most updated branches and developer machines. + # Ensure a deterministic .pyc output by using the hash rather than the timestamp. 
+ # Only works on Python 3.7+ + # See https://docs.python.org/3/library/py_compile.html#py_compile.PycInvalidationMode if sys.version_info >= (3, 7): py_compile.compile(in_name, out_name, info.filename, doraise=True, invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH) else: py_compile.compile(in_name, out_name, info.filename, doraise=True) with open(out_name, 'rb') as f: info.filename = info.filename + 'c' - # Use ZipInfo rather than str to reuse timestamps for deterministic zip files. outzip.writestr(info, f.read()) finally: os.remove(in_name) @@ -62,9 +65,9 @@ def main(): with open(args.dst_zip, 'wb') as outf, open(args.src_zip, 'rb') as inf: with zipfile.ZipFile(outf, mode='w') as outzip, zipfile.ZipFile(inf, mode='r') as inzip: - for info in inzip.infolist(): - with inzip.open(info.filename, mode='r') as inzipf: - process_one_file(info, inzipf, outzip) + for name in inzip.namelist(): + with inzip.open(name, mode='r') as inzipf: + process_one_file(name, inzipf, outzip) if __name__ == "__main__": -- cgit v1.2.3