aboutsummaryrefslogtreecommitdiff
path: root/scripts/disk_usage.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/disk_usage.py')
-rw-r--r-- scripts/disk_usage.py 86
1 files changed, 86 insertions, 0 deletions
diff --git a/scripts/disk_usage.py b/scripts/disk_usage.py
new file mode 100644
index 000000000..940cea73a
--- /dev/null
+++ b/scripts/disk_usage.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""Calculates the disk size of each repository managed by 'repo'.
+
+This script invokes 'repo forall ...' to get the disk usage
+for each project repository and prints the combined output as a CSV.
+"""
+
+import os
+import re
+import subprocess
+import sys
+
+
+def get_repo_disk_usage() -> dict[str, int]:
+    """Collects the disk usage of every project managed by 'repo'.
+
+    Runs 'repo forall -p -c du -s -b .', i.e. 'du -s -b .' inside each
+    project. The output is expected to alternate between a
+    'project <path>/' header line and a '<size> .' total line from 'du'.
+
+    Returns:
+        A dictionary mapping project paths (str, without the trailing '/')
+        to the size in bytes (int) reported by 'du -s -b'. The dictionary
+        is empty if the command printed nothing that could be parsed.
+
+    Raises:
+        subprocess.CalledProcessError: If the command returns a non-zero
+            exit code.
+        FileNotFoundError: If the 'repo' command is not found.
+    """
+    output = subprocess.check_output(
+        ["repo", "forall", "-p", "-c", "du", "-s", "-b", "."],
+        text=True,
+    )
+    return _parse_forall_du_output(output)
+
+
+def _parse_forall_du_output(output: str) -> dict[str, int]:
+    """Pairs each 'project <path>/' header with the 'du' total after it."""
+    # Compiled once instead of on every line of the output.
+    du_total = re.compile(r"^(\d+)\s+\.$")
+    project_sizes: dict[str, int] = {}
+    pending_project = None
+
+    for raw_line in output.splitlines():
+        line = raw_line.strip()
+        if line.startswith("project "):
+            # Keep only the path: drop the "project " prefix and trailing "/".
+            pending_project = line.removeprefix("project ").removesuffix("/")
+            continue
+        if pending_project is None:
+            # Blank line or stray output that belongs to no project header.
+            continue
+        match = du_total.match(line)
+        if match:
+            project_sizes[pending_project] = int(match.group(1))
+            # One total per header; wait for the next project header.
+            pending_project = None
+
+    return project_sizes
+
+
+def get_dot_repo_size() -> int:
+    """Gets the disk usage of the '.repo' directory in bytes.
+
+    Returns:
+        The size reported by 'du -s -b .repo'. Returns 0 if 'du' exits with
+        a non-zero status (for example because '.repo' does not exist) or
+        prints no parseable total.
+
+    Raises:
+        FileNotFoundError: If the 'du' command is not found.
+    """
+    try:
+        result = subprocess.check_output(["du", "-s", "-b", ".repo"], text=True)
+    except subprocess.CalledProcessError:
+        # Best-effort by contract: a failing 'du' yields 0 instead of
+        # aborting the report. stderr is not captured here, so du's own
+        # error message still reaches the user.
+        return 0
+
+    fields = result.split()
+    try:
+        # The first whitespace-separated field of du's output is the size.
+        return int(fields[0])
+    except (IndexError, ValueError):
+        # Empty or unexpected output is treated like a failed measurement.
+        return 0
+
+
+def main():
+    """Prints a CSV report of '.repo' and per-project disk usage to stdout.
+
+    Exits with an error message when the current directory has no '.repo'
+    subdirectory.
+    """
+    if not os.path.isdir(".repo"):
+        sys.exit("Error: .repo directory not found, run inside a repo root.")
+
+    # Take both measurements before printing anything, projects first.
+    per_project = get_repo_disk_usage()
+    rows = [(".repo", get_dot_repo_size())]
+    rows += sorted(per_project.items())
+
+    print("project_name,size_bytes")
+    for label, num_bytes in rows:
+        print(f"{label},{num_bytes}")
+
+
+# Run the report only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+ main()