aboutsummaryrefslogtreecommitdiff
path: root/scripts/disk_usage.py
blob: 940cea73a11ed0dfb4fe5e672516c8e086065e4a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
"""Calculates the disk size of each repository managed by 'repo'.

This script invokes 'repo forall ...' to get the disk usage
for each project repository and prints the combined output as a CSV.
"""

import csv
import os
import re
import subprocess
import sys


def get_repo_disk_usage() -> dict[str, int]:
  """Collects the on-disk size of every project via 'repo forall'.

  Runs 'repo forall -p -c du -s -b .' and reads its output as a sequence of
  'project <path>/' header lines, each followed by a '<bytes> .' line printed
  by 'du' for that project.

  Returns:
    A dictionary mapping each project path (without the trailing '/') to the
    byte count reported for it. Headers that are never followed by a
    recognizable size line are left out, so empty or unparseable output
    produces an empty dictionary.

  Raises:
    subprocess.CalledProcessError: If the command returns a non-zero exit
      code.
    FileNotFoundError: If the 'repo' command is not found.
  """
  raw = subprocess.check_output(
      ["repo", "forall", "-p", "-c", "du", "-s", "-b", "."],
      text=True,
  )

  header = "project "
  size_line = re.compile(r"^(\d+)\s+\.$")
  sizes: dict[str, int] = {}
  pending = None  # Project whose size line has not been seen yet.

  for entry in map(str.strip, raw.strip().split("\n")):
    if entry.startswith(header):
      # A new header always replaces a project that is still pending.
      pending = entry.removeprefix(header).removesuffix("/")
      continue

    if pending is None:
      # Blank or stray line outside of any project section.
      continue

    found = size_line.match(entry)
    if found is None:
      # Not the size line (this includes blank lines); keep waiting.
      continue

    sizes[pending] = int(found.group(1))
    pending = None  # Only the first size line after a header counts.

  return sizes


def get_dot_repo_size() -> int:
  """Gets the disk usage of the '.repo' directory in bytes.

  Runs 'du -s -b .repo' relative to the current working directory and reads
  the byte count from the first whitespace-separated field of its output.

  Returns:
    The size of the '.repo' directory in bytes. Returns 0 if 'du' exits with
    a non-zero status (for example because the directory does not exist) or
    if its output does not start with a decimal number.

  Raises:
    FileNotFoundError: If the 'du' command is not found.
  """
  try:
    output = subprocess.check_output(["du", "-s", "-b", ".repo"], text=True)
  except subprocess.CalledProcessError:
    # Best effort: a failing 'du' is reported as size 0 instead of aborting
    # the whole report. stderr is not captured here, so whatever 'du' wrote
    # there is still passed through to the caller's stderr.
    return 0

  fields = output.split()
  if not fields or not fields[0].isdecimal():
    # Empty or unexpected output: treat it like a failed measurement.
    return 0
  return int(fields[0])


def main():
  """Prints a CSV report of disk usage for '.repo' and every project.

  Must be run from the root of a 'repo' checkout. The report is written to
  standard output with the header 'project_name,size_bytes', then one row for
  '.repo', then one row per project sorted by project path.

  Raises:
    SystemExit: If there is no '.repo' directory in the current working
      directory.
  """
  if not os.path.isdir(".repo"):
    sys.exit("Error: .repo directory not found, run inside a repo root.")

  project_sizes = get_repo_disk_usage()
  dot_repo_size = get_dot_repo_size()

  # Use the csv module so that project paths containing commas, quotes or
  # newlines are quoted instead of corrupting the row. '\n' keeps the line
  # endings the same as the plain print() calls this replaces.
  writer = csv.writer(sys.stdout, lineterminator="\n")
  writer.writerow(["project_name", "size_bytes"])
  writer.writerow([".repo", dot_repo_size])
  writer.writerows(sorted(project_sizes.items()))


# Run the report only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
  main()