git-stats

a collection of scripts to analyze git repositories
git clone https://git.ce9e.org/git-stats.git

commit
008b65b15a2e097fd208935494f632b5fa241df1
parent
3a5b51e2e6228943166355ae763d0e48ca6e024f
Author
Tobias Bengfort <tobias.bengfort@posteo.de>
Date
2025-05-10 19:24
add git-retained

Diffstat

A git-retained.py 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 95 insertions, 0 deletions


diff --git a/git-retained.py b/git-retained.py

@@ -0,0 +1,95 @@
   -1     1 # https://github.com/erikbern/git-of-theseus
   -1     2 
   -1     3 import csv
   -1     4 import datetime
   -1     5 import functools
   -1     6 import subprocess
   -1     7 import sys
   -1     8 
   -1     9 
   -1    10 def run(cmd):
   -1    11     p = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, encoding='utf-8')
   -1    12     return p.stdout.strip()
   -1    13 
   -1    14 
   -1    15 def get_first_date():
   -1    16     s = run(['sh', '-c', 'git log --reverse --format=%ad --date=iso | head -n 1'])
   -1    17     dt = datetime.datetime.fromisoformat(s)
   -1    18     return dt.date()
   -1    19 
   -1    20 
   -1    21 @functools.cache
   -1    22 def get_rev_at(date):
   -1    23     return (
   -1    24         run(['git', 'rev-list', '-n1', '--before', date.isoformat(), 'HEAD'])
   -1    25         or run(['git', 'hash-object', '-t', 'tree', '/dev/null'])
   -1    26     )
   -1    27 
   -1    28 
   -1    29 def get_changes(rev1, rev2):
   -1    30     s = run(['git', 'diff', '--shortstat', rev1, rev2])
   -1    31     parts = s.replace(',', '').split()
   -1    32 
   -1    33     try:
   -1    34         i = parts.index('insertions(+)')
   -1    35         added = int(parts[i - 1], 10)
   -1    36     except ValueError:
   -1    37         added = 0
   -1    38 
   -1    39     try:
   -1    40         i = parts.index('deletions(-)')
   -1    41         removed = int(parts[i - 1], 10)
   -1    42     except ValueError:
   -1    43         removed = 0
   -1    44 
   -1    45     return added, removed
   -1    46 
   -1    47 
   -1    48 def retained(a, b, c):
   -1    49     added_after_b, _ = get_changes(b, c)
   -1    50     added_in_total, _ = get_changes(a, c)
   -1    51     return added_in_total - added_after_b
   -1    52 
   -1    53 
   -1    54 def iter_months_since(start):
   -1    55     today = datetime.date.today()
   -1    56     year = start.year
   -1    57     month = start.month
   -1    58     while True:
   -1    59         date = datetime.date(year, month, 1)
   -1    60         yield date
   -1    61         if date > today:
   -1    62             break
   -1    63         month += 1
   -1    64         if month == 13:
   -1    65             year += 1
   -1    66             month = 1
   -1    67 
   -1    68 
   -1    69 def iter_years_since(start):
   -1    70     today = datetime.date.today()
   -1    71     return range(start.year, today.year + 1)
   -1    72 
   -1    73 
   -1    74 if __name__ == '__main__':
   -1    75     start = get_first_date()
   -1    76     w = csv.writer(sys.stdout)
   -1    77     w.writerow([''] + [str(year) for year in iter_years_since(start)])
   -1    78 
   -1    79     for date in iter_months_since(start):
   -1    80         rev = get_rev_at(date)
   -1    81         row = [date.isoformat()]
   -1    82 
   -1    83         for year in iter_years_since(start):
   -1    84             date_start = datetime.date(year, 1, 1)
   -1    85             date_end = datetime.date(year + 1, 1, 1)
   -1    86             rev_start = get_rev_at(date_start)
   -1    87             rev_end = get_rev_at(date_end)
   -1    88             if date <= date_start:
   -1    89                 row.append('0')
   -1    90             elif date <= date_end:
   -1    91                 row.append(str(retained(rev_start, rev, rev)))
   -1    92             else:
   -1    93                 row.append(str(retained(rev_start, rev_end, rev)))
   -1    94 
   -1    95         w.writerow(row)