#!/usr/bin/env python3
"""Report line and token statistics for the Python files under ``tinygrad``.

Usage (as implemented by ``main``):

* no argument        -- statistics for ``./tinygrad``
* one argument       -- statistics for ``<arg>/tinygrad``
* two arguments      -- per-file difference between ``<old>/tinygrad`` and
                        ``<new>/tinygrad``, printed inside a fenced block
"""
import os, sys
import token
import tokenize
import itertools

# Only these token types are counted; comments, newlines, indentation and
# other structural tokens are ignored.
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]


def gen_stats(base_path="."):
    """Collect per-file statistics for every ``.py`` file under ``base_path/tinygrad``.

    Returns a list of ``[relative_path, line_count, tokens_per_line]`` rows.
    ``relative_path`` is relative to ``base_path`` and always uses ``/`` as the
    separator. ``line_count`` is the number of distinct lines on which a
    whitelisted token starts.
    """
    table = []
    for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
        for name in files:
            if not name.endswith(".py"):
                continue
            filepath = os.path.join(path, name)
            # Normalise to "/" so the names are identical on every platform
            # and the per-directory summary (which splits on "/") keeps working.
            relfilepath = os.path.relpath(filepath, base_path).replace(os.sep, "/")
            with tokenize.open(filepath) as file_:
                tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
            token_count = len(tokens)
            line_count = len({t.start[0] for t in tokens})
            # A file without any counted token (e.g. an empty __init__.py) has
            # zero lines; report a ratio of 0.0 instead of dividing by zero.
            tokens_per_line = token_count / line_count if line_count else 0.0
            table.append([relfilepath, line_count, tokens_per_line])
    return table


def _index_by_name(table):
    """Map file name -> stats row, keeping the first row seen for each name."""
    index = {}
    for row in table:
        index.setdefault(row[0], row)
    return index


def gen_diff(table_old, table_new):
    """Compare two tables produced by ``gen_stats``.

    Returns rows of ``[name, lines, lines_diff, tokens_per_line, ratio_diff]``:
    first the files only present in ``table_new``, then the files only present
    in ``table_old`` (reported with 0 lines and negative diffs), then the files
    present in both whose line count or ratio changed. Files present in both
    with identical numbers are omitted. Within each group rows are ordered by
    file name so the result is deterministic.
    """
    old = _index_by_name(table_old)
    new = _index_by_name(table_new)
    table = []

    # Added files: the diff equals the new value.
    for name in sorted(new.keys() - old.keys()):
        _, lines, ratio = new[name]
        table.append([name, lines, lines, ratio, ratio])

    # Deleted files: written as "0 - x" (not "-x") so a zero ratio stays 0.0
    # rather than becoming -0.0.
    for name in sorted(old.keys() - new.keys()):
        _, lines, ratio = old[name]
        table.append([name, 0, 0 - lines, 0, 0 - ratio])

    # Files on both sides: keep only those whose numbers actually moved.
    for name in sorted(old.keys() & new.keys()):
        _, old_lines, old_ratio = old[name]
        _, new_lines, new_ratio = new[name]
        lines_diff, ratio_diff = new_lines - old_lines, new_ratio - old_ratio
        if lines_diff != 0 or ratio_diff != 0:
            table.append([name, new_lines, lines_diff, new_ratio, ratio_diff])
    return table


def display_diff(diff):
    """Format ``diff`` as a string, prefixing strictly positive values with ``+``."""
    return "+" + str(diff) if diff > 0 else str(diff)


def main():
    """Print the statistics table or the diff table, depending on ``sys.argv``."""
    # Imported here rather than at module level so that the helpers above can
    # be imported without the third-party ``tabulate`` package being installed.
    from tabulate import tabulate

    diff_mode = len(sys.argv) == 3
    if diff_mode:
        headers = ["Name", "Lines", "Diff", "Tokens/Line", "Diff"]
        table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
    else:
        headers = ["Name", "Lines", "Tokens/Line"]
        # Any argument count other than exactly one falls back to the current directory.
        table = gen_stats(sys.argv[1] if len(sys.argv) == 2 else ".")

    if not table:
        return

    # Largest files first.
    rows = [headers] + sorted(table, key=lambda x: -x[1])
    if diff_mode:
        print("### Changes")
        print("```")
        print(tabulate(rows, headers="firstrow", intfmt=(..., "d", "+d"), floatfmt=(..., ..., ..., ".1f", "+.1f"))+"\n")
        print(f"\ntotal lines changes: {display_diff(sum(x[2] for x in table))}")
        print("```")
    else:
        print(tabulate(rows, headers="firstrow", floatfmt=".1f")+"\n")
        # groupby only merges adjacent items, so sort by directory first.
        per_dir = sorted((x[0].rsplit("/", 1)[0], x[1]) for x in table)
        for dir_name, group in itertools.groupby(per_dir, key=lambda x: x[0]):
            print(f"{dir_name:30s} : {sum(x[1] for x in group):6d}")
        print(f"\ntotal line count: {sum(x[1] for x in table)}")


if __name__ == "__main__":
    main()