tinygrad/sz.py

#!/usr/bin/env python3
import os, sys
import token
import tokenize
import itertools
from tabulate import tabulate

# Token types that are counted; tokens of any other type (e.g. comments, newlines, indentation) are filtered out in gen_stats.
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]

def is_docstring(t):
  """Tell whether token `t` looks like a docstring.

  A token qualifies when it is a STRING token whose text opens with three double quotes
  and whose source line, once surrounding whitespace is stripped, opens the same way
  (i.e. the string is the first thing on its line).
  """
  opener = '"""'
  if t.type != token.STRING: return False
  if not t.string.startswith(opener): return False
  return t.line.strip().startswith(opener)

def gen_stats(base_path="."):
  """Collect size statistics for every `.py` file below `<base_path>/tinygrad`.

  Directories whose path contains 'tinygrad/runtime/autogen' are left out.
  Returns a list of `[path relative to base_path, line count, tokens per line]` rows,
  where only whitelisted, non-docstring tokens are counted and a line is counted when
  at least one such token touches it. Files with no counted lines produce no row.
  """
  rows = []
  for dirpath, _, filenames in os.walk(os.path.join(base_path, "tinygrad")):
    # the exclusion depends only on the directory, so decide once per directory
    if 'tinygrad/runtime/autogen' in dirpath: continue
    for fname in filenames:
      if not fname.endswith(".py"): continue
      full = os.path.join(dirpath, fname)
      rel = os.path.relpath(full, base_path)
      with tokenize.open(full) as src:
        kept = [tok for tok in tokenize.generate_tokens(src.readline) if tok.type in TOKEN_WHITELIST and not is_docstring(tok)]
      # a multi-line token contributes every line from its start row to its end row
      touched = {ln for tok in kept for ln in range(tok.start[0], tok.end[0]+1)}
      if touched: rows.append([rel, len(touched), len(kept)/len(touched)])
  return rows

def gen_diff(table_old, table_new):
  """Compare two stats tables and return one row per file that differs.

  Both inputs are lists of `[name, line_count, tokens_per_line]` rows.
  Each output row is `[name, lines, lines_diff, tokens_per_line, tokens_per_line_diff]`:
    - files only in `table_new` are reported with their full values as the diff,
    - files only in `table_old` are reported with zeros and negated values as the diff,
    - files in both are reported only when the line count or the tokens/line changed.
  Rows are emitted as added, then deleted, then changed files, each group in name order,
  so the result does not depend on set iteration order.
  """
  # index rows by file name once instead of rescanning the tables for every file;
  # setdefault keeps the first row for a name, matching a linear first-match search
  old_by_name, new_by_name = {}, {}
  for row in table_old: old_by_name.setdefault(row[0], row)
  for row in table_new: new_by_name.setdefault(row[0], row)

  table = []
  for name in sorted(new_by_name.keys() - old_by_name.keys()):
    new = new_by_name[name]
    table.append([name, new[1], new[1]-0, new[2], new[2]-0])
  for name in sorted(old_by_name.keys() - new_by_name.keys()):
    old = old_by_name[name]
    table.append([name, 0, 0 - old[1], 0, 0 - old[2]])
  for name in sorted(new_by_name.keys() & old_by_name.keys()):
    old, new = old_by_name[name], new_by_name[name]
    lines_diff, ratio_diff = new[1]-old[1], new[2]-old[2]
    if lines_diff != 0 or ratio_diff != 0: table.append([name, new[1], lines_diff, new[2], ratio_diff])
  return table

def display_diff(diff):
  """Format `diff` as text, putting an explicit "+" in front of strictly positive values."""
  sign = "+" if diff > 0 else ""
  return sign + str(diff)

if __name__ == "__main__":
  # Usage:
  #   sz.py            -> stats for the current directory
  #   sz.py PATH       -> stats for PATH
  #   sz.py OLD NEW    -> per-file diff between the stats of OLD and NEW
  diff_mode = len(sys.argv) == 3
  if diff_mode:
    headers = ["Name", "Lines", "Diff", "Tokens/Line", "Diff"]
    table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
  else:
    headers = ["Name", "Lines", "Tokens/Line"]
    table = gen_stats(sys.argv[1] if len(sys.argv) == 2 else ".")

  if table:
    if diff_mode:
      # the diff is wrapped in a heading and a fenced block
      print("### Changes")
      print("```")
      print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", intfmt=(..., "d", "+d"),
                     floatfmt=(..., ..., ..., ".1f", "+.1f"))+"\n")
      print(f"\ntotal lines changes: {display_diff(sum([x[2] for x in table]))}")
      print("```")
    else:
      # per-file table, largest files first
      print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")
      # per-directory totals; groupby needs its input sorted by the grouping key (the directory part of the path)
      for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1], x[2]) for x in table]), key=lambda x:x[0]):
        print(f"{dir_name:30s} : {sum([x[1] for x in group]):6d}")
      total_lines = sum([x[1] for x in table])
      print(f"\ntotal line count: {total_lines}")
      # MAX_LINE_COUNT (env var) is an optional upper bound on the total; -1 means no limit
      max_line_count = int(os.getenv("MAX_LINE_COUNT", "-1"))
      # raise explicitly instead of using `assert`, so the limit is still enforced under `python -O`
      if max_line_count != -1 and total_lines > max_line_count: raise AssertionError(f"OVER {max_line_count} LINES")
move line counter to python 2023-05-30 00:21:18 +08:00			`#!/usr/bin/env python3`
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`import os, sys`
move line counter to python 2023-05-30 00:21:18 +08:00			`import token`
			`import tokenize`
			`import itertools`
			`from tabulate import tabulate`

			`TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]`

datasets isn't a feature + filter docstrings (#4228) * datasets isn't a feature * filter docstrings in sz 2024-04-19 20:16:10 +08:00			`def is_docstring(t):`
			`return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')`

add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`def gen_stats(base_path="."):`
move line counter to python 2023-05-30 00:21:18 +08:00			`table = []`
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):`
move line counter to python 2023-05-30 00:21:18 +08:00			`for name in files:`
			`if not name.endswith(".py"): continue`
move autogen to runtime/autogen (#3254) 2024-01-27 04:44:19 +08:00			`if 'tinygrad/runtime/autogen' in path: continue`
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`filepath = os.path.join(path, name)`
			`relfilepath = os.path.relpath(filepath, base_path)`
move line counter to python 2023-05-30 00:21:18 +08:00			`with tokenize.open(filepath) as file_:`
datasets isn't a feature + filter docstrings (#4228) * datasets isn't a feature * filter docstrings in sz 2024-04-19 20:16:10 +08:00			`tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST and not is_docstring(t)]`
hotfix: make the line counter correct 2024-01-02 03:01:22 +08:00			`token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))`
hotfix: skip 0 line count files in sz.py 2024-04-23 15:56:03 +08:00			`if line_count > 0: table.append([relfilepath, line_count, token_count/line_count])`
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`return table`

			`def gen_diff(table_old, table_new):`
			`table = []`
			`files_new = set([x[0] for x in table_new])`
			`files_old = set([x[0] for x in table_old])`
			`added, deleted, unchanged = files_new - files_old, files_old - files_new, files_new & files_old`
			`if added:`
			`for file in added:`
			`file_stat = [stats for stats in table_new if file in stats]`
			`table.append([file_stat[0][0], file_stat[0][1], file_stat[0][1]-0, file_stat[0][2], file_stat[0][2]-0])`
			`if deleted:`
			`for file in deleted:`
			`file_stat = [stats for stats in table_old if file in stats]`
			`table.append([file_stat[0][0], 0, 0 - file_stat[0][1], 0, 0-file_stat[0][2]])`
			`if unchanged:`
			`for file in unchanged:`
			`file_stat_old = [stats for stats in table_old if file in stats]`
			`file_stat_new = [stats for stats in table_new if file in stats]`
fixes (#1893) 2023-09-22 07:20:27 +08:00			`if file_stat_new[0][1]-file_stat_old[0][1] != 0 or file_stat_new[0][2]-file_stat_old[0][2] != 0:`
fix some long lines in tests (#3006) * fix some long lines in tests * better 2024-01-04 12:53:33 +08:00			`table.append([file_stat_new[0][0], file_stat_new[0][1], file_stat_new[0][1]-file_stat_old[0][1], file_stat_new[0][2],`
			`file_stat_new[0][2]-file_stat_old[0][2]])`
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`return table`
move line counter to python 2023-05-30 00:21:18 +08:00
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`def display_diff(diff): return "+"+str(diff) if diff > 0 else str(diff)`
move line counter to python 2023-05-30 00:21:18 +08:00
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`if __name__ == "__main__":`
			`if len(sys.argv) == 3:`
			`headers = ["Name", "Lines", "Diff", "Tokens/Line", "Diff"]`
			`table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))`
			`elif len(sys.argv) == 2:`
			`headers = ["Name", "Lines", "Tokens/Line"]`
			`table = gen_stats(sys.argv[1])`
			`else:`
			`headers = ["Name", "Lines", "Tokens/Line"]`
			`table = gen_stats(".")`
move line counter to python 2023-05-30 00:21:18 +08:00
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`if table:`
			`if len(sys.argv) == 3:`
			`print("### Changes")`
			print("```")
fix some long lines in tests (#3006) * fix some long lines in tests * better 2024-01-04 12:53:33 +08:00			`print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", intfmt=(..., "d", "+d"),`
			`floatfmt=(..., ..., ..., ".1f", "+.1f"))+"\n")`
add diff mode to sz.py (#1872) 2023-09-16 12:43:47 +08:00			`print(f"\ntotal lines changes: {display_diff(sum([x[2] for x in table]))}")`
			print("```")
			`else:`
			`print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")`
			`for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1], x[2]) for x in table]), key=lambda x:x[0]):`
			`print(f"{dir_name:30s} : {sum([x[1] for x in group]):6d}")`
move graph to runtime, check line count with sz.py (#2842) * move graph to runtime, check line count with sz.py * oops, didn't save * dtype aliases * restore comment, REALCOUNT 2023-12-19 12:30:06 +08:00			`total_lines = sum([x[1] for x in table])`
			`print(f"\ntotal line count: {total_lines}")`
hotfix: don't import from tinygrad in sz.py 2023-12-19 12:49:46 +08:00			`max_line_count = int(os.getenv("MAX_LINE_COUNT", "-1"))`
add pickle support for pattern matchers [run_process_replay] (#6816) * add pickle support for pattern matchers [run_process_replay] * cleaner and all * no closures * fix tests * revert that * final * cleaner * python 3.8 fix * add round trip back * this * waste lines on this. that's the final line count * max print better * more targetted fix * regrettably add 3.8 support 2024-09-30 21:54:46 +08:00			`assert max_line_count == -1 or total_lines <= max_line_count, f"OVER {max_line_count} LINES"`