From aa60feda48731848540ba57b9dbd47ecfc9f19b4 Mon Sep 17 00:00:00 2001
From: terafo <19949489+terafo@users.noreply.github.com>
Date: Fri, 7 Jul 2023 20:43:44 +0300
Subject: [PATCH] Fix naming conflict with huggingface datasets (#1161)

* Rename in files

* Move files

* Moved to extra/datasets as suggested

* Changes to files

* Fixed stupid mistake

---------

Co-authored-by: terafo
---
 .gitignore                                    | 20 +++----
 datasets/imagenet_download.py                 | 51 ------------------
 docs/env_vars.md                              |  2 +-
 docs/quickstart.md                            |  4 +-
 examples/hlb_cifar10.py                       |  2 +-
 examples/hlb_cifar10_torch.py                 |  2 +-
 examples/mlperf/model_eval.py                 | 12 ++---
 examples/mnist_gan.py                         |  2 +-
 examples/serious_mnist.py                     |  2 +-
 examples/train_efficientnet.py                |  4 +-
 examples/train_resnet.py                      |  2 +-
 extra/augment.py                              |  2 +-
 {datasets => extra/datasets}/__init__.py      |  0
 {datasets => extra/datasets}/coco.py          |  2 +-
 {datasets => extra/datasets}/imagenet.py      |  2 +-
 extra/datasets/imagenet_download.py           | 51 ++++++++++++++++++
 {datasets => extra/datasets}/kits19.py        |  4 +-
 {datasets => extra/datasets}/librispeech.py   |  6 +--
 .../datasets}/mnist/t10k-images-idx3-ubyte.gz | Bin
 .../datasets}/mnist/t10k-labels-idx1-ubyte.gz | Bin
 .../mnist/train-images-idx3-ubyte.gz          | Bin
 .../mnist/train-labels-idx1-ubyte.gz          | Bin
 {datasets => extra/datasets}/openimages.py    |  2 +-
 .../datasets}/preprocess_imagenet.py          |  2 +-
 {datasets => extra/datasets}/squad.py         |  2 +-
 test/external/external_hlb_cifar.py           |  2 +-
 test/extra/test_lr_scheduler.py               |  2 +-
 test/models/test_end2end.py                   |  2 +-
 test/models/test_mnist.py                     |  2 +-
 29 files changed, 92 insertions(+), 92 deletions(-)
 delete mode 100644 datasets/imagenet_download.py
 rename {datasets => extra/datasets}/__init__.py (100%)
 rename {datasets => extra/datasets}/coco.py (98%)
 rename {datasets => extra/datasets}/imagenet.py (95%)
 create mode 100644 extra/datasets/imagenet_download.py
 rename {datasets => extra/datasets}/kits19.py (98%)
 rename {datasets => extra/datasets}/librispeech.py (94%)
 rename {datasets => extra/datasets}/mnist/t10k-images-idx3-ubyte.gz (100%)
 rename {datasets => extra/datasets}/mnist/t10k-labels-idx1-ubyte.gz (100%)
 rename {datasets => extra/datasets}/mnist/train-images-idx3-ubyte.gz (100%)
 rename {datasets => extra/datasets}/mnist/train-labels-idx1-ubyte.gz (100%)
 rename {datasets => extra/datasets}/openimages.py (99%)
 rename {datasets => extra/datasets}/preprocess_imagenet.py (91%)
 rename {datasets => extra/datasets}/squad.py (98%)

diff --git a/.gitignore b/.gitignore
index a2c9c5d1..6bb74b33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,14 +20,14 @@ recognize*
 disassemblers/applegpu
 disassemblers/cuda_ioctl_sniffer
 *.prof
-datasets/cifar-10-python.tar.gz
-datasets/librispeech/
-datasets/imagenet/
-datasets/kits19/
-datasets/squad/
-datasets/img_align_celeba*
-datasets/open-images-v6-mlperf
-datasets/kits/
-datasets/COCO/
-datasets/audio*
+extra/datasets/cifar-10-python.tar.gz
+extra/datasets/librispeech/
+extra/datasets/imagenet/
+extra/datasets/kits19/
+extra/datasets/squad/
+extra/datasets/img_align_celeba*
+extra/datasets/open-images-v6-mlperf
+extra/datasets/kits/
+extra/datasets/COCO/
+extra/datasets/audio*
 venv
diff --git a/datasets/imagenet_download.py b/datasets/imagenet_download.py
deleted file mode 100644
index 08b9dd64..00000000
--- a/datasets/imagenet_download.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Python version of https://gist.github.com/antoinebrl/7d00d5cb6c95ef194c737392ef7e476a
-from extra.utils import download_file
-from pathlib import Path
-from tqdm import tqdm
-import tarfile, os
-
-def imagenet_extract(file, path, small=False):
-  with tarfile.open(name=file) as tar:
-    if small: # Show progressbar only for big files
-      for member in tar.getmembers(): tar.extract(path=path, member=member)
-    else:
-      for member in tqdm(iterable=tar.getmembers(), total=len(tar.getmembers())): tar.extract(path=path, member=member)
-    tar.close()
-
-def imagenet_prepare_val():
-  # Read in the labels file
-  with open(Path(__file__).parent.parent / "datasets/imagenet/imagenet_2012_validation_synset_labels.txt", 'r') as f:
-    labels = f.read().splitlines()
-    f.close()
-  # Get a list of images
-  images = os.listdir(Path(__file__).parent.parent / "datasets/imagenet/val")
-  images.sort()
-  # Create folders and move files into those
-  for co,dir in enumerate(labels):
-    os.makedirs(Path(__file__).parent.parent / "datasets/imagenet/val" / dir, exist_ok=True)
-    os.replace(Path(__file__).parent.parent / "datasets/imagenet/val" / images[co], Path(__file__).parent.parent / "datasets/imagenet/val" / dir / images[co])
-  os.remove(Path(__file__).parent.parent / "datasets/imagenet/imagenet_2012_validation_synset_labels.txt")
-
-def imagenet_prepare_train():
-  images = os.listdir(Path(__file__).parent.parent / "datasets/imagenet/train")
-  for co,tarf in enumerate(images):
-    # for each tar file found. Create a folder with its name. Extract into that folder. Remove tar file
-    if Path(Path(__file__).parent.parent / "datasets/imagenet/train" / images[co]).is_file():
-      images[co] = tarf[:-4] # remove .tar from extracted tar files
-      os.makedirs(Path(__file__).parent.parent / "datasets/imagenet/train" / images[co], exist_ok=True)
-      imagenet_extract(Path(__file__).parent.parent / "datasets/imagenet/train" / tarf, Path(__file__).parent.parent / "datasets/imagenet/train" / images[co], small=True)
-      os.remove(Path(__file__).parent.parent / "datasets/imagenet/train" / tarf)
-
-if __name__ == "__main__":
-  os.makedirs(Path(__file__).parent.parent / "datasets/imagenet", exist_ok=True)
-  os.makedirs(Path(__file__).parent.parent / "datasets/imagenet/val", exist_ok=True)
-  os.makedirs(Path(__file__).parent.parent / "datasets/imagenet/train", exist_ok=True)
-  download_file("https://raw.githubusercontent.com/raghakot/keras-vis/master/resources/imagenet_class_index.json", Path(__file__).parent.parent / "datasets/imagenet/imagenet_class_index.json")
-  download_file("https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_2012_validation_synset_labels.txt", Path(__file__).parent.parent / "datasets/imagenet/imagenet_2012_validation_synset_labels.txt")
-  download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar", Path(__file__).parent.parent / "datasets/imagenet/ILSVRC2012_img_val.tar") # 7GB
-  imagenet_extract(Path(__file__).parent.parent / "datasets/imagenet/ILSVRC2012_img_val.tar", Path(__file__).parent.parent / "datasets/imagenet/val")
-  imagenet_prepare_val()
-  if os.getenv('IMGNET_TRAIN', None) is not None:
-    download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar", Path(__file__).parent.parent / "datasets/imagenet/ILSVRC2012_img_train.tar") #138GB!
-    imagenet_extract(Path(__file__).parent.parent / "datasets/imagenet/ILSVRC2012_img_train.tar", Path(__file__).parent.parent / "datasets/imagenet/train")
-    imagenet_prepare_train()
diff --git a/docs/env_vars.md b/docs/env_vars.md
index 99119352..2dd04f55 100644
--- a/docs/env_vars.md
+++ b/docs/env_vars.md
@@ -188,7 +188,7 @@ Variable | Possible Value(s) | Description
 ---|---|---
 BS | [8, 16, 32, 64, 128] | batch size to use
 
-### datasets/imagenet_download.py
+### extra/datasets/imagenet_download.py
 
 Variable | Possible Value(s) | Description
 ---|---|---
diff --git a/docs/quickstart.md b/docs/quickstart.md
index f6d430e5..ae06caef 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -169,11 +169,11 @@ There is a simpler way to do this just by using `get_parameters(net)` from `tiny
 The parameters are just listed out explicitly here for clarity.
 
 Now that we have our network, loss function, and optimizer defined all we are missing is the data to train on!
-There are a couple of dataset loaders in tinygrad located in [/datasets](/datasets).
+There are a couple of dataset loaders in tinygrad located in [/extra/datasets](/extra/datasets).
 We will be using the MNIST dataset loader.
 
 ```python
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 ```
 
 Now we have everything we need to start training our neural network.
diff --git a/examples/hlb_cifar10.py b/examples/hlb_cifar10.py
index 56e965c1..a7e993b6 100644
--- a/examples/hlb_cifar10.py
+++ b/examples/hlb_cifar10.py
@@ -4,7 +4,7 @@
 # https://siboehm.com/articles/22/CUDA-MMM
 import time
 import numpy as np
-from datasets import fetch_cifar
+from extra.datasets import fetch_cifar
 from tinygrad import nn
 from tinygrad.state import get_parameters
 from tinygrad.nn import optim
diff --git a/examples/hlb_cifar10_torch.py b/examples/hlb_cifar10_torch.py
index b68f1c06..72e4e8a8 100644
--- a/examples/hlb_cifar10_torch.py
+++ b/examples/hlb_cifar10_torch.py
@@ -5,7 +5,7 @@ import platform
 from torch import nn
 from torch import optim
 
-from datasets import fetch_cifar
+from extra.datasets import fetch_cifar
 from tinygrad.helpers import getenv
 
 # allow TF32
diff --git a/examples/mlperf/model_eval.py b/examples/mlperf/model_eval.py
index 566d85b9..4d67848f 100644
--- a/examples/mlperf/model_eval.py
+++ b/examples/mlperf/model_eval.py
@@ -25,7 +25,7 @@ def eval_resnet():
   mdljit = TinyJit(mdlrun)
 
   # evaluation on the mlperf classes of the validation set from imagenet
-  from datasets.imagenet import iterate
+  from extra.datasets.imagenet import iterate
   from extra.helpers import cross_process
 
   BS = 64
@@ -56,7 +56,7 @@ def eval_resnet():
 def eval_unet3d():
   # UNet3D
   from models.unet3d import UNet3D
-  from datasets.kits19 import iterate, sliding_window_inference
+  from extra.datasets.kits19 import iterate, sliding_window_inference
   from examples.mlperf.metrics import get_dice_score
   mdl = UNet3D()
   mdl.load_from_pretrained()
@@ -86,7 +86,7 @@ def eval_retinanet():
     x /= input_std
     return x
 
-  from datasets.openimages import openimages, iterate
+  from extra.datasets.openimages import openimages, iterate
   from pycocotools.coco import COCO
   from pycocotools.cocoeval import COCOeval
   from contextlib import redirect_stdout
@@ -135,7 +135,7 @@ def eval_rnnt():
   mdl = RNNT()
   mdl.load_from_pretrained()
 
-  from datasets.librispeech import iterate
+  from extra.datasets.librispeech import iterate
   from examples.mlperf.metrics import word_error_rate
 
   LABELS = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
@@ -168,7 +168,7 @@ def eval_bert():
   def run(input_ids, input_mask, segment_ids): return mdl(input_ids, input_mask, segment_ids).realize()
 
-  from datasets.squad import iterate
+  from extra.datasets.squad import iterate
   from examples.mlperf.helpers import get_bert_qa_prediction
   from examples.mlperf.metrics import f1_score
   from transformers import BertTokenizer
@@ -198,7 +198,7 @@ def eval_mrcnn():
   from tqdm import tqdm
   from models.mask_rcnn import MaskRCNN
   from models.resnet import ResNet
-  from datasets.coco import BASEDIR, images, convert_prediction_to_coco_bbox, convert_prediction_to_coco_mask, accumulate_predictions_for_coco, evaluate_predictions_on_coco, iterate
+  from extra.datasets.coco import BASEDIR, images, convert_prediction_to_coco_bbox, convert_prediction_to_coco_mask, accumulate_predictions_for_coco, evaluate_predictions_on_coco, iterate
   from examples.mask_rcnn import compute_prediction_batched, Image
   mdl = MaskRCNN(ResNet(50, num_classes=None, stride_in_1x1=True))
   mdl.load_from_pretrained()
diff --git a/examples/mnist_gan.py b/examples/mnist_gan.py
index 3ef893da..3a1f21f1 100644
--- a/examples/mnist_gan.py
+++ b/examples/mnist_gan.py
@@ -7,7 +7,7 @@ from tinygrad.state import get_parameters
 from tinygrad.tensor import Tensor
 from tinygrad.helpers import getenv
 from tinygrad.nn import optim
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 
 class LinearGen:
   def __init__(self):
diff --git a/examples/serious_mnist.py b/examples/serious_mnist.py
index f5697ad5..ea796de5 100644
--- a/examples/serious_mnist.py
+++ b/examples/serious_mnist.py
@@ -6,7 +6,7 @@ from tinygrad.state import get_parameters
 from tinygrad.tensor import Tensor
 from tinygrad.nn import BatchNorm2d, optim
 from tinygrad.helpers import getenv
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 from extra.augment import augment_img
 from extra.training import train, evaluate, sparse_categorical_crossentropy
 GPU = getenv("GPU")
diff --git a/examples/train_efficientnet.py b/examples/train_efficientnet.py
index b53767fe..9d769f97 100644
--- a/examples/train_efficientnet.py
+++ b/examples/train_efficientnet.py
@@ -7,7 +7,7 @@ from tinygrad.state import get_parameters
 from tinygrad.nn import optim
 from tinygrad.helpers import getenv
 from tinygrad.tensor import Tensor
-from datasets import fetch_cifar
+from extra.datasets import fetch_cifar
 from models.efficientnet import EfficientNet
 
 class TinyConvNet:
@@ -46,7 +46,7 @@ if __name__ == "__main__":
   print(f"training with batch size {BS} for {steps} steps")
 
   if IMAGENET:
-    from datasets.imagenet import fetch_batch
+    from extra.datasets.imagenet import fetch_batch
     def loader(q):
       while 1:
         try:
diff --git a/examples/train_resnet.py b/examples/train_resnet.py
index e6c63c0c..81b2e7cd 100755
--- a/examples/train_resnet.py
+++ b/examples/train_resnet.py
@@ -7,7 +7,7 @@ from tinygrad.nn import optim
 from tinygrad.helpers import getenv
 from extra.training import train, evaluate
 from models.resnet import ResNet
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 
 
 class ComposeTransforms:
diff --git a/extra/augment.py b/extra/augment.py
index 1be046d3..c520edae 100644
--- a/extra/augment.py
+++ b/extra/augment.py
@@ -4,7 +4,7 @@ import os
 import sys
 sys.path.append(os.getcwd())
 sys.path.append(os.path.join(os.getcwd(), 'test'))
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 from tqdm import trange
 
 def augment_img(X, rotate=10, px=3):
diff --git a/datasets/__init__.py b/extra/datasets/__init__.py
similarity index 100%
rename from datasets/__init__.py
rename to extra/datasets/__init__.py
diff --git a/datasets/coco.py b/extra/datasets/coco.py
similarity index 98%
rename from datasets/coco.py
rename to extra/datasets/coco.py
index f44da98e..b5d18886 100644
--- a/datasets/coco.py
+++ b/extra/datasets/coco.py
@@ -12,7 +12,7 @@ iou = _mask.iou
 merge = _mask.merge
 frPyObjects = _mask.frPyObjects
 
-BASEDIR = pathlib.Path(__file__).parent.parent / "datasets" / "COCO"
+BASEDIR = pathlib.Path(__file__).parent.parent / "extra" / "datasets" / "COCO"
 BASEDIR.mkdir(exist_ok=True)
 
 def create_dict(key_row, val_row, rows): return {row[key_row]:row[val_row] for row in rows}
diff --git a/datasets/imagenet.py b/extra/datasets/imagenet.py
similarity index 95%
rename from datasets/imagenet.py
rename to extra/datasets/imagenet.py
index 292033ff..5048314b 100644
--- a/datasets/imagenet.py
+++ b/extra/datasets/imagenet.py
@@ -5,7 +5,7 @@ import numpy as np
 from PIL import Image
 import functools, pathlib
 
-BASEDIR = pathlib.Path(__file__).parent.parent / "datasets/imagenet"
+BASEDIR = pathlib.Path(__file__).parent.parent / "extra/datasets/imagenet"
 
 ci = json.load(open(BASEDIR / "imagenet_class_index.json"))
 cir = {v[0]: int(k) for k,v in ci.items()}
diff --git a/extra/datasets/imagenet_download.py b/extra/datasets/imagenet_download.py
new file mode 100644
index 00000000..ebb017e4
--- /dev/null
+++ b/extra/datasets/imagenet_download.py
@@ -0,0 +1,51 @@
+# Python version of https://gist.github.com/antoinebrl/7d00d5cb6c95ef194c737392ef7e476a
+from extra.utils import download_file
+from pathlib import Path
+from tqdm import tqdm
+import tarfile, os
+
+def imagenet_extract(file, path, small=False):
+  with tarfile.open(name=file) as tar:
+    if small: # Show progressbar only for big files
+      for member in tar.getmembers(): tar.extract(path=path, member=member)
+    else:
+      for member in tqdm(iterable=tar.getmembers(), total=len(tar.getmembers())): tar.extract(path=path, member=member)
+    tar.close()
+
+def imagenet_prepare_val():
+  # Read in the labels file
+  with open(Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_2012_validation_synset_labels.txt", 'r') as f:
+    labels = f.read().splitlines()
+    f.close()
+  # Get a list of images
+  images = os.listdir(Path(__file__).parent.parent / "extra/datasets/imagenet/val")
+  images.sort()
+  # Create folders and move files into those
+  for co,dir in enumerate(labels):
+    os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/val" / dir, exist_ok=True)
+    os.replace(Path(__file__).parent.parent / "extra/datasets/imagenet/val" / images[co], Path(__file__).parent.parent / "extra/datasets/imagenet/val" / dir / images[co])
+  os.remove(Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_2012_validation_synset_labels.txt")
+
+def imagenet_prepare_train():
+  images = os.listdir(Path(__file__).parent.parent / "extra/datasets/imagenet/train")
+  for co,tarf in enumerate(images):
+    # for each tar file found. Create a folder with its name. Extract into that folder. Remove tar file
+    if Path(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / images[co]).is_file():
+      images[co] = tarf[:-4] # remove .tar from extracted tar files
+      os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / images[co], exist_ok=True)
+      imagenet_extract(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / tarf, Path(__file__).parent.parent / "extra/datasets/imagenet/train" / images[co], small=True)
+      os.remove(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / tarf)
+
+if __name__ == "__main__":
+  os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet", exist_ok=True)
+  os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/val", exist_ok=True)
+  os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/train", exist_ok=True)
+  download_file("https://raw.githubusercontent.com/raghakot/keras-vis/master/resources/imagenet_class_index.json", Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_class_index.json")
+  download_file("https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_2012_validation_synset_labels.txt", Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_2012_validation_synset_labels.txt")
+  download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_val.tar") # 7GB
+  imagenet_extract(Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_val.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/val")
+  imagenet_prepare_val()
+  if os.getenv('IMGNET_TRAIN', None) is not None:
+    download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_train.tar") #138GB!
+    imagenet_extract(Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_train.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/train")
+    imagenet_prepare_train()
diff --git a/datasets/kits19.py b/extra/datasets/kits19.py
similarity index 98%
rename from datasets/kits19.py
rename to extra/datasets/kits19.py
index bd9e0a5e..b49d3cc0 100644
--- a/datasets/kits19.py
+++ b/extra/datasets/kits19.py
@@ -9,7 +9,7 @@ import torch
 import torch.nn.functional as F
 from tinygrad.tensor import Tensor
 
-BASEDIR = Path(__file__).parent.parent.resolve() / "datasets" / "kits19" / "data"
+BASEDIR = Path(__file__).parent.parent.resolve() / "extra" / "datasets" / "kits19" / "data"
 
 """
 To download the dataset:
@@ -19,7 +19,7 @@ cd kits19
 pip3 install -r requirements.txt
 python3 -m starter_code.get_imaging
 cd ..
-mv kits datasets
+mv kits extra/datasets
 ```
 """
 
diff --git a/datasets/librispeech.py b/extra/datasets/librispeech.py
similarity index 94%
rename from datasets/librispeech.py
rename to extra/datasets/librispeech.py
index 7b77975c..2e85a335 100644
--- a/datasets/librispeech.py
+++ b/extra/datasets/librispeech.py
@@ -5,7 +5,7 @@ import librosa
 import soundfile
 
 """
-The dataset has to be downloaded manually from https://www.openslr.org/12/ and put in `datasets/librispeech`.
+The dataset has to be downloaded manually from https://www.openslr.org/12/ and put in `extra/datasets/librispeech`.
 For mlperf validation the dev-clean dataset is used.
 
 Then all the flacs have to be converted to wav using something like:
@@ -13,9 +13,9 @@ Then all the flacs have to be converted to wav using something like:
 ```
 for file in $(find * | grep flac); do ffmpeg -i $file -ar 16k "$(dirname $file)/$(basename $file .flac).wav"; done
 ```
-Then this [file](https://github.com/mlcommons/inference/blob/master/speech_recognition/rnnt/dev-clean-wav.json) has to also be put in `datasets/librispeech`.
+Then this [file](https://github.com/mlcommons/inference/blob/master/speech_recognition/rnnt/dev-clean-wav.json) has to also be put in `extra/datasets/librispeech`.
 """
-BASEDIR = pathlib.Path(__file__).parent.parent / "datasets/librispeech"
+BASEDIR = pathlib.Path(__file__).parent.parent / "extra/datasets/librispeech"
 
 with open(BASEDIR / "dev-clean-wav.json") as f:
   ci = json.load(f)
diff --git a/datasets/mnist/t10k-images-idx3-ubyte.gz b/extra/datasets/mnist/t10k-images-idx3-ubyte.gz
similarity index 100%
rename from datasets/mnist/t10k-images-idx3-ubyte.gz
rename to extra/datasets/mnist/t10k-images-idx3-ubyte.gz
diff --git a/datasets/mnist/t10k-labels-idx1-ubyte.gz b/extra/datasets/mnist/t10k-labels-idx1-ubyte.gz
similarity index 100%
rename from datasets/mnist/t10k-labels-idx1-ubyte.gz
rename to extra/datasets/mnist/t10k-labels-idx1-ubyte.gz
diff --git a/datasets/mnist/train-images-idx3-ubyte.gz b/extra/datasets/mnist/train-images-idx3-ubyte.gz
similarity index 100%
rename from datasets/mnist/train-images-idx3-ubyte.gz
rename to extra/datasets/mnist/train-images-idx3-ubyte.gz
diff --git a/datasets/mnist/train-labels-idx1-ubyte.gz b/extra/datasets/mnist/train-labels-idx1-ubyte.gz
similarity index 100%
rename from datasets/mnist/train-labels-idx1-ubyte.gz
rename to extra/datasets/mnist/train-labels-idx1-ubyte.gz
diff --git a/datasets/openimages.py b/extra/datasets/openimages.py
similarity index 99%
rename from datasets/openimages.py
rename to extra/datasets/openimages.py
index 11aee4d0..641b6bd6 100644
--- a/datasets/openimages.py
+++ b/extra/datasets/openimages.py
@@ -11,7 +11,7 @@ from tqdm import tqdm
 import pandas as pd
 import concurrent.futures
 
-BASEDIR = pathlib.Path(__file__).parent.parent / "datasets/open-images-v6-mlperf"
+BASEDIR = pathlib.Path(__file__).parent.parent / "extra/datasets/open-images-v6-mlperf"
 BUCKET_NAME = "open-images-dataset"
 BBOX_ANNOTATIONS_URL = "https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv"
 MAP_CLASSES_URL = "https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv"
diff --git a/datasets/preprocess_imagenet.py b/extra/datasets/preprocess_imagenet.py
similarity index 91%
rename from datasets/preprocess_imagenet.py
rename to extra/datasets/preprocess_imagenet.py
index cf126f14..69253979 100644
--- a/datasets/preprocess_imagenet.py
+++ b/extra/datasets/preprocess_imagenet.py
@@ -1,6 +1,6 @@
 from tinygrad.helpers import dtypes
 from tinygrad.tensor import Tensor
-from datasets.imagenet import iterate, get_val_files
+from extra.datasets.imagenet import iterate, get_val_files
 
 if __name__ == "__main__":
   #sz = len(get_val_files())
diff --git a/datasets/squad.py b/extra/datasets/squad.py
similarity index 98%
rename from datasets/squad.py
rename to extra/datasets/squad.py
index 495b90c9..a69ee986 100644
--- a/datasets/squad.py
+++ b/extra/datasets/squad.py
@@ -5,7 +5,7 @@ from transformers import BertTokenizer
 import numpy as np
 from extra.utils import download_file
 
-BASEDIR = Path(__file__).parent.parent / "datasets/squad"
+BASEDIR = Path(__file__).parent.parent / "extra/datasets/squad"
 def init_dataset():
   os.makedirs(BASEDIR, exist_ok=True)
   download_file("https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json", BASEDIR / "dev-v1.1.json")
diff --git a/test/external/external_hlb_cifar.py b/test/external/external_hlb_cifar.py
index 6376c00e..3eababf3 100644
--- a/test/external/external_hlb_cifar.py
+++ b/test/external/external_hlb_cifar.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 from examples.hlb_cifar10 import SpeedyResNet, fetch_batch
 from examples.hlb_cifar10_torch import SpeedyResNet as SpeedyResNetTorch
-from datasets import fetch_cifar
+from extra.datasets import fetch_cifar
 from test.models.test_end2end import compare_tiny_torch
 
 if __name__ == "__main__":
diff --git a/test/extra/test_lr_scheduler.py b/test/extra/test_lr_scheduler.py
index 3243b16f..1e39c3d4 100644
--- a/test/extra/test_lr_scheduler.py
+++ b/test/extra/test_lr_scheduler.py
@@ -6,7 +6,7 @@ from tinygrad.state import get_parameters
 from tinygrad.nn.optim import Adam
 from extra.lr_scheduler import MultiStepLR, ReduceLROnPlateau, CosineAnnealingLR, OneCycleLR
 from extra.training import train, evaluate
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 
 np.random.seed(1337)
 Tensor.manual_seed(1337)
diff --git a/test/models/test_end2end.py b/test/models/test_end2end.py
index b9021343..09a7f0ed 100644
--- a/test/models/test_end2end.py
+++ b/test/models/test_end2end.py
@@ -5,7 +5,7 @@ import numpy as np
 from tinygrad.state import get_parameters, get_state_dict
 from tinygrad.nn import optim, Linear, Conv2d, BatchNorm2d
 from tinygrad.tensor import Tensor
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 
 def compare_tiny_torch(model, model_torch, X, Y):
   Tensor.training = True
diff --git a/test/models/test_mnist.py b/test/models/test_mnist.py
index b47aacb1..3c975b66 100644
--- a/test/models/test_mnist.py
+++ b/test/models/test_mnist.py
@@ -5,7 +5,7 @@ from tinygrad.state import get_parameters
 from tinygrad.tensor import Tensor, Device
 from tinygrad.nn import optim, BatchNorm2d
 from extra.training import train, evaluate
-from datasets import fetch_mnist
+from extra.datasets import fetch_mnist
 
 # load the mnist dataset
 X_train, Y_train, X_test, Y_test = fetch_mnist()
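
Usage note: with this patch applied, every dataset helper lives under `extra.datasets` rather than the old top-level `datasets` package that collided with huggingface's `datasets`. Below is a minimal smoke test of the new import path, assuming it is run from the tinygrad repo root so `extra` is importable; the shapes in the comment are the usual MNIST loader output, included only as a sanity hint rather than something this patch guarantees:

```python
# Check the renamed package imports cleanly (run from the tinygrad repo root).
from extra.datasets import fetch_mnist

# fetch_mnist returns train/test images and labels, as used in test/models/test_mnist.py above.
X_train, Y_train, X_test, Y_test = fetch_mnist()
print(X_train.shape, X_test.shape)  # expected: (60000, 784) (10000, 784)
```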