--- a/tests/bindings/test_trainers.py	2024-04-07 18:21:19.443506351 +0200
+++ b/tests/bindings/test_trainers.py	2024-04-07 18:21:54.893466083 +0200
@@ -295,8 +295,8 @@
         tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
             [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
         )
-        tokenizer.train(files=["data/big.txt"], trainer=trainer)
+        tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)

-        tokenizer.save("data/tokenizer.json")
+        tokenizer.save("tests/data/tokenizer.json")

-        tokenizer.from_file("data/tokenizer.json")
+        tokenizer.from_file("tests/data/tokenizer.json")
--- a/tests/documentation/test_tutorial_train_from_iterators.py	2024-04-07 18:19:08.653593406 +0200
+++ b/tests/documentation/test_tutorial_train_from_iterators.py	2024-04-07 18:19:39.206906910 +0200
@@ -40,7 +40,7 @@
     def setup_gzip_files(self, train_files):
         with open(train_files["small"], "rt") as small:
             for n in range(3):
-                path = f"data/my-file.{n}.gz"
+                path = f"tests/data/my-file.{n}.gz"
                 with gzip.open(path, "wt") as f:
                     f.write(small.read())

@@ -87,11 +87,11 @@
         # START single_gzip
         import gzip

-        with gzip.open("data/my-file.0.gz", "rt") as f:
+        with gzip.open("tests/data/my-file.0.gz", "rt") as f:
             tokenizer.train_from_iterator(f, trainer=trainer)
         # END single_gzip
         # START multi_gzip
-        files = ["data/my-file.0.gz", "data/my-file.1.gz", "data/my-file.2.gz"]
+        files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz", "tests/data/my-file.2.gz"]

         def gzip_iterator():
             for path in files:
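For context, the multi_gzip block the last hunk touches trains a tokenizer from a generator that streams lines out of several gzip archives. Below is a minimal standalone sketch of that pattern; the BPE model, BpeTrainer, and vocab_size are assumptions made to keep the example runnable, while the file paths, the shape of gzip_iterator, and the train_from_iterator call come from the diff context above.

# Sketch of the multi-gzip training pattern exercised by the last hunk.
# The BPE model and BpeTrainer are illustrative assumptions; only the
# paths, gzip_iterator, and train_from_iterator come from the diff.
import gzip

from tokenizers import Tokenizer, models, trainers

tokenizer = Tokenizer(models.BPE())
trainer = trainers.BpeTrainer(vocab_size=1000)

files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz", "tests/data/my-file.2.gz"]

def gzip_iterator():
    # Stream one line at a time across all archives so no file
    # is ever fully decompressed into memory.
    for path in files:
        with gzip.open(path, "rt") as f:
            for line in f:
                yield line

tokenizer.train_from_iterator(gzip_iterator(), trainer=trainer)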