From 78b3eb4c8881015bae429a094ff6b4a880873629 Mon Sep 17 00:00:00 2001 From: Francois LaBerge Date: Thu, 6 Mar 2025 22:38:42 -0500 Subject: [PATCH 1/2] simplify the imports --- .gitignore | 2 ++ TODO.md | 5 +++-- docs/demo.ipynb | 33 ++++++++++++++++++++++------ src/__init__.py | 41 +++++++++++++++++++++++++++++++++++ src/tfdatacompose/__init__.py | 41 +++++++++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index f5753bd..aacd943 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,5 @@ docs/generated/ TODO.md *.DS_Store + +.vale.ini diff --git a/TODO.md b/TODO.md index 885b3f7..28082d6 100644 --- a/TODO.md +++ b/TODO.md @@ -1,7 +1,8 @@ - define the input and output types better -- replace .apply() with __call__()? - find a way to organize base operations - find a way to split and merge pipelines (Graph API) - implement all tensorflow.data operations - ?implement sci-kit learn transformations -- Pipeline option builder \ No newline at end of file +- Pipeline option builder +- Add a longer data processing example +- Add something else diff --git a/docs/demo.ipynb b/docs/demo.ipynb index 0d2e05b..479c85b 100644 --- a/docs/demo.ipynb +++ b/docs/demo.ipynb @@ -16,9 +16,32 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-03-06 22:31:51.251826: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025-03-06 22:31:51.659464: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025-03-06 22:31:51.660647: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2025-03-06 22:31:52.687297: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + }, + { + "ename": "ImportError", + "evalue": "cannot import name 'Map' from 'tfdatacompose' (/home/francois/git/tf-data-compose/src/tfdatacompose/__init__.py)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtfdatacompose\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Map, Filter, Pipeline, Skip, Take\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'Map' from 'tfdatacompose' (/home/francois/git/tf-data-compose/src/tfdatacompose/__init__.py)" + ] + } + ], "source": [ "from typing import Tuple\n", "\n", @@ -26,11 +49,7 @@ "from tensorflow import Tensor\n", "from tensorflow.python.data import Dataset\n", "\n", - "from tfdatacompose.map.map import Map\n", - "from tfdatacompose.filter.filter import Filter\n", - "from tfdatacompose.pipeline import Pipeline\n", - "from tfdatacompose.skip import Skip\n", - "from tfdatacompose.take import Take" + "from tfdatacompose import Map, Filter, Pipeline, Skip, Take" ] }, { diff --git a/src/__init__.py b/src/__init__.py index e69de29..552bf40 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -0,0 +1,41 @@ +from tfdatacompose.batch import Batch +from tfdatacompose.datasetoperation import DatasetOperation +from tfdatacompose.flatmap import FlatMap +from tfdatacompose.pipeline import Pipeline +from tfdatacompose.skip import Skip +from tfdatacompose.take import Take + +from tfdatacompose.filter.filter import Filter +from tfdatacompose.filter.numpyfilter import NumpyFilter +from tfdatacompose.filter.pythonfunctionfilter import PythonFunctionFilter + +from tfdatacompose.map.map import Map +from tfdatacompose.map.numpymap import NumpyMap +from tfdatacompose.map.pythonfunctionmap import PythonFunctionMap + +from tfdatacompose.lambdamap.lambdamap import LambdaMap +from tfdatacompose.lambdamap.numpylambdamap import NumpyLambdaMap +from tfdatacompose.lambdamap.pythonfunctionlambdamap import PythonFunctionLambdaMap + +from tfdatacompose.util.print import Print +from tfdatacompose.util.printshape import PrintShape + +__all__ = [ + "Batch", + "DatasetOperation", + "FlatMap", + "Pipeline", + "Skip", + "Take", + "Filter", + "NumpyFilter", + "PythonFunctionFilter", + "Map", + "NumpyMap", + "PythonFunctionMap", + "LambdaMap", + "NumpyLambdaMap", + "PythonFunctionLambdaMap", + "Print", + "PrintShape", +] diff --git a/src/tfdatacompose/__init__.py b/src/tfdatacompose/__init__.py index e69de29..552bf40 100644 --- a/src/tfdatacompose/__init__.py +++ b/src/tfdatacompose/__init__.py @@ -0,0 +1,41 @@ +from tfdatacompose.batch import Batch +from tfdatacompose.datasetoperation import DatasetOperation +from tfdatacompose.flatmap import FlatMap +from tfdatacompose.pipeline import Pipeline +from tfdatacompose.skip import Skip +from tfdatacompose.take import Take + +from tfdatacompose.filter.filter import Filter +from tfdatacompose.filter.numpyfilter import NumpyFilter +from tfdatacompose.filter.pythonfunctionfilter import PythonFunctionFilter + +from tfdatacompose.map.map import Map +from tfdatacompose.map.numpymap import NumpyMap +from tfdatacompose.map.pythonfunctionmap import PythonFunctionMap + +from tfdatacompose.lambdamap.lambdamap import LambdaMap +from tfdatacompose.lambdamap.numpylambdamap import NumpyLambdaMap +from tfdatacompose.lambdamap.pythonfunctionlambdamap import PythonFunctionLambdaMap + +from tfdatacompose.util.print import Print +from tfdatacompose.util.printshape import PrintShape + +__all__ = [ + "Batch", + "DatasetOperation", + "FlatMap", + "Pipeline", + "Skip", + "Take", + "Filter", + "NumpyFilter", + "PythonFunctionFilter", + "Map", + "NumpyMap", + "PythonFunctionMap", + "LambdaMap", + "NumpyLambdaMap", + "PythonFunctionLambdaMap", + "Print", + "PrintShape", +] From 9cfb70ed0fe6a38333322ec296f28e151cb2e8eb Mon Sep 17 00:00:00 2001 From: Francois LaBerge Date: Thu, 6 Mar 2025 22:38:56 -0500 Subject: [PATCH 2/2] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9ebcfd4..d693274 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "tfdatacompose" -version = "0.1.0" +version = "0.2.0" authors = [ { name = "François LaBerge", email = "fgrclaberge@gmail.com" }, ]