Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,5 @@ docs/generated/

TODO.md
*.DS_Store

.vale.ini
5 changes: 3 additions & 2 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
- define the input and output types better
- replace .apply() with __call__()?
- find a way to organize base operations
- find a way to split and merge pipelines (Graph API)
- implement all tensorflow.data operations
- ?implement scikit-learn transformations
- Pipeline option builder
- Pipeline option builder
- Add a longer data processing example
- Add something else
33 changes: 26 additions & 7 deletions docs/demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,40 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-03-06 22:31:51.251826: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2025-03-06 22:31:51.659464: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2025-03-06 22:31:51.660647: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2025-03-06 22:31:52.687297: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
},
{
"ename": "ImportError",
"evalue": "cannot import name 'Map' from 'tfdatacompose' (/home/francois/git/tf-data-compose/src/tfdatacompose/__init__.py)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtensorflow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtfdatacompose\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Map, Filter, Pipeline, Skip, Take\n",
"\u001b[0;31mImportError\u001b[0m: cannot import name 'Map' from 'tfdatacompose' (/home/francois/git/tf-data-compose/src/tfdatacompose/__init__.py)"
]
}
],
"source": [
"from typing import Tuple\n",
"\n",
"from numpy import arange\n",
"from tensorflow import Tensor\n",
"from tensorflow.python.data import Dataset\n",
"\n",
"from tfdatacompose.map.map import Map\n",
"from tfdatacompose.filter.filter import Filter\n",
"from tfdatacompose.pipeline import Pipeline\n",
"from tfdatacompose.skip import Skip\n",
"from tfdatacompose.take import Take"
"from tfdatacompose import Map, Filter, Pipeline, Skip, Take"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "pdm.backend"

[project]
name = "tfdatacompose"
version = "0.1.0"
version = "0.2.0"
authors = [
{ name = "François LaBerge", email = "fgrclaberge@gmail.com" },
]
Expand Down
41 changes: 41 additions & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from tfdatacompose.batch import Batch
from tfdatacompose.datasetoperation import DatasetOperation
from tfdatacompose.flatmap import FlatMap
from tfdatacompose.pipeline import Pipeline
from tfdatacompose.skip import Skip
from tfdatacompose.take import Take

from tfdatacompose.filter.filter import Filter
from tfdatacompose.filter.numpyfilter import NumpyFilter
from tfdatacompose.filter.pythonfunctionfilter import PythonFunctionFilter

from tfdatacompose.map.map import Map
from tfdatacompose.map.numpymap import NumpyMap
from tfdatacompose.map.pythonfunctionmap import PythonFunctionMap

from tfdatacompose.lambdamap.lambdamap import LambdaMap
from tfdatacompose.lambdamap.numpylambdamap import NumpyLambdaMap
from tfdatacompose.lambdamap.pythonfunctionlambdamap import PythonFunctionLambdaMap

from tfdatacompose.util.print import Print
from tfdatacompose.util.printshape import PrintShape

# Names exported by ``from <package> import *``; the order follows the
# import groups above.
# NOTE(review): this list (and the imports above it) is identical to the one
# in src/tfdatacompose/__init__.py -- confirm that a package initializer at
# the src/ level is intended and not an accidental copy.
__all__ = [
    # Imported from top-level modules of tfdatacompose.
    "Batch",
    "DatasetOperation",
    "FlatMap",
    "Pipeline",
    "Skip",
    "Take",
    # Imported from tfdatacompose.filter.
    "Filter",
    "NumpyFilter",
    "PythonFunctionFilter",
    # Imported from tfdatacompose.map.
    "Map",
    "NumpyMap",
    "PythonFunctionMap",
    # Imported from tfdatacompose.lambdamap.
    "LambdaMap",
    "NumpyLambdaMap",
    "PythonFunctionLambdaMap",
    # Imported from tfdatacompose.util.
    "Print",
    "PrintShape",
]
41 changes: 41 additions & 0 deletions src/tfdatacompose/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from tfdatacompose.batch import Batch
from tfdatacompose.datasetoperation import DatasetOperation
from tfdatacompose.flatmap import FlatMap
from tfdatacompose.pipeline import Pipeline
from tfdatacompose.skip import Skip
from tfdatacompose.take import Take

from tfdatacompose.filter.filter import Filter
from tfdatacompose.filter.numpyfilter import NumpyFilter
from tfdatacompose.filter.pythonfunctionfilter import PythonFunctionFilter

from tfdatacompose.map.map import Map
from tfdatacompose.map.numpymap import NumpyMap
from tfdatacompose.map.pythonfunctionmap import PythonFunctionMap

from tfdatacompose.lambdamap.lambdamap import LambdaMap
from tfdatacompose.lambdamap.numpylambdamap import NumpyLambdaMap
from tfdatacompose.lambdamap.pythonfunctionlambdamap import PythonFunctionLambdaMap

from tfdatacompose.util.print import Print
from tfdatacompose.util.printshape import PrintShape

# Public API of the package: every name re-exported by the imports above,
# listed in the same group order.
__all__ = [
    # Imported from top-level modules of tfdatacompose.
    "Batch",
    "DatasetOperation",
    "FlatMap",
    "Pipeline",
    "Skip",
    "Take",
    # Imported from tfdatacompose.filter.
    "Filter",
    "NumpyFilter",
    "PythonFunctionFilter",
    # Imported from tfdatacompose.map.
    "Map",
    "NumpyMap",
    "PythonFunctionMap",
    # Imported from tfdatacompose.lambdamap.
    "LambdaMap",
    "NumpyLambdaMap",
    "PythonFunctionLambdaMap",
    # Imported from tfdatacompose.util.
    "Print",
    "PrintShape",
]