diff --git a/docs/changelog.md b/docs/changelog.md index 9e2143d..81c34cc 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,21 @@ # Changelog +## 0.5.0 + +- replaced JSON serialization with cloudpickle. This allows extracting a much + wider range of objects from the notebook subprocess. +- Reference semantics have changed. + - Old behavior of `tb.get(name)` and `tb[name]`: + - a reference would be returned for non-JSON-serializable objects. + - a value would be returned for JSON-serializable objects. + - Old behavior of `tb.ref(name)` was identical to `tb.get(name)`. + - However, now almost all objects are serializable and as a result, under + the old semantics, a reference would almost never be returned. Therefore, + when a reference is desired, we now require explicitly requesting a + reference. The new behavior of `tb.get(name)` and `tb[name]` is to always + return the deserialized object and to never return a reference. The new + behavior of `tb.ref(name)` is to always return a reference. + ## 0.4.2 - Documentation and CoC updates to improve developer access (Thank you PyLadies Vancouver!) diff --git a/docs/usage/index.md b/docs/usage/index.md index f7e46bc..33f6ca3 100644 --- a/docs/usage/index.md +++ b/docs/usage/index.md @@ -63,7 +63,7 @@ my_list = ['list', 'from', 'notebook'] Reference objects to functions can be called with, -- explicit JSON serializable values (like `dict`, `list`, `int`, `float`, `str`, `bool`, etc) +- explicit cloudpickle-serializable values (almost all Python objects) - other reference objects ```{code-block} python diff --git a/requirements.txt b/requirements.txt index 0f9ff69..02ba471 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ nbformat>=5.0.4 nbclient>=0.4.0 +cloudpickle >= 2.0.0 \ No newline at end of file diff --git a/testbook/client.py b/testbook/client.py index 5dc94d3..16382a5 100644 --- a/testbook/client.py +++ b/testbook/client.py @@ -6,6 +6,7 @@ from nbclient import NotebookClient from nbclient.exceptions import CellExecutionError from nbformat.v4 import new_code_cell +import cloudpickle from .exceptions import ( TestbookCellTagNotFoundError, @@ -38,17 +39,13 @@ def ref(self, name: str) -> Union[TestbookObjectReference, Any]: # Check if exists self.inject(name, pop=True) - try: - self.inject(f"import json; json.dumps({name})", pop=True) - return self.value(name) - except Exception: - return TestbookObjectReference(self, name) + return TestbookObjectReference(self, name) def get(self, item): - return self.ref(item) + return self.value(item) def __getitem__(self, item): - return self.ref(item) + return self.value(item) @staticmethod def _construct_call_code( @@ -248,11 +245,7 @@ def inject( def value(self, code: str) -> Any: """ - Execute given code in the kernel and return JSON serializeable result. - - If the result is not JSON serializeable, it raises `TestbookAttributeError`. - This error object will also contain an attribute called `save_varname` which - can be used to create a reference object with :meth:`ref`. + Execute given code in the kernel and returns the serializeable result. Parameters ---------- @@ -276,35 +269,30 @@ def value(self, code: str) -> Any: raise TestbookExecuteResultNotFoundError( 'code provided does not produce execute_result' ) - + save_varname = random_varname() - inject_code = f""" - import json - from IPython import get_ipython - from IPython.display import JSON - - {save_varname} = get_ipython().last_execution_result.result - - json.dumps({save_varname}) - JSON({{"value" : {save_varname}}}) - """ - - try: - outputs = self.inject(inject_code, pop=True).outputs - - if outputs[0].output_type == "error": - # will receive error when `allow_errors` is set to True - raise TestbookRuntimeError( - outputs[0].evalue, outputs[0].traceback, outputs[0].ename - ) - - return outputs[0].data['application/json']['value'] - - except TestbookRuntimeError: - e = TestbookSerializeError('could not JSON serialize output') - e.save_varname = save_varname - raise e + import tempfile + with tempfile.NamedTemporaryFile() as tmp: + try: + inject_code = f""" + import cloudpickle + {save_varname} = get_ipython().last_execution_result.result + with open('{tmp.name}', 'wb') as f: + cloudpickle.dump({save_varname}, f) + """ + outputs = self.inject(inject_code, pop=True).outputs + if len(outputs) > 0 and outputs[0].output_type == "error": + # will receive error when `allow_errors` is set to True + raise TestbookRuntimeError( + outputs[0].evalue, outputs[0].traceback, outputs[0].ename + ) + with open(tmp.name, 'rb') as f: + return cloudpickle.load(f) + except TestbookRuntimeError: + e = TestbookSerializeError('could not serialize output') + e.save_varname = save_varname + raise e @contextmanager def patch(self, target, **kwargs): diff --git a/testbook/exceptions.py b/testbook/exceptions.py index 2f7f7aa..dacbe29 100644 --- a/testbook/exceptions.py +++ b/testbook/exceptions.py @@ -10,7 +10,7 @@ class TestbookCellTagNotFoundError(TestbookError): class TestbookSerializeError(TestbookError): - """Raised when output cannot be JSON serialized""" + """Raised when output cannot be serialized""" pass diff --git a/testbook/tests/test_reference.py b/testbook/tests/test_reference.py index c766aa2..21cb5a3 100644 --- a/testbook/tests/test_reference.py +++ b/testbook/tests/test_reference.py @@ -12,28 +12,28 @@ def notebook(): def test_create_reference(notebook): a = notebook.ref("a") - assert repr(a) == "[1, 2, 3]" + assert repr(a) == "'[1, 2, 3]'" -def test_create_reference_getitem(notebook): - a = notebook["a"] - assert repr(a) == "[1, 2, 3]" +def test_create_reference_resolve(notebook): + a = notebook.ref("a") + assert a.resolve() == [1, 2, 3] -def test_create_reference_get(notebook): +def test_notebook_get_value(notebook): a = notebook.get("a") - assert repr(a) == "[1, 2, 3]" + assert a == [1, 2, 3] def test_eq_in_notebook(notebook): a = notebook.ref("a") a.append(4) - assert a == [1, 2, 3, 4] + assert a.resolve() == [1, 2, 3, 4] def test_eq_in_notebook_ref(notebook): a, b = notebook.ref("a"), notebook.ref("b") - assert a == b + assert a.resolve()[:3] == b def test_function_call(notebook): @@ -43,28 +43,13 @@ def test_function_call(notebook): def test_function_call_with_ref_object(notebook): double, a = notebook.ref("double"), notebook.ref("a") - - assert double(a) == [2, 4, 6] + # a.append(4) above applied to the referenced "a" and therefore is + # reflected here + assert double(a) == [2, 4, 6, 8] -def test_reference(notebook): +def test_nontrivial_pickling(notebook): Foo = notebook.ref("Foo") - - # Check that when a non-serializeable object is returned, it returns - # a reference to that object instead - f = Foo('bar') - - assert repr(f) == "\"\"" - - # Valid attribute access - assert f.say_hello() - - # Invalid attribute access - with pytest.raises(TestbookAttributeError): - f.does_not_exist - - assert f.say_hello() == 'Hello bar!' - - # non JSON-serializeable output - with pytest.raises(TestbookSerializeError): - f.resolve() + f = Foo("bar") + assert repr(f) == "" + assert(f.say_hello() == "Hello bar!") \ No newline at end of file