Skip to content

Commit 62b9362

Browse files
committed
Make something work...
1 parent d87a99f commit 62b9362

6 files changed

Lines changed: 178 additions & 31 deletions

File tree

amazon/ion/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@
3636
'writer_binary_raw_fields',
3737
'writer_buffer',
3838
'writer_text',
39+
'lazy_type',
3940
]

amazon/ion/ioncmodule.c

Lines changed: 117 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ static char _err_msg[ERR_MSG_MAX_LEN];
3030
// Python 2/3 compatibility
3131
#if PY_MAJOR_VERSION >= 3
3232
#define IONC_BYTES_FORMAT "y#"
33-
#define IONC_READ_ARGS_FORMAT "OO"
33+
#define IONC_READ_ARGS_FORMAT "OOO"
3434
#define PyInt_AsSsize_t PyLong_AsSsize_t
3535
#define PyInt_AsLong PyLong_AsLong
3636
#define PyInt_FromLong PyLong_FromLong
@@ -41,7 +41,7 @@ static char _err_msg[ERR_MSG_MAX_LEN];
4141
#define PyInt_Check PyLong_Check
4242
#else
4343
#define IONC_BYTES_FORMAT "s#"
44-
#define IONC_READ_ARGS_FORMAT "OOO"
44+
#define IONC_READ_ARGS_FORMAT "OOOO"
4545
#endif
4646

4747
#if PY_VERSION_HEX < 0x02070000
@@ -53,6 +53,8 @@ static PyObject* _math_module;
5353
static PyObject* _decimal_module;
5454
static PyObject* _decimal_constructor;
5555
static PyObject* _py_timestamp_constructor;
56+
static PyObject* _lazytype_module;
57+
static PyObject* _ionpylazyobj_cls;
5658
static PyObject* _simpletypes_module;
5759
static PyObject* _ionpynull_cls;
5860
static PyObject* _ionpynull_fromvalue;
@@ -792,6 +794,28 @@ iERR ionc_write_value(hWRITER writer, PyObject* obj, PyObject* tuple_as_sexp) {
792794
IONCHECK(ionc_write_sequence(writer, obj, tuple_as_sexp));
793795
IONCHECK(ion_writer_finish_container(writer));
794796
}
797+
/* When serializing a lazy Ion object, we should first check whether the object already holds a cached binary
798+
representation. If so, the cached bytes should be written into the writer's temporary buffer directly. However, I did not find
799+
an API in ion-c's ion_writer.h for appending arbitrary bytes to the writer's temporary buffer (correct me if I missed it), so
800+
an IonBlob value is written instead, which is wrong! It is only here for visibility and debugging. Later on we should decide
801+
whether and how to expose the existing ion-c API to the CPython layer, so that it can append custom bytes to the writer's
802+
value_stream instead of only being able to call the writer to write bytes.
803+
*/
804+
else if (PyObject_IsInstance(obj, _ionpylazyobj_cls)) {
805+
Py_ssize_t len;
806+
char* c_buf;
807+
808+
// TODO check the existence of lazy buffer first. Since all lazy objects hold a buffer by my sample code... I skipped validation
809+
PyObject* lazy_buffer = PyObject_GetAttrString(obj, "lazy_buffer");
810+
811+
// Convert python bytes to C bytes
812+
if (PyBytes_AsStringAndSize(lazy_buffer, &c_buf, &len) < 0) {
813+
_FAILWITHMSG(IERR_INVALID_ARG, "Binary conversion error.");
814+
}
815+
// TODO should append the c_buf to _value_stream directly. Not an Ion BLOB!
816+
// E.g. Something like ION_PUT(writer->_typed_writer.binary._value_stream, c_buf);
817+
IONCHECK(ion_writer_write_blob(writer, (BYTE*)c_buf, len));
818+
}
795819
else {
796820
_FAILWITHMSG(IERR_INVALID_STATE, "Cannot dump arbitrary object types.");
797821
}
@@ -1503,43 +1527,108 @@ void ionc_read_iter_dealloc(PyObject *self) {
15031527
PyObject_Del(self);
15041528
}
15051529

1530+
/*
 * Reads every value at the reader's current depth without materializing it.
 *
 * For each value, the raw bytes it occupies in `buffer` are copied into a
 * Python bytes object and wrapped in an IonPyLazyObj together with the value's
 * Ion type; the wrapper is then added to `container`.
 *
 * Args:
 *   hreader:           An open reader positioned before the first value to read.
 *   container:         The Python container that receives the lazy objects.
 *   in_struct:         Forwarded to ionc_add_to_container. No field name is
 *                      supplied here, so callers are expected to pass FALSE.
 *   emit_bare_values:  Unused by the lazy path.
 *   buffer:            The input the reader was opened on. Offsets reported by
 *                      the reader are applied to this pointer.
 *                      NOTE(review): assumes offsets are relative to the start
 *                      of this buffer -- confirm against ion-c.
 *
 * Returns:
 *   IERR_OK on success, otherwise the first ion-c error encountered, or
 *   IERR_INVALID_STATE / IERR_INTERNAL_ERROR if a value could not be cached.
 */
iERR ionc_lazy_read_all(hREADER hreader, PyObject* container, BOOL in_struct, BOOL emit_bare_values, char* buffer) {
    iENTER;
    ION_TYPE t;

    (void)emit_bare_values;

    for (;;) {
        POSITION p_offset = 0;          // Start position of the current value
        SIZE p_length = 0;              // Length in bytes of the current value
        PyObject* py_cached_bytes = NULL;
        PyObject* rtn = NULL;

        IONCHECK(ion_reader_next(hreader, &t));
        if (t == tid_EOF) {
            break;
        }

        // Locate the bytes of the value the reader is positioned on so they can be cached.
        IONCHECK(ion_reader_get_value_offset(hreader, &p_offset));
        IONCHECK(ion_reader_get_value_length(hreader, &p_length));

        // A negative position or length cannot be used to slice the buffer.
        // NOTE(review): ion-c presumably reports -1 when the information is
        // unavailable (e.g. for text input) -- confirm.
        if (p_offset < 0 || p_length < 0) {
            _FAILWITHMSG(IERR_INVALID_STATE, "Cannot locate the value's bytes in the input buffer.");
        }

        // TODO is it possible to return a memory view pointing to the specific position of the original buffer
        // (e.g. PyMemoryView_FromMemory) instead of copying the bytes?
        py_cached_bytes = PyBytes_FromStringAndSize(buffer + p_offset, (Py_ssize_t)p_length);
        if (py_cached_bytes == NULL) {
            FAILWITH(IERR_INTERNAL_ERROR);
        }

        // Equivalent to the Python expression: IonPyLazyObj(py_cached_bytes, ion_type)
        rtn = PyObject_CallFunctionObjArgs(
            _ionpylazyobj_cls,
            py_cached_bytes,
            py_ion_type_table[ION_TYPE_INT(t) >> 8],
            NULL
        );
        // The lazy object keeps its own reference to the bytes (if it was created).
        Py_DECREF(py_cached_bytes);
        if (rtn == NULL) {
            FAILWITH(IERR_INTERNAL_ERROR);
        }

        // NOTE(review): ownership of `rtn` is handed to ionc_add_to_container exactly as before;
        // presumably it releases the reference -- confirm against its definition.
        ionc_add_to_container(container, rtn, in_struct, NULL);
    }
    iRETURN;
}
1572+
15061573
/*
15071574
* Entry point of read/load functions
15081575
*/
15091576
PyObject* ionc_read(PyObject* self, PyObject *args, PyObject *kwds) {
15101577
iENTER;
15111578
PyObject *py_file = NULL; // TextIOWrapper
15121579
PyObject *emit_bare_values;
1580+
PyObject *parse_lazily;
15131581
ionc_read_Iterator *iterator = NULL;
1514-
static char *kwlist[] = {"file", "emit_bare_values", NULL};
1515-
if (!PyArg_ParseTupleAndKeywords(args, kwds, IONC_READ_ARGS_FORMAT, kwlist, &py_file, &emit_bare_values)) {
1582+
static char *kwlist[] = {"file", "emit_bare_values", "parse_lazily", NULL};
1583+
if (!PyArg_ParseTupleAndKeywords(args, kwds, IONC_READ_ARGS_FORMAT, kwlist, &py_file, &emit_bare_values, &parse_lazily)) {
15161584
FAILWITH(IERR_INVALID_ARG);
15171585
}
15181586

1519-
iterator = PyObject_New(ionc_read_Iterator, &ionc_read_IteratorType);
1520-
if (!iterator) {
1521-
FAILWITH(IERR_INTERNAL_ERROR);
1522-
}
1523-
Py_INCREF(py_file);
1587+
// Store the stream in IonPyObj until it actually needs to be serialized.
1588+
if (parse_lazily == Py_True) {
1589+
hREADER reader;
1590+
char *buffer = NULL;
1591+
long size;
1592+
PyObject *top_level_container = NULL;
1593+
PyString_AsStringAndSize(py_file, &buffer, &size);
15241594

1525-
if (!PyObject_Init((PyObject*) iterator, &ionc_read_IteratorType)) {
1526-
FAILWITH(IERR_INTERNAL_ERROR);
1527-
}
1595+
// TODO what if size is larger than SIZE ?
1596+
ION_READER_OPTIONS options;
1597+
memset(&options, 0, sizeof(options));
1598+
options.decimal_context = &dec_context;
1599+
options.max_annotation_count = ANNOTATION_MAX_LEN;
15281600

1529-
iterator->closed = FALSE;
1530-
iterator->file_handler_state.py_file = py_file;
1531-
iterator->emit_bare_values = emit_bare_values == Py_True;
1532-
memset(&iterator->reader, 0, sizeof(iterator->reader));
1533-
memset(&iterator->_reader_options, 0, sizeof(iterator->_reader_options));
1534-
iterator->_reader_options.decimal_context = &dec_context;
1601+
IONCHECK(ion_reader_open_buffer(&reader, (BYTE*)buffer, (SIZE)size, &options)); // NULL represents default reader options
15351602

1536-
IONCHECK(ion_reader_open_stream(
1537-
&iterator->reader,
1538-
&iterator->file_handler_state,
1539-
ion_read_file_stream_handler,
1540-
&iterator->_reader_options)); // NULL represents default reader options
1541-
return iterator;
1603+
top_level_container = PyList_New(0);
1604+
IONCHECK(ionc_lazy_read_all(reader, top_level_container, FALSE, emit_bare_values == Py_True, buffer));
1605+
IONCHECK(ion_reader_close(reader));
1606+
return top_level_container;
1607+
} else {
1608+
iterator = PyObject_New(ionc_read_Iterator, &ionc_read_IteratorType);
1609+
if (!iterator) {
1610+
FAILWITH(IERR_INTERNAL_ERROR);
1611+
}
1612+
Py_INCREF(py_file);
1613+
1614+
if (!PyObject_Init((PyObject*) iterator, &ionc_read_IteratorType)) {
1615+
FAILWITH(IERR_INTERNAL_ERROR);
1616+
}
1617+
1618+
iterator->closed = FALSE;
1619+
iterator->file_handler_state.py_file = py_file;
1620+
iterator->emit_bare_values = emit_bare_values == Py_True;
1621+
memset(&iterator->reader, 0, sizeof(iterator->reader));
1622+
memset(&iterator->_reader_options, 0, sizeof(iterator->_reader_options));
1623+
iterator->_reader_options.decimal_context = &dec_context;
15421624

1625+
IONCHECK(ion_reader_open_stream(
1626+
&iterator->reader,
1627+
&iterator->file_handler_state,
1628+
ion_read_file_stream_handler,
1629+
&iterator->_reader_options)); // NULL represents default reader options
1630+
return iterator;
1631+
}
15431632
fail:
15441633
if (iterator != NULL) {
15451634
Py_DECREF(py_file);
@@ -1594,8 +1683,11 @@ PyObject* ionc_init_module(void) {
15941683

15951684
_decimal_module = PyImport_ImportModule("decimal");
15961685
_decimal_constructor = PyObject_GetAttrString(_decimal_module, "Decimal");
1597-
_simpletypes_module = PyImport_ImportModule("amazon.ion.simple_types");
15981686

1687+
_lazytype_module = PyImport_ImportModule("amazon.ion.lazy_type");
1688+
_ionpylazyobj_cls = PyObject_GetAttrString(_lazytype_module, "IonPyLazyObj");
1689+
1690+
_simpletypes_module = PyImport_ImportModule("amazon.ion.simple_types");
15991691
_ionpynull_cls = PyObject_GetAttrString(_simpletypes_module, "IonPyNull");
16001692
_ionpynull_fromvalue = PyObject_GetAttrString(_ionpynull_cls, "from_value");
16011693
_ionpybool_cls = PyObject_GetAttrString(_simpletypes_module, "IonPyBool");

amazon/ion/lazy_type.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from . import simpleion
2+
from .simple_types import _IonNature, IonPyNull, IonPyList
3+
4+
5+
class IonPyLazyObj(_IonNature):
    """An Ion value produced by lazy parsing (``parse_lazily=True``).

    Instead of a materialized value, the object holds the raw bytes of the
    value (``lazy_buffer``) and its Ion type (``lazy_type``).

    _IonNature already provides ``ion_type``; ``lazy_type`` is kept separately
    for test purposes.
    """
    # Original class-level names, kept so existing attribute lookups still work.
    ion_buffer = None
    ion_type = None
    # Class-level defaults for the attributes actually assigned in __init__.
    lazy_buffer = None
    lazy_type = None

    # Binary Ion version marker (the first four bytes of a binary Ion stream).
    _BINARY_IVM = b'\xe0\x01\x00\xea'

    def __init__(self, b, t, *args, **kwargs):
        """Store the cached bytes ``b`` and the Ion type ``t`` of the value."""
        super().__init__(*args, **kwargs)
        self.lazy_buffer = b
        self.lazy_type = t

    def wake_up(self):
        """Wake up the lazy object and return a real IonPyObj.

        This might be helpful later, but is not used at all for now.

        Returns IonPyNull() when no bytes are cached. Otherwise the cached
        bytes are parsed with ``simpleion.loads``. The cached bytes cover only
        the value itself, so the binary version marker is prepended when it is
        missing; without it the bytes would not be recognized as binary Ion.
        NOTE(review): values that depend on a local symbol table from the
        original stream presumably cannot be resolved this way -- confirm.
        """
        if self.lazy_buffer is None:
            return IonPyNull()
        data = bytes(self.lazy_buffer)
        if not data.startswith(self._BINARY_IVM):
            data = self._BINARY_IVM + data
        return simpleion.loads(data)

amazon/ion/simple_types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
# in Python 3.10, abstract collections have moved into their own module
2929
# for compatibility with 3.10+, first try imports from the new location
3030
# if that fails, try from the pre-3.10 location
31+
3132
try:
3233
from collections.abc import MutableMapping
3334
except:

amazon/ion/simpleion.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from amazon.ion.writer_text import text_writer
3232
from .core import IonEvent, IonEventType, IonType, ION_STREAM_END_EVENT, Timestamp, ION_VERSION_MARKER_EVENT
3333
from .exceptions import IonException
34+
from .lazy_type import IonPyLazyObj
3435
from .reader import blocking_reader, NEXT_EVENT
3536
from .reader_binary import binary_reader
3637
from .reader_managed import managed_reader
@@ -450,7 +451,8 @@ def add(obj):
450451

451452

452453
def loads(ion_str, catalog=None, single_value=True, encoding='utf-8', cls=None, object_hook=None, parse_float=None,
453-
parse_int=None, parse_constant=None, object_pairs_hook=None, use_decimal=None, parse_eagerly=True, **kw):
454+
parse_int=None, parse_constant=None, object_pairs_hook=None, use_decimal=None, parse_eagerly=True,
455+
parse_lazily=False, **kw):
454456
"""Deserialize ``ion_str``, which is a string representation of an Ion object, to a Python object using the
455457
conversion table used by load (above).
456458
@@ -489,7 +491,8 @@ def loads(ion_str, catalog=None, single_value=True, encoding='utf-8', cls=None,
489491

490492
return load(ion_buffer, catalog=catalog, single_value=single_value, encoding=encoding, cls=cls,
491493
object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, parse_constant=parse_constant,
492-
object_pairs_hook=object_pairs_hook, use_decimal=use_decimal, parse_eagerly=parse_eagerly)
494+
object_pairs_hook=object_pairs_hook, use_decimal=use_decimal, parse_eagerly=parse_eagerly,
495+
parse_lazily=parse_lazily)
493496

494497

495498
def dump_extension(obj, fp, binary=True, sequence_as_stream=False, tuple_as_sexp=False, omit_version_marker=False):
@@ -501,8 +504,14 @@ def dump_extension(obj, fp, binary=True, sequence_as_stream=False, tuple_as_sexp
501504
fp.write(res)
502505

503506

504-
def load_extension(fp, single_value=True, parse_eagerly=True):
505-
iterator = ionc.ionc_read(fp, emit_bare_values=False)
507+
def load_extension(fp, single_value=True, parse_eagerly=True, parse_lazily=False):
508+
# To simplify testing, parse_eagerly is ignored when parse_lazily is set to True.
509+
if parse_lazily and isinstance(fp, BytesIO):
510+
data = fp.read()
511+
fp.close()
512+
return ionc.ionc_read(data, emit_bare_values=False, parse_lazily=True)
513+
514+
iterator = ionc.ionc_read(fp, emit_bare_values=False, parse_lazily=False)
506515
if single_value:
507516
try:
508517
value = next(iterator)
@@ -539,9 +548,10 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, skipkeys=
539548

540549

541550
def load(fp, catalog=None, single_value=True, encoding='utf-8', cls=None, object_hook=None, parse_float=None,
542-
parse_int=None, parse_constant=None, object_pairs_hook=None, use_decimal=None, parse_eagerly=True, **kw):
551+
parse_int=None, parse_constant=None, object_pairs_hook=None, use_decimal=None, parse_eagerly=True,
552+
parse_lazily=False, **kw):
543553
if c_ext and catalog is None:
544-
return load_extension(fp, parse_eagerly=parse_eagerly, single_value=single_value)
554+
return load_extension(fp, parse_eagerly=parse_eagerly, parse_lazily=parse_lazily, single_value=single_value)
545555
else:
546556
return load_python(fp, catalog=catalog, single_value=single_value, encoding=encoding, cls=cls,
547557
object_hook=object_hook, parse_float=parse_float, parse_int=parse_int,

amazon/ion/test.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import amazon.ion.simpleion as ion
2+
3+
# Test data, its text representation is: ```[1, 2] [3]```
4+
# Normally, the original C extension loads the bytes below into a top-level list holding the values, e.g. [[1,2], [3]]
5+
ion_binary_bytes = b'\xe0\x01\x00\xea\xb4\x21\x01\x21\x02\xb2\x21\x03'
6+
7+
# This should return a list below:
8+
# obj = [<IonPyLazyObj holding [1,2]>, <IonPyLazyObj holding [3]>]
9+
obj = ion.loads(ion_binary_bytes, parse_lazily=True)
10+
# The first lazy object holds bytes \xb4\x21\x01\x21\x02 <- [1,2]
11+
print(f'obj[0].lazy_buffer is {obj[0].lazy_buffer}')
12+
# The second lazy object holds bytes \xb2\x21\x03 <- [3]
13+
print(f'obj[1].lazy_buffer is {obj[1].lazy_buffer}')
14+
15+
# The returned bytes are wrong because a blob is written instead of the cached bytes; the returned bytes are:
16+
# Bytes returned \xe0\x01\x00\xea \xba \xa5 \xb4 \x21\x01 \x21\x02 \xa3 \xb2 \x21\x03
17+
# Text representation IVM [ blob([ 1, 2 ]) blob([ 3 ])]
18+
# We should take out the blob wrapper later.
19+
print(ion.dumps(obj))

0 commit comments

Comments
 (0)