Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions dictknife/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import operator
from collections import defaultdict


def _to_accesssor(k):
if callable(k):
return k
elif isinstance(k, (str, bytes)):
return operator.itemgetter(k)
elif isinstance(k, (list, tuple)):
# todo: compile?
return lambda v: tuple([v.get(sk) for sk in k])
else:
raise ValueError(k)


class Options:
    """Settings bundle handed to ``join`` and to the ``how_*`` strategies.

    Attributes are plain and public:

    * ``missing_value`` -- placeholder the outer-join strategies put in a
      pair when one side has no matching row.
    * ``accessor_factory`` -- callable that ``join`` uses to convert a key
      specification into an accessor function.
    """

    def __init__(self, *, missing_value=None, accessor_factory=_to_accesssor) -> None:
        # Both arguments are keyword-only and stored as given.
        self.accessor_factory = accessor_factory
        self.missing_value = missing_value


# Module-level Options instance used as the default `options` argument of
# `join` and of every `how_*` strategy (one shared object, built at import).
_default_options = Options()


def how_inner_join(left, right, left_k, right_k, *, options=_default_options):
    """Yield ``(left_row, right_row)`` for every pair of rows whose keys match.

    ``right`` is consumed completely and grouped by ``right_k(row)`` before
    the first row of ``left`` is read; ``left`` is then streamed in order.
    Left rows without any match produce nothing.  ``options`` is accepted so
    that all strategies share one signature; it is not used here.
    """
    rows_by_key = {}
    for right_row in right:
        rows_by_key.setdefault(right_k(right_row), []).append(right_row)

    for left_row in left:
        # An absent key contributes an empty iteration, i.e. no output.
        for right_row in rows_by_key.get(left_k(left_row), ()):
            yield (left_row, right_row)


def how_left_outer_join(left, right, left_k, right_k, *, options=_default_options):
    """Yield a pair for every left row, matched or not.

    A left row whose key also occurs in ``right`` yields one
    ``(left_row, right_row)`` per matching right row; otherwise it yields a
    single ``(left_row, options.missing_value)``.  ``right`` is fully grouped
    by key before ``left`` is iterated.
    """
    placeholder = options.missing_value

    rows_by_key = {}
    for right_row in right:
        rows_by_key.setdefault(right_k(right_row), []).append(right_row)

    for left_row in left:
        matches = rows_by_key.get(left_k(left_row))
        if matches is None:
            yield (left_row, placeholder)
            continue
        for right_row in matches:
            yield (left_row, right_row)


def how_right_outer_join(left, right, left_k, right_k, *, options=_default_options):
    """Yield a pair for every right row, matched or not.

    Implemented as a left outer join with the two sides (and their key
    accessors) exchanged; each resulting pair is flipped back so the caller
    always receives ``(left_row, right_row)``.  Output follows the order of
    ``right``.
    """
    mirrored = how_left_outer_join(right, left, right_k, left_k, options=options)
    for right_row, left_row in mirrored:
        yield left_row, right_row


def how_full_outer_join(left, right, left_k, right_k, *, options=_default_options):
    """Yield pairs covering every row of both ``left`` and ``right``.

    Output order:

    1. For each left row, in order: one ``(left_row, right_row)`` per right
       row with the same key, or ``(left_row, options.missing_value)`` when
       no right row matches.
    2. Then, in the original order of ``right``: one
       ``(options.missing_value, right_row)`` for each right row whose key
       was never matched by a left row.

    ``right`` is iterated exactly once, so a one-shot iterator (for example a
    generator) is handled correctly, and ``right_k`` is called once per row.

    Args:
        left: iterable of left-hand rows.
        right: iterable of right-hand rows.
        left_k: callable returning the join key of a left row.
        right_k: callable returning the join key of a right row.
        options: supplies ``missing_value`` for the unmatched side.
    """
    missing_value = options.missing_value

    right_cache = defaultdict(list)
    # Remember every (key, row) pair in arrival order: the trailing pass
    # below must not re-iterate `right`, which may already be exhausted.
    right_entries = []
    for rv in right:
        rk = right_k(rv)
        right_cache[rk].append(rv)
        right_entries.append((rk, rv))

    right_used = set()

    for lv in left:
        lk = left_k(lv)
        if lk in right_cache:
            right_used.add(lk)
            for rv in right_cache[lk]:
                yield (lv, rv)
        else:
            yield (lv, missing_value)

    # Emit the right rows that no left row claimed.
    for rk, rv in right_entries:
        if rk not in right_used:
            yield (missing_value, rv)


def join(
    left,
    right,
    *,
    left_on=None,
    right_on=None,
    on=None,
    how=how_inner_join,
    options=_default_options,
):
    """Join two iterables of rows and yield the resulting pairs.

    Args:
        left: left-hand rows.
        right: right-hand rows.
        left_on: key specification for ``left`` rows.
        right_on: key specification for ``right`` rows.
        on: key specification applied to both sides; when it is not ``None``
            it replaces ``left_on`` and ``right_on``.
        how: join strategy, called as
            ``how(left, right, left_accessor, right_accessor, options=options)``.
        options: provides ``accessor_factory``, which converts each key
            specification into an accessor, and is forwarded to ``how``.

    Yields:
        Whatever ``how`` yields.

    This is a generator function, so the argument check and the accessor
    construction run when iteration starts, not when ``join`` is called.
    """
    # NOTE(review): `assert` statements are removed when Python runs with -O,
    # so this check is not enforced in optimized mode.
    assert on or (left_on and right_on)

    if on is not None:
        left_on = right_on = on

    make_accessor = options.accessor_factory
    left_key = make_accessor(left_on)
    right_key = make_accessor(right_on)
    yield from how(left, right, left_key, right_key, options=options)
260 changes: 260 additions & 0 deletions dictknife/tests/test_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
import unittest
from collections import namedtuple
import itertools
import copy
import os

# Column width used by _DifferenceReportText; taken from the COLSIZE
# environment variable, falling back to 60 when it is unset or empty.
COLSIZE = int(os.environ.get("COLSIZE") or "60")


class JoinTests(unittest.TestCase):
    """Table-driven tests for ``dictknife.query.join`` and its ``how_*`` strategies."""

    def _callFUT(self, *args, **kwargs):
        """Call ``join`` with the given arguments and return its output as a list."""
        from dictknife.query import join

        pairs = join(*args, **kwargs)
        return list(pairs)

    def _assert_same_rows(self, got, want):
        """Fail with a side-by-side want/got report unless ``got == want``."""
        self.assertTrue(got == want, msg=_DifferenceReportText(got=got, want=want))

    def test_it(self):
        """Join two package tables on ``version`` with each join strategy."""
        from dictknife import query

        class data:
            x_packages = [
                {"version": "2.7", "downloads": 1000},
                {"version": "3.5", "downloads": 2000},
                {"version": "3.6", "downloads": 3000},
                {"version": "3.7", "downloads": 3000},
            ]
            y_packages = [
                {"version": "3.5", "downloads": 2000},
                {"version": "3.6", "downloads": 2000},
                {"version": "3.7", "downloads": 2000},
                {"version": "3.8", "downloads": 500},
            ]

        # Pristine copies, used to verify that joining does not mutate inputs.
        class copied:
            x_packages = copy.deepcopy(data.x_packages)
            y_packages = copy.deepcopy(data.y_packages)

        C = namedtuple("C", "msg, args, kwargs, want")
        cases = [
            C(
                msg="inner join",
                args=["x_packages", "y_packages"],
                kwargs={"on": "version"},
                want=[
                    (
                        {"version": "3.5", "downloads": 2000},
                        {"version": "3.5", "downloads": 2000},
                    ),
                    (
                        {"version": "3.6", "downloads": 3000},
                        {"version": "3.6", "downloads": 2000},
                    ),
                    (
                        {"version": "3.7", "downloads": 3000},
                        {"version": "3.7", "downloads": 2000},
                    ),
                ],
            ),
            C(
                msg="left outer join",
                args=["x_packages", "y_packages"],
                kwargs={"on": "version", "how": query.how_left_outer_join},
                want=[
                    ({"version": "2.7", "downloads": 1000}, None),
                    (
                        {"version": "3.5", "downloads": 2000},
                        {"version": "3.5", "downloads": 2000},
                    ),
                    (
                        {"version": "3.6", "downloads": 3000},
                        {"version": "3.6", "downloads": 2000},
                    ),
                    (
                        {"version": "3.7", "downloads": 3000},
                        {"version": "3.7", "downloads": 2000},
                    ),
                ],
            ),
            C(
                msg="right outer join",
                args=["x_packages", "y_packages"],
                kwargs={"on": "version", "how": query.how_right_outer_join},
                want=[
                    (
                        {"version": "3.5", "downloads": 2000},
                        {"version": "3.5", "downloads": 2000},
                    ),
                    (
                        {"version": "3.6", "downloads": 3000},
                        {"version": "3.6", "downloads": 2000},
                    ),
                    (
                        {"version": "3.7", "downloads": 3000},
                        {"version": "3.7", "downloads": 2000},
                    ),
                    (None, {"version": "3.8", "downloads": 500}),
                ],
            ),
            C(
                msg="full outer join",
                args=["x_packages", "y_packages"],
                kwargs={"on": "version", "how": query.how_full_outer_join},
                want=[
                    ({"version": "2.7", "downloads": 1000}, None),
                    (
                        {"version": "3.5", "downloads": 2000},
                        {"version": "3.5", "downloads": 2000},
                    ),
                    (
                        {"version": "3.6", "downloads": 3000},
                        {"version": "3.6", "downloads": 2000},
                    ),
                    (
                        {"version": "3.7", "downloads": 3000},
                        {"version": "3.7", "downloads": 2000},
                    ),
                    (None, {"version": "3.8", "downloads": 500}),
                ],
            ),
        ]

        for case in cases:
            with self.subTest(msg=case.msg, args=case.args, kwargs=case.kwargs):
                # Cases name their tables; resolve the names on the fixture.
                tables = [getattr(data, name) for name in case.args]
                got = self._callFUT(*tables, **case.kwargs)

                self._assert_same_rows(got, case.want)
                self.assertEqual(got, case.want)
                self.assertTrue(data.x_packages == copied.x_packages, "not modified")
                self.assertTrue(data.y_packages == copied.y_packages, "not modified")

    def test_multi_keys(self):
        """Join with distinct left/right keys, including multi-column keys."""

        class data:
            classes = [
                {"id": 1, "year": "1", "name": "A"},
                {"id": 2, "year": "1", "name": "B"},
                {"id": 3, "year": "1", "name": "C"},
                {"id": 4, "year": "2", "name": "A"},
                {"id": 5, "year": "2", "name": "B"},
            ]
            students = [
                {"id": 1, "year": "1", "class": "A", "cid": 1, "name": "foo"},
                {"id": 2, "year": "1", "class": "A", "cid": 1, "name": "bar"},
                {"id": 3, "year": "1", "class": "B", "cid": 2, "name": "boo"},
                {"id": 4, "year": "1", "class": "C", "cid": 3, "name": "yoo"},
            ]

        # Pristine copies, used to verify that joining does not mutate inputs.
        class copied:
            classes = copy.deepcopy(data.classes)
            students = copy.deepcopy(data.students)

        C = namedtuple("C", "msg, args, kwargs, want")
        cases = [
            C(
                msg="inner join",
                args=[data.classes, data.students],
                kwargs={"left_on": "id", "right_on": "cid"},
                want=[
                    (
                        {"id": 1, "year": "1", "name": "A"},
                        {"id": 1, "year": "1", "class": "A", "cid": 1, "name": "foo"},
                    ),
                    (
                        {"id": 1, "year": "1", "name": "A"},
                        {"id": 2, "year": "1", "class": "A", "cid": 1, "name": "bar"},
                    ),
                    (
                        {"id": 2, "year": "1", "name": "B"},
                        {"id": 3, "year": "1", "class": "B", "cid": 2, "name": "boo"},
                    ),
                    (
                        {"id": 3, "year": "1", "name": "C"},
                        {"id": 4, "year": "1", "class": "C", "cid": 3, "name": "yoo"},
                    ),
                ],
            ),
            C(
                msg="inner join with multi keys",
                args=[data.classes, data.students],
                kwargs={"left_on": ("year", "name"), "right_on": ("year", "class")},
                want=[
                    (
                        {"id": 1, "year": "1", "name": "A"},
                        {"id": 1, "year": "1", "class": "A", "cid": 1, "name": "foo"},
                    ),
                    (
                        {"id": 1, "year": "1", "name": "A"},
                        {"id": 2, "year": "1", "class": "A", "cid": 1, "name": "bar"},
                    ),
                    (
                        {"id": 2, "year": "1", "name": "B"},
                        {"id": 3, "year": "1", "class": "B", "cid": 2, "name": "boo"},
                    ),
                    (
                        {"id": 3, "year": "1", "name": "C"},
                        {"id": 4, "year": "1", "class": "C", "cid": 3, "name": "yoo"},
                    ),
                ],
            ),
            C(
                msg="inner join with multi keys2",
                args=[data.students, data.classes],
                kwargs={"left_on": ("year", "class"), "right_on": ("year", "name")},
                want=[
                    (
                        {"id": 1, "year": "1", "class": "A", "cid": 1, "name": "foo"},
                        {"id": 1, "year": "1", "name": "A"},
                    ),
                    (
                        {"id": 2, "year": "1", "class": "A", "cid": 1, "name": "bar"},
                        {"id": 1, "year": "1", "name": "A"},
                    ),
                    (
                        {"id": 3, "year": "1", "class": "B", "cid": 2, "name": "boo"},
                        {"id": 2, "year": "1", "name": "B"},
                    ),
                    (
                        {"id": 4, "year": "1", "class": "C", "cid": 3, "name": "yoo"},
                        {"id": 3, "year": "1", "name": "C"},
                    ),
                ],
            ),
        ]

        for case in cases:
            with self.subTest(msg=case.msg, kwargs=case.kwargs):
                got = self._callFUT(*case.args, **case.kwargs)
                self._assert_same_rows(got, case.want)

        self.assertTrue(data.students == copied.students, "not modified")
        self.assertTrue(data.classes == copied.classes, "not modified")


class _DifferenceReportText:
    """Assertion message that renders ``want`` and ``got`` side by side.

    The text is only built when the object is converted with ``str()``.
    Each row is JSON-encoded with sorted keys and padded to ``COLSIZE``
    characters per column; when the two sequences differ in length the
    shorter side is filled with ``None`` (rendered as ``null``).
    """

    def __init__(self, *, got, want):
        self.want = want
        self.got = got

    def __str__(self):
        import json

        row_template = "{left:%d}\t{right:%d}" % (COLSIZE, COLSIZE)

        def encode(value):
            return json.dumps(value, sort_keys=True)

        # Leading empty string makes the report start on a fresh line.
        lines = [
            "",
            row_template.format(left="want", right="got"),
            "----------------------------------------------------------------------",
        ]
        lines.extend(
            row_template.format(left=encode(wanted), right=encode(actual))
            for wanted, actual in itertools.zip_longest(self.want, self.got)
        )
        return "\n".join(lines)


# Run the tests through unittest's command-line runner when this module is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()