From e7bd940243d71a3c976383a1e32e59acf859e31f Mon Sep 17 00:00:00 2001 From: guptaarth87 <72153219+guptaarth87@users.noreply.github.com> Date: Fri, 2 Oct 2020 22:01:58 +0530 Subject: [PATCH 1/2] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 94f6970..558caf5 100644 --- a/README.rst +++ b/README.rst @@ -3,8 +3,8 @@ py_stringsimjoin This project seeks to build a Python software package that provides scalable implementation of string similarity joins over two tables, for commonly used similarity measures such as Jaccard, Dice, cosine, overlap, overlap coefficient and edit distance. The package is free, open-source, and BSD-licensed. -Important links -=============== +Important links/links to ponder +=============================== * Project Homepage: https://sites.google.com/site/anhaidgroup/projects/magellan/py_stringsimjoin * Code repository: https://github.com/anhaidgroup/py_stringsimjoin From f8c2d1e380d39a491b887684ae734a5b7cbda24c Mon Sep 17 00:00:00 2001 From: guptaarth87 <72153219+guptaarth87@users.noreply.github.com> Date: Fri, 2 Oct 2020 22:08:03 +0530 Subject: [PATCH 2/2] Update set_sim_join_cy.pyx --- py_stringsimjoin/join/set_sim_join_cy.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/py_stringsimjoin/join/set_sim_join_cy.pyx b/py_stringsimjoin/join/set_sim_join_cy.pyx index a954b13..f601e13 100644 --- a/py_stringsimjoin/join/set_sim_join_cy.pyx +++ b/py_stringsimjoin/join/set_sim_join_cy.pyx @@ -1,5 +1,6 @@ # set similarity join +#importing important libraries(pandas for data structure) import pandas as pd import pyprind @@ -104,7 +105,7 @@ def set_sim_join_cy(ltable, rtable, for j in range(min(m, prefix_length)): if index.index.find(tokens[j]) == index.index.end(): continue - candidates = index.index[tokens[j]] + candidates = index.index[tokens[j]] #nested loop for cand in candidates: current_overlap = candidate_overlap[cand.first] if
current_overlap != -1: