diff --git a/README.rst b/README.rst index 94f6970..558caf5 100644 --- a/README.rst +++ b/README.rst @@ -3,8 +3,8 @@ py_stringsimjoin This project seeks to build a Python software package that provides scalable implementation of string similarity joins over two tables, for commonly used similarity measures such as Jaccard, Dice, cosine, overlap, overlap coefficient and edit distance. The package is free, open-source, and BSD-licensed. -Important links -=============== +Important links/links to ponder +=============================== * Project Homepage: https://sites.google.com/site/anhaidgroup/projects/magellan/py_stringsimjoin * Code repository: https://github.com/anhaidgroup/py_stringsimjoin diff --git a/py_stringsimjoin/join/set_sim_join_cy.pyx b/py_stringsimjoin/join/set_sim_join_cy.pyx index a954b13..f601e13 100644 --- a/py_stringsimjoin/join/set_sim_join_cy.pyx +++ b/py_stringsimjoin/join/set_sim_join_cy.pyx @@ -1,5 +1,6 @@ # set similarity join +# importing important libraries (pandas for data structures) import pandas as pd import pyprind @@ -104,7 +105,7 @@ def set_sim_join_cy(ltable, rtable, for j in range(min(m, prefix_length)): if index.index.find(tokens[j]) == index.index.end(): continue - candidates = index.index[tokens[j]] + candidates = index.index[tokens[j]] #nested loop for cand in candidates: current_overlap = candidate_overlap[cand.first] if current_overlap != -1: