-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearch.py
More file actions
113 lines (90 loc) · 3.13 KB
/
search.py
File metadata and controls
113 lines (90 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# import json
import time
import sys
from elasticsearch import Elasticsearch
# from elasticsearch.helpers import bulk
import tensorflow as tf
import tensorflow_hub as hub
def connectToES(host="localhost", port=9200):
    """
    Connect to an ElasticSearch server and verify the node is reachable.

    Exits the process if the server does not respond to a ping.

    :param host: Host Address where ElasticSearch is installed
    :type host: String
    :param port: Port Address for Elastic Search
    :type port: int
    :return: Elastic Search instance
    :rtype: Elasticsearch
    """
    connection = Elasticsearch([{"host": host, "port": port}])
    print("[TESTING] Connection to ElasticSearch Server")
    # Guard clause: bail out of the whole program if the node is unreachable.
    if not connection.ping():
        print("[ERROR] Could not connect to Elastic Search")
        sys.exit(500)
    print("[INFO] Connected to Elastic Search!")
    return connection
def lexicalSearch(es_instance, q):
    """
    Run a keyword (lexical) match query against the question titles and
    print each hit's relevance score alongside its title.

    :param es_instance: Elastic Search instance
    :type es_instance: Elasticsearch
    :param q: Query Question
    :type q: String
    :return: All the question matches with query questions
    :rtype: List[Questions]
    """
    # Plain full-text match on the 'title' field only.
    body = {'query': {"match": {'title': q}}}
    response = es_instance.search(index='questions-index', body=body)
    print("[INFO] Lexical aka KeyWord Search : \n ")
    for match in response['hits']['hits']:
        print(str(match['_score']) + "\t" + match['_source']['title'])
    print("*" * 70)
def semantic_search_by_vector_similarity(es_instance, q, tfmodel):
    """
    Embed the query with the given TF model and rank questions by cosine
    similarity against the stored 'title_vector' field, printing each hit.

    :param es_instance: Elastic Search instance
    :type es_instance: Elasticsearch
    :param q: Query Question
    :type q: String
    :param tfmodel: Tensorflow pretrained Model
    :type tfmodel: tf.model
    :return: All the question matches semantically based on Vector representation
    :rtype: List<Questions>
    """
    # Convert the model's tensor output to a plain Python list for the ES body.
    embedding = tf.make_ndarray(tf.make_tensor_proto(tfmodel([q]))).tolist()[0]
    # script_score: +1.0 keeps the score non-negative (cosine is in [-1, 1]).
    body = {
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": "cosineSimilarity(params.query_vector, 'title_vector') + 1.0",
                    "params": {"query_vector": embedding},
                },
            }
        }
    }
    response = es_instance.search(index='questions-index', body=body)
    print("[INFO] Semantic Similarity Search:\n")
    for match in response['hits']['hits']:
        print(str(match['_score']) + "\t" + match['_source']['title'])
    print("*********************************************************************************")
if __name__ == "__main__":
    es = connectToES("localhost", 9200)
    # Load the Universal Sentence Encoder from a local copy; the hub URL
    # below is the original remote source if the local directory is missing.
    # model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
    model = hub.load("./data/USE4/")
    # REPL loop: keep answering queries until the user types the sentinel "END".
    while True:  # was `while 100:` — truthy constant, same behavior, clearer intent
        input_query = input("Enter Query Question : ")
        if input_query == "END":
            break
        # Start timing after the sentinel check so quitting isn't timed.
        start = time.time()
        # BUG FIX: original was "Query : ".format(input_query) — the missing
        # {} placeholder silently dropped the query text from the output.
        print("Query : {}".format(input_query))
        lexicalSearch(es, input_query)
        semantic_search_by_vector_similarity(es, input_query, model)
        end = time.time()
        print("Total Time taken {} ".format(end - start))