-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfin.py
More file actions
81 lines (66 loc) · 2.18 KB
/
Copy pathfin.py
File metadata and controls
81 lines (66 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import sys
import requests
import pandas_datareader as pdr
import pandas as pd
import numpy as np
from functools import wraps, reduce
import urllib
import requests
from bs4 import BeautifulSoup
#--- ticker search
def search_tickers(tick, start=0, end=10, symbols=None):
    """Search the IEX symbol table for a ticker, widening the window on a miss.

    Rows ``start:end`` (positional) of the symbol table are checked for a row
    whose ``symbol`` column equals ``tick``.  On a miss the window moves
    forward -- the old ``end`` becomes the new start and the new end is 1.5x
    the old one -- until a window whose end is at least 10000 has been
    searched or the table is exhausted.

    Args:
        tick: Ticker symbol to look for (exact match on the ``symbol`` column).
        start: Positional index of the first row of the initial window.
        end: Positional index one past the last row of the initial window.
        symbols: Optional pre-fetched symbol table (a DataFrame containing at
            least the ``date``, ``iexId``, ``isEnabled``, ``type`` and
            ``symbol`` columns).  When omitted, the table is downloaded once
            via ``pdr.get_iex_symbols()``.

    Returns:
        The first remaining field of the first matching row once the
        ``date``/``iexId``/``isEnabled``/``type`` columns are dropped, or
        ``None`` (after printing a message) when the ticker is not found.
    """
    search_limit = 10000  # stop widening once a window reaches this row index

    # Fetch the table once instead of re-downloading it for every window.
    if symbols is None:
        symbols = pdr.get_iex_symbols()
    table = symbols.drop(['date', 'iexId', 'isEnabled', 'type'], axis=1)

    # .iloc only accepts integer bounds, so keep the window limits integral.
    lo, hi = int(start), int(end)
    while True:
        window = table.iloc[lo:hi, :]
        hits = window[window['symbol'] == tick]
        if len(hits) > 0:
            return np.array(hits).flatten()[0]
        if hi >= search_limit or lo >= len(table):
            break
        # Grow by 1.5x, but always by at least one row so that tiny windows
        # (end of 0 or 1) cannot stall the search.
        lo, hi = hi, max(hi + 1, int(hi * 1.5))

    print('ticker: {} was not found, please refine search start position...\n'.format(tick))
    return None
#--- decorator that takes a query string and creates a google search query
def url_builder(f):
    """Decorate a query-string producer so that it returns a Google search URL.

    The wrapped function is called with the caller's arguments and its string
    result is appended to ``https://www.google.com/search?q=``.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        search_terms = f(*args, **kwargs)
        prefix = 'https://www.google.com/search' + '?q='
        return prefix + search_terms
    return wrapper
#--- wrapped function
@url_builder
def create_query(search):
    """Turn free-text search terms into a Google search URL.

    The text is URL-encoded (spaces become ``+``; reserved characters such as
    ``&``, ``#`` and ``?`` are percent-escaped so they cannot break the query
    string) and trailing ``+`` separators left by trailing spaces are removed.
    The ``url_builder`` decorator then prepends the Google search base URL.

    Args:
        search: The search phrase, e.g. a company name.

    Returns:
        The full search URL as a string.
    """
    # Imported locally so the block does not rely on ``import urllib`` having
    # loaded the ``urllib.parse`` submodule.
    from urllib.parse import quote_plus

    return quote_plus(search).rstrip('+')
def main():
    """Look up a ticker, Google the result, and print the links on the top hit.

    Steps: resolve the hard-coded ticker ``AAME`` through ``search_tickers``,
    build a Google search URL from the result, collect the text of every
    ``<cite>`` element on the results page, fetch the first one, and print a
    ``{link text: href}`` mapping of the anchors found on that page.

    Returns early (after printing a message where appropriate) when there is
    no ticker result or the search page yields no links.
    """
    request_timeout = 10  # seconds; avoids hanging forever on a dead host

    # set the stock ticker
    tkr = search_tickers(tick='AAME', start=0, end=1000)
    if tkr is None:
        # no ticker result, so there is nothing to build a query from
        return

    # create the query to be passed to google
    google_query = create_query(search=tkr)
    r = requests.get(google_query, timeout=request_timeout)
    soup = BeautifulSoup(r.text, "lxml")

    # get all links returned from the google search
    links = [s.get_text() for s in soup.find_all('cite')]
    if not links:
        print('no links were found for query: {}\n'.format(google_query))
        return

    #--- search through the first link
    url = links[0].strip()
    if '://' not in url:
        # the cite text may omit the scheme, which requests refuses to fetch
        url = 'http://' + url
    url_r = requests.get(url, timeout=request_timeout)
    url_soup = BeautifulSoup(url_r.text, "lxml")
    company_page_links = {s.get_text(): s.get('href') for s in url_soup.find_all('a')}
    print(company_page_links)
# Run the lookup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()