-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
40 lines (29 loc) · 971 Bytes
/
parser.py
File metadata and controls
40 lines (29 loc) · 971 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from lxml import html
from bs4 import BeautifulSoup
import urllib3
import openpyxl as xl
import argparse
class parserFb():
    """Scraper skeleton that downloads cp1251-encoded pages and parses them.

    Only the page download is implemented. HTML extraction, reading URLs
    from an xlsx file and the top-level driver are still stubs.
    """

    def __init__(self, path):
        """Create the urllib3 connection pool and remember *path*.

        Args:
            path: Stored unchanged as ``self.path`` (presumably the path to
                the file with links -- confirm against callers).
        """
        self.http = urllib3.PoolManager()
        self.path = path

    # PAGE SCRAPING
    def load_www(self, uri):
        """Download one page and pass its decoded text to ``parse_html``.

        The response body is decoded from cp1251 exactly once and handed
        on as text. Re-encoding it to UTF-8 bytes would make BeautifulSoup
        run its own encoding detection on the bytes, which may honour a
        ``windows-1251`` declaration inside the page and garble the text.

        Args:
            uri: Indexable whose first element is the page URL as text;
                only ``uri[0]`` is read.

        Returns:
            None.
        """
        # NOTE(review): the URL is sent as cp1251-encoded bytes; confirm
        # that the installed urllib3 version accepts a bytes URL.
        url = uri[0].encode('cp1251')
        response = self.http.request('GET', url)
        self.parse_html(html_data=response.data.decode('cp1251'))

    def parse_html(self, html_data):
        """Parse *html_data* with BeautifulSoup's ``html.parser`` backend.

        Nothing is extracted or returned yet.

        Args:
            html_data: Markup to parse.
        """
        # TODO: extract the required data from ``soup``; it is unused so far.
        soup = BeautifulSoup(html_data, "html.parser")

    def load_urls_from_xlsx(self, path):
        """Stub: not implemented yet; accepts *path* and does nothing."""

    def start(self, path):
        """Stub: not implemented yet; accepts *path* and does nothing."""
if __name__ == '__main__':
    # Command-line entry point: read the --path option and run the scraper.
    arg_parser = argparse.ArgumentParser(description=u'Укажите путь к файлу')
    arg_parser.add_argument('--path', help='Путь к файлу со ссылками')
    args = arg_parser.parse_args()
    # Instantiate the class under a separate name. Rebinding ``parserFb`` to
    # the path string hid the class and made ``.start()`` fail on a str.
    fb_parser = parserFb(args.path)
    # ``start`` declares a required ``path`` parameter, so pass it explicitly.
    fb_parser.start(args.path)