-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
63 lines (50 loc) · 1.62 KB
/
main.py
File metadata and controls
63 lines (50 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io

import requests
from bs4 import BeautifulSoup
from soupselect import select
def main():
    """Scrape every postcode listed in ``postcodes.txt``.

    Reads the file line by line, removes spaces and surrounding whitespace
    from each line, and passes the result to ``scrape``. Every postcode for
    which ``scrape`` does not return ``'Invalid'`` is appended on its own
    line to ``result.txt``. Blank lines are skipped.
    """
    with open('postcodes.txt', 'r') as f:
        for line in f:
            postcode = line.replace(" ", "").strip()
            if not postcode:
                # A blank (or trailing empty) line would otherwise trigger a
                # lookup with an empty postcode.
                continue
            if scrape(postcode) != 'Invalid':
                # Append per postcode so progress survives a later failure.
                with open("result.txt", "a") as myfile:
                    myfile.write("%s\n" % postcode)
#given a post code, iterate over all the pages
def scrape(postcode, page=1, timeout=30):
    """Scrape all result pages for ``postcode``, starting at ``page``.

    Every instructor entry found is printed and appended to ``go.txt`` as a
    UTF-8 line of the form ``postcode|name|mail|phone|cpd|cop``.

    Args:
        postcode: Postcode to look up; sent as the ``postcode`` query
            parameter.
        page: Page number to start from; sent as ``pageNumber`` and
            incremented until a page has no results.
        timeout: Seconds to wait for each HTTP response before
            ``requests`` gives up.

    Returns:
        ``'Invalid'`` if any response body is shorter than 8000 characters
        (the script treats that as an invalid postcode), otherwise
        ``"no pages left"`` once a page contains no result items.

    Raises:
        requests.exceptions.RequestException: if a request fails or times
            out.
        IndexError: if a result item lacks the expected ``h3``, link, span
            or second ``div.instructor-details`` element.
    """
    base_url = ('http://finddrivinginstructor.direct.gov.uk'
                '/DSAFindNearestWebApp/findNearest.form')

    if page == 1:
        print("*************** %s *******************" % postcode)

    # Iterate instead of recursing once per page, so a postcode with many
    # pages cannot exhaust the interpreter's recursion limit.
    while True:
        # Let requests build the query string so the postcode is URL-encoded;
        # a list of pairs keeps the parameter order stable.
        query = [('postcode', postcode), ('pageNumber', page)]
        resp = requests.get(base_url, params=query, timeout=timeout).text

        # NOTE(review): the 8000-character threshold also fires on later
        # pages, so a short response mid-run still reports 'Invalid' even if
        # earlier pages were written - confirm this is intended.
        if len(resp) < 8000:
            print("Invalid post codes")
            return 'Invalid'

        soup = BeautifulSoup(resp)
        results_list = select(soup, 'ul.results-list li')
        if not results_list:
            print("No more pages left.")
            return "no pages left"

        print("Page %s" % page)
        # Open once per page, explicitly as UTF-8, so non-ASCII names do not
        # raise UnicodeEncodeError on write.
        with io.open("go.txt", "a", encoding="utf-8") as out:
            for item in results_list:
                name = select(item, 'h3')[0].get_text()
                details = select(item, 'div.instructor-details')
                contact = details[0]
                # Keep the text after the last ':' of the link target
                # (presumably a "mailto:" href - verify against the page).
                mail = select(contact, 'a')[0].get('href').split(":")[-1]
                phone = select(contact, 'span')[0].get_text()
                badges = details[1]
                # The flags record whether the marker spans are present.
                cpd = bool(select(badges, 'span.cpd'))
                cop = bool(select(badges, 'span.cop'))
                print("%s %s %s %s %s" % (name, mail, phone, cpd, cop))
                out.write(u"%s|%s|%s|%s|%s|%s\n"
                          % (postcode, name, mail, phone, cpd, cop))
        page += 1
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()