-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
28 lines (23 loc) · 665 Bytes
/
main.py
File metadata and controls
28 lines (23 loc) · 665 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import threading
from queue import Queue
from spider import spider
from urlparsing import *
from crawler1 import *
PROJECT_NAME = 'Flipkart'
HOMEPAGE = 'https://www.flipkart.com'
DOMAIN_NAME = get_domain_name(HOMEPAGE)
QUEUE_FILE = PROJECT_NAME + '/queue.txt'
CRAWLED_FILE = PROJECT_NAME + '/crawled.txt'
queue = Queue()
spider(PROJECT_NAME, HOMEPAGE, DOMAIN_NAME)
def create_jobs():
for link in file_to_set(QUEUE_FILE):
queue.put(link)
queue.join()
crawl()
def crawl():
queued_links = file_to_set(QUEUE_FILE)
if len(queued_links) > 0:
print(str(len(queued_links)) + ' links in the queue')
create_jobs()
crawl()