-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindeed.py
More file actions
29 lines (24 loc) · 819 Bytes
/
Copy pathindeed.py
File metadata and controls
29 lines (24 loc) · 819 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import requests
from bs4 import BeautifulSoup
# Number of results requested per search page; also the step between
# consecutive `start` offsets when paging through results.
LIMIT = 50
# Indeed search URL for Python job listings, LIMIT results per page.
URL = f"https://www.indeed.com/jobs?q=python&limit={LIMIT}"
def extract_indeed_pages():
    """Return the highest page number linked from the search pagination bar.

    Fetches the first results page at ``URL`` and reads the links inside
    ``<div class="pagination">``.

    Returns:
        The largest numeric page label found, or ``1`` when the page has no
        pagination bar or no numbered links (a single page of results).

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # A timeout keeps a stalled connection from hanging the scraper forever.
    response = requests.get(URL, timeout=10)
    # Fail loudly on an error page instead of silently parsing it.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    pagination = soup.find("div", {"class": "pagination"})
    if pagination is None:
        return 1

    # Keep only links whose label is a number. This skips non-numeric
    # navigation links wherever they appear, rather than assuming the last
    # link is always one, and never drops a real page number.
    pages = []
    for link in pagination.find_all("a"):
        label = link.get_text(strip=True)
        if label.isdecimal():
            pages.append(int(label))
    return max(pages, default=1)
def extract_indeed_jobs(last_page):
    """Collect job titles from the first ``last_page`` result pages.

    Args:
        last_page: Number of result pages to fetch. Page ``i`` (zero-based)
            is requested with ``start=i * LIMIT``.

    Returns:
        A list of ``{"title": <str>}`` dicts, one per job card that has a
        title, in the order the cards appear.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    jobs = []
    for page in range(last_page):
        # A timeout keeps a stalled connection from hanging the scraper.
        response = requests.get(f"{URL}&start={page * LIMIT}", timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        cards = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
        for card in cards:
            # Any of heading / anchor / attribute may be missing on a card;
            # skip such cards instead of crashing the whole run.
            heading = card.find("h2", {"class": "title"})
            anchor = heading.find("a") if heading is not None else None
            title = anchor.get("title") if anchor is not None else None
            if not title:
                continue
            # Kept so the existing console output is unchanged.
            print(title)
            jobs.append({"title": title})
    return jobs