Skip to content

Commit 82953d0

Browse files
VinciGit00 and claude
committed
feat: rewrite all examples for v2 API surface
30 comprehensive examples covering every v2 endpoint: Scrape (5): markdown, html, screenshot, fetch config, async concurrent Extract (6): basic, pydantic schema, json schema, fetch config, llm config, async Search (4): basic, with schema, num results, async concurrent Schema (2): generate, refine existing Crawl (5): basic with polling, patterns, fetch config, stop/resume, async Monitor (5): create, with schema, with config, manage lifecycle, async History (1): filters and pagination Credits (2): sync, async All examples moved to root /examples/ directory (flat structure). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 01a1e72 commit 82953d0

39 files changed

Lines changed: 789 additions & 162 deletions

examples/async_crawl_example.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
"""
Async crawl example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def _poll_until_done(client, crawl_id):
    """Poll the crawl job every 2 seconds; return the final status payload.

    Terminates when the job reports either "completed" or "failed".
    """
    while True:
        state = await client.crawl.status(crawl_id)
        print(f"Status: {state.get('status')}")
        if state.get("status") in ("completed", "failed"):
            return state
        await asyncio.sleep(2)


async def main():
    """Start a crawl and wait for it to reach a terminal state."""
    async with AsyncClient() as client:
        # Start crawl
        job = await client.crawl.start(
            "https://example.com",
            depth=2,
            max_pages=5,
        )
        print("Crawl started:", json.dumps(job, indent=2))

        # Poll for completion
        final_status = await _poll_until_done(client, job["id"])
        print("\nResult:", json.dumps(final_status, indent=2))


asyncio.run(main())

examples/async_credits_example.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
"""
Async credits check.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Fetch the remaining API credit balance and pretty-print it."""
    async with AsyncClient() as client:
        balance = await client.credits()
        print(json.dumps(balance, indent=2))


asyncio.run(main())

examples/async_extract_example.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
"""
Async extract example - extract data from multiple pages concurrently.
"""

import asyncio
import json

from pydantic import BaseModel, Field

from scrapegraph_py import AsyncClient


class PageInfo(BaseModel):
    """Schema describing the structured data pulled from each page."""

    title: str = Field(description="Page title")
    description: str = Field(description="Brief description of the page content")


async def _extract_page(client, url):
    """Run one structured extraction against *url* using the PageInfo schema."""
    return await client.extract(
        url=url,
        prompt="Extract the page title and a brief description",
        output_schema=PageInfo,
    )


async def main():
    """Extract structured info from several pages at the same time."""
    async with AsyncClient() as client:
        urls = [
            "https://example.com",
            "https://httpbin.org/html",
        ]

        # Fan out one extraction per URL and wait for all of them.
        results = await asyncio.gather(*(_extract_page(client, u) for u in urls))

        for url, result in zip(urls, results):
            print(f"\n=== {url} ===")
            print(json.dumps(result, indent=2))


asyncio.run(main())

examples/async_monitor_example.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""
Async monitor example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Create a scheduled monitor, then list every monitor on the account."""
    async with AsyncClient() as client:
        # Create a monitor
        created = await client.monitor.create(
            name="Async Price Tracker",
            url="https://example.com/products",
            prompt="Extract product prices",
            cron="0 12 * * *",  # Every day at noon
        )
        print("Created:", json.dumps(created, indent=2))

        # List all monitors
        listing = await client.monitor.list()
        print("\nAll monitors:", json.dumps(listing, indent=2))


asyncio.run(main())

examples/async_scrape_example.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""
Async scrape example - scrape multiple pages concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Scrape several URLs concurrently and print each page's payload."""
    async with AsyncClient() as client:
        # Scrape multiple pages concurrently
        targets = [
            "https://example.com",
            "https://httpbin.org/html",
        ]

        # One scrape task per target URL, all awaited together.
        pages = await asyncio.gather(*(client.scrape(t) for t in targets))

        for target, page in zip(targets, pages):
            print(f"\n=== {target} ===")
            print(json.dumps(page, indent=2))


asyncio.run(main())

examples/async_search_example.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
"""
Async search example - run multiple searches concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Run several searches at once and print each result set."""
    async with AsyncClient() as client:
        queries = [
            "best python frameworks 2025",
            "top javascript libraries 2025",
        ]

        # Launch every search concurrently, three results each.
        outcomes = await asyncio.gather(
            *(client.search(query, num_results=3) for query in queries)
        )

        for query, outcome in zip(queries, outcomes):
            print(f"\n=== {query} ===")
            print(json.dumps(outcome, indent=2))


asyncio.run(main())

examples/crawl_basic_example.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Crawl a website and get pages as markdown.

The crawl endpoint discovers and fetches multiple pages from a website,
starting from a given URL and following links up to a specified depth.
"""

import json
import time

from scrapegraph_py import Client

client = Client()  # uses SGAI_API_KEY env var

try:
    # Start the crawl
    job = client.crawl.start(
        "https://example.com",
        depth=2,
        max_pages=5,
        format="markdown",
    )
    print("Crawl started:", json.dumps(job, indent=2))

    # Poll for status until the job reaches a terminal state
    crawl_id = job["id"]
    while True:
        status = client.crawl.status(crawl_id)
        print(f"Status: {status.get('status')}")
        if status.get("status") in ("completed", "failed"):
            break
        time.sleep(2)

    print("\nFinal result:", json.dumps(status, indent=2))
finally:
    # Release the client even if an API call above raises.
    client.close()
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
"""
Stop and resume a crawl job.

You can stop a running crawl and resume it later.
"""

import json

from scrapegraph_py import Client

client = Client()  # uses SGAI_API_KEY env var

try:
    # Start a crawl
    job = client.crawl.start("https://example.com", depth=3, max_pages=50)
    crawl_id = job["id"]
    print("Crawl started:", crawl_id)

    # Stop the crawl
    stopped = client.crawl.stop(crawl_id)
    print("Stopped:", json.dumps(stopped, indent=2))

    # Resume the crawl later
    resumed = client.crawl.resume(crawl_id)
    print("Resumed:", json.dumps(resumed, indent=2))
finally:
    # Release the client even if an API call above raises.
    client.close()
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""
Crawl with custom fetch configuration.

Use FetchConfig to enable stealth mode, JS rendering, etc. for all
pages during the crawl.
"""

import json

from scrapegraph_py import Client, FetchConfig

client = Client()  # uses SGAI_API_KEY env var

try:
    # The same fetch settings are applied to every page the crawler visits.
    job = client.crawl.start(
        "https://example.com",
        depth=2,
        max_pages=10,
        format="html",
        fetch_config=FetchConfig(
            stealth=True,
            render_js=True,
            wait_ms=1000,
        ),
    )
    print("Crawl started:", json.dumps(job, indent=2))
finally:
    # Release the client even if the API call above raises.
    client.close()
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
"""
Crawl a website with URL pattern filtering.

Use include_patterns and exclude_patterns to control which pages
the crawler visits. Patterns support * (any chars) and ** (any path segments).
"""

import json

from scrapegraph_py import Client

client = Client()  # uses SGAI_API_KEY env var

try:
    # Only blog and docs pages are crawled; admin and API paths are skipped.
    job = client.crawl.start(
        "https://example.com",
        depth=3,
        max_pages=20,
        format="markdown",
        include_patterns=["/blog/*", "/docs/**"],
        exclude_patterns=["/admin/*", "/api/*"],
    )
    print("Crawl started:", json.dumps(job, indent=2))
finally:
    # Release the client even if the API call above raises.
    client.close()

0 commit comments

Comments
 (0)