Skip to content

Commit 9d4ff16

Browse files
VinciGit00 and claude committed
feat!: migrate python SDK to v2 API surface
Port the Python SDK to the new v2 API surface, mirroring scrapegraph-js PR #11. Breaking changes: - smartscraper -> extract (POST /api/v1/extract) - searchscraper -> search (POST /api/v1/search) - scrape now uses format-specific config (markdown/html/screenshot/branding) - crawl/monitor are now namespaced: client.crawl.start(), client.monitor.create() - Removed: markdownify, agenticscraper, sitemap, healthz, feedback, scheduled jobs - Auth: sends both Authorization: Bearer and SGAI-APIKEY headers - Added X-SDK-Version header, base_url parameter for custom endpoints - Version bumped to 2.0.0 Tested against dev API (https://sgai-api-dev-v2.onrender.com/api/v1/scrape): - Scrape markdown: returns markdown content successfully - Scrape html: returns content successfully - All 72 unit tests pass with 81% coverage Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4ad9c25 commit 9d4ff16

42 files changed

Lines changed: 1820 additions & 10137 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

scrapegraph-py/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "scrapegraph_py"
3-
version = "1.12.2"
3+
version = "2.0.0"
44
description = "ScrapeGraph Python SDK for API"
55
authors = [
66
{ name = "Marco Vinciguerra", email = "marco@scrapegraphai.com" },
Lines changed: 62 additions & 97 deletions
Original file line number · Diff line number · Diff line change
@@ -1,97 +1,62 @@
1-
"""
2-
ScrapeGraphAI Python SDK
3-
4-
A comprehensive Python SDK for the ScrapeGraphAI API, providing both synchronous
5-
and asynchronous clients for all API endpoints.
6-
7-
Main Features:
8-
- SmartScraper: AI-powered web scraping with structured data extraction
9-
- SearchScraper: Web research across multiple sources
10-
- Agentic Scraper: Automated browser interactions and form filling
11-
- Crawl: Website crawling with AI extraction or markdown conversion
12-
- Markdownify: Convert web pages to clean markdown
13-
- Schema Generation: AI-assisted schema creation for data extraction
14-
- Scheduled Jobs: Automate recurring scraping tasks
15-
16-
Quick Start:
17-
>>> from scrapegraph_py import Client
18-
>>>
19-
>>> # Initialize client from environment variables
20-
>>> client = Client.from_env()
21-
>>>
22-
>>> # Basic scraping
23-
>>> result = client.smartscraper(
24-
... website_url="https://example.com",
25-
... user_prompt="Extract all product information"
26-
... )
27-
>>>
28-
>>> # With context manager
29-
>>> with Client.from_env() as client:
30-
... result = client.scrape(website_url="https://example.com")
31-
32-
Async Usage:
33-
>>> import asyncio
34-
>>> from scrapegraph_py import AsyncClient
35-
>>>
36-
>>> async def main():
37-
... async with AsyncClient.from_env() as client:
38-
... result = await client.smartscraper(
39-
... website_url="https://example.com",
40-
... user_prompt="Extract products"
41-
... )
42-
>>>
43-
>>> asyncio.run(main())
44-
45-
For more information visit: https://scrapegraphai.com
46-
Documentation: https://docs.scrapegraphai.com
47-
"""
48-
49-
from .async_client import AsyncClient
50-
from .client import Client
51-
52-
# Scrape Models
53-
from .models.scrape import (
54-
ScrapeRequest,
55-
GetScrapeRequest,
56-
)
57-
58-
# Scheduled Jobs Models
59-
from .models.scheduled_jobs import (
60-
GetJobExecutionsRequest,
61-
GetScheduledJobRequest,
62-
GetScheduledJobsRequest,
63-
JobActionRequest,
64-
JobActionResponse,
65-
JobExecutionListResponse,
66-
JobExecutionResponse,
67-
JobTriggerResponse,
68-
ScheduledJobCreate,
69-
ScheduledJobListResponse,
70-
ScheduledJobResponse,
71-
ScheduledJobUpdate,
72-
ServiceType,
73-
TriggerJobRequest,
74-
)
75-
76-
__all__ = [
77-
"Client",
78-
"AsyncClient",
79-
# Scrape Models
80-
"ScrapeRequest",
81-
"GetScrapeRequest",
82-
# Scheduled Jobs Models
83-
"ServiceType",
84-
"ScheduledJobCreate",
85-
"ScheduledJobUpdate",
86-
"ScheduledJobResponse",
87-
"ScheduledJobListResponse",
88-
"JobExecutionResponse",
89-
"JobExecutionListResponse",
90-
"JobTriggerResponse",
91-
"JobActionResponse",
92-
"GetScheduledJobsRequest",
93-
"GetScheduledJobRequest",
94-
"GetJobExecutionsRequest",
95-
"TriggerJobRequest",
96-
"JobActionRequest",
97-
]
1+
"""
2+
ScrapeGraphAI Python SDK v2
3+
4+
A Python SDK for the ScrapeGraphAI v2 API, providing both synchronous
5+
and asynchronous clients for intelligent web scraping powered by AI.
6+
7+
Quick Start:
8+
>>> from scrapegraph_py import Client
9+
>>> client = Client(api_key="sgai-...")
10+
>>> result = client.scrape("https://example.com")
11+
>>> result = client.extract("https://example.com", prompt="Extract prices")
12+
>>> job = client.crawl.start("https://example.com", depth=3)
13+
14+
Async Usage:
15+
>>> import asyncio
16+
>>> from scrapegraph_py import AsyncClient
17+
>>> async def main():
18+
... async with AsyncClient(api_key="sgai-...") as client:
19+
... result = await client.extract(
20+
... url="https://example.com",
21+
... prompt="Extract products"
22+
... )
23+
>>> asyncio.run(main())
24+
"""
25+
26+
from .async_client import AsyncClient
27+
from .client import Client
28+
from .config import VERSION
29+
from .models.crawl import CrawlFormat, CrawlRequest
30+
from .models.extract import ExtractRequest
31+
from .models.history import HistoryFilter
32+
from .models.monitor import MonitorCreateRequest
33+
from .models.schema import SchemaRequest
34+
from .models.scrape import ScrapeFormat, ScrapeRequest
35+
from .models.search import SearchRequest
36+
from .models.shared import FetchConfig, LlmConfig
37+
38+
__version__ = VERSION
39+
40+
__all__ = [
41+
"Client",
42+
"AsyncClient",
43+
# Shared config
44+
"FetchConfig",
45+
"LlmConfig",
46+
# Scrape
47+
"ScrapeFormat",
48+
"ScrapeRequest",
49+
# Extract
50+
"ExtractRequest",
51+
# Search
52+
"SearchRequest",
53+
# Schema
54+
"SchemaRequest",
55+
# Crawl
56+
"CrawlFormat",
57+
"CrawlRequest",
58+
# Monitor
59+
"MonitorCreateRequest",
60+
# History
61+
"HistoryFilter",
62+
]

0 commit comments

Comments (0)