Skip to content

Commit 82953d0

Browse files
VinciGit00 and claude
committed
feat: rewrite all examples for v2 API surface
30 comprehensive examples covering every v2 endpoint: Scrape (5): markdown, html, screenshot, fetch config, async concurrent Extract (6): basic, pydantic schema, json schema, fetch config, llm config, async Search (4): basic, with schema, num results, async concurrent Schema (2): generate, refine existing Crawl (5): basic with polling, patterns, fetch config, stop/resume, async Monitor (5): create, with schema, with config, manage lifecycle, async History (1): filters and pagination Credits (2): sync, async All examples moved to root /examples/ directory (flat structure). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 01a1e72 commit 82953d0

39 files changed

Lines changed: 789 additions & 162 deletions

examples/async_crawl_example.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
"""
Async crawl example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def _poll_until_done(client, crawl_id):
    """Poll the crawl job every 2 seconds; return the final status payload.

    Terminates when the job reports either "completed" or "failed".
    """
    while True:
        state = await client.crawl.status(crawl_id)
        print(f"Status: {state.get('status')}")
        if state.get("status") in ("completed", "failed"):
            return state
        await asyncio.sleep(2)


async def main():
    """Start a crawl and wait for it to reach a terminal state."""
    async with AsyncClient() as client:
        # Start crawl
        job = await client.crawl.start(
            "https://example.com",
            depth=2,
            max_pages=5,
        )
        print("Crawl started:", json.dumps(job, indent=2))

        # Poll for completion
        final_status = await _poll_until_done(client, job["id"])
        print("\nResult:", json.dumps(final_status, indent=2))


asyncio.run(main())

examples/async_credits_example.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
"""
Async credits check.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Fetch the remaining API credit balance and pretty-print it."""
    async with AsyncClient() as client:
        balance = await client.credits()
        print(json.dumps(balance, indent=2))


asyncio.run(main())

examples/async_extract_example.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
"""
Async extract example - extract data from multiple pages concurrently.
"""

import asyncio
import json

from pydantic import BaseModel, Field

from scrapegraph_py import AsyncClient


class PageInfo(BaseModel):
    """Schema describing the structured data pulled from each page."""

    title: str = Field(description="Page title")
    description: str = Field(description="Brief description of the page content")


async def _extract_page(client, url):
    """Run one structured extraction against *url* using the PageInfo schema."""
    return await client.extract(
        url=url,
        prompt="Extract the page title and a brief description",
        output_schema=PageInfo,
    )


async def main():
    """Extract structured info from several pages at the same time."""
    async with AsyncClient() as client:
        urls = [
            "https://example.com",
            "https://httpbin.org/html",
        ]

        # Fan out one extraction per URL and wait for all of them.
        results = await asyncio.gather(*(_extract_page(client, u) for u in urls))

        for url, result in zip(urls, results):
            print(f"\n=== {url} ===")
            print(json.dumps(result, indent=2))


asyncio.run(main())

examples/async_monitor_example.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""
Async monitor example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Create a scheduled monitor, then list every monitor on the account."""
    async with AsyncClient() as client:
        # Create a monitor
        created = await client.monitor.create(
            name="Async Price Tracker",
            url="https://example.com/products",
            prompt="Extract product prices",
            cron="0 12 * * *",  # Every day at noon
        )
        print("Created:", json.dumps(created, indent=2))

        # List all monitors
        listing = await client.monitor.list()
        print("\nAll monitors:", json.dumps(listing, indent=2))


asyncio.run(main())

examples/async_scrape_example.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""
Async scrape example - scrape multiple pages concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Scrape several URLs concurrently and print each page's payload."""
    async with AsyncClient() as client:
        # Scrape multiple pages concurrently
        targets = [
            "https://example.com",
            "https://httpbin.org/html",
        ]

        # One scrape task per target URL, all awaited together.
        pages = await asyncio.gather(*(client.scrape(t) for t in targets))

        for target, page in zip(targets, pages):
            print(f"\n=== {target} ===")
            print(json.dumps(page, indent=2))


asyncio.run(main())

examples/async_search_example.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
"""
Async search example - run multiple searches concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Run several searches at once and print each result set."""
    async with AsyncClient() as client:
        queries = [
            "best python frameworks 2025",
            "top javascript libraries 2025",
        ]

        # Launch every search concurrently, three results each.
        outcomes = await asyncio.gather(
            *(client.search(query, num_results=3) for query in queries)
        )

        for query, outcome in zip(queries, outcomes):
            print(f"\n=== {query} ===")
            print(json.dumps(outcome, indent=2))


asyncio.run(main())

examples/crawl_basic_example.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Crawl a website and get pages as markdown.

The crawl endpoint discovers and fetches multiple pages from a website,
starting from a given URL and following links up to a specified depth.
"""

import json
import time

from scrapegraph_py import Client

client = Client()  # uses SGAI_API_KEY env var

try:
    # Start the crawl
    job = client.crawl.start(
        "https://example.com",
        depth=2,
        max_pages=5,
        format="markdown",
    )
    print("Crawl started:", json.dumps(job, indent=2))

    # Poll for status until the job reaches a terminal state
    crawl_id = job["id"]
    while True:
        status = client.crawl.status(crawl_id)
        print(f"Status: {status.get('status')}")
        if status.get("status") in ("completed", "failed"):
            break
        time.sleep(2)

    print("\nFinal result:", json.dumps(status, indent=2))
finally:
    # Release the client even if an API call above raises.
    client.close()
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
"""
Stop and resume a crawl job.

You can stop a running crawl and resume it later.
"""

import json

from scrapegraph_py import Client

client = Client()  # uses SGAI_API_KEY env var

try:
    # Start a crawl
    job = client.crawl.start("https://example.com", depth=3, max_pages=50)
    crawl_id = job["id"]
    print("Crawl started:", crawl_id)

    # Stop the crawl
    stopped = client.crawl.stop(crawl_id)
    print("Stopped:", json.dumps(stopped, indent=2))

    # Resume the crawl later
    resumed = client.crawl.resume(crawl_id)
    print("Resumed:", json.dumps(resumed, indent=2))
finally:
    # Release the client even if an API call above raises.
    client.close()
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""
Crawl with custom fetch configuration.

Use FetchConfig to enable stealth mode, JS rendering, etc. for all
pages during the crawl.
"""

import json

from scrapegraph_py import Client, FetchConfig

client = Client()  # uses SGAI_API_KEY env var

try:
    # The same fetch settings are applied to every page the crawler visits.
    job = client.crawl.start(
        "https://example.com",
        depth=2,
        max_pages=10,
        format="html",
        fetch_config=FetchConfig(
            stealth=True,
            render_js=True,
            wait_ms=1000,
        ),
    )
    print("Crawl started:", json.dumps(job, indent=2))
finally:
    # Release the client even if the API call above raises.
    client.close()
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
"""
Crawl a website with URL pattern filtering.

Use include_patterns and exclude_patterns to control which pages
the crawler visits. Patterns support * (any chars) and ** (any path segments).
"""

import json

from scrapegraph_py import Client

client = Client()  # uses SGAI_API_KEY env var

try:
    # Only blog and docs pages are crawled; admin and API paths are skipped.
    job = client.crawl.start(
        "https://example.com",
        depth=3,
        max_pages=20,
        format="markdown",
        include_patterns=["/blog/*", "/docs/**"],
        exclude_patterns=["/admin/*", "/api/*"],
    )
    print("Crawl started:", json.dumps(job, indent=2))
finally:
    # Release the client even if the API call above raises.
    client.close()

0 commit comments

Comments
 (0)