-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.env.example
More file actions
111 lines (82 loc) · 4.04 KB
/
.env.example
File metadata and controls
111 lines (82 loc) · 4.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# ------------------------------------------------------------------------------
# Paths & config
# ------------------------------------------------------------------------------
# Path to the YAML config file (scrapers, redis, server). Used by server and CLI.
CONFIG_PATH=config.yml
# ------------------------------------------------------------------------------
# Scraper CLI (flags override these when running the CLI)
# ------------------------------------------------------------------------------
# URL to scrape: list page (with -index) or a single listing detail page.
# SCRAPE_URL=
# When true, scrape the listing index (list page) at the given URL; when false, scrape it as a single detail page.
# SCRAPE_INDEX=false
# Page number when using -index (1-based).
# SCRAPE_PAGE=1
# When using -index, fetch full details for each listing (slower).
# SCRAPE_POPULATE=false
# Output path for JSON; default is stdout.
# SCRAPE_OUTPUT=
# Enable verbose debug logging. The default is false; this example file sets it to true.
SCRAPE_DEBUG=true
# When true, scrape from the requested page through the last page; when false, only the requested page.
SCRAPE_ALL_PAGES=false
# ------------------------------------------------------------------------------
# Concurrency (an integer, or a percentage of NumCPU, e.g. 50% for half)
# ------------------------------------------------------------------------------
# Max parallel requests for scraping. Used by server and Engel & Völkers scraper.
SCRAPE_CONCURRENCY=1
# Optional: per-scraper concurrency (if a scraper uses it). Example for Engel & Völkers.
# ENGELVOELKERS_COM_CONCURRENCY=1
# ------------------------------------------------------------------------------
# Proxy (single URL or pool)
# ------------------------------------------------------------------------------
# Use proxy pool (validate and rotate). If false and PROXY_URL is set, use single proxy.
PROXY_POOL_ENABLED=true
# Single proxy URL used when the pool is disabled (e.g. http://host:port). Ignored when PROXY_POOL_ENABLED=true, as in this example.
PROXY_URL=http://137.184.96.68:80
# On startup, clear cached proxy list from disk before validating. Useful after changing proxy sources.
# CLEAR_PROXY_CACHE=false
# Path to JSON file where validated proxies are cached.
PROXY_CACHE_PATH=proxies.json
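# Cron schedule (5-field) for refreshing the proxy pool.
# NOTE: the minute field only spans 0-59, so "*/60" matches minute 0 only, i.e. once per hour
# (equivalent to "0 * * * *"); stricter cron parsers may reject a step of 60.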
PROXY_REFRESH_CRON="*/60 * * * *"
# URL used to check proxy exit IP (e.g. https://ipinfo.io/ip or http://api.ipify.org/ip).
PROXY_CHECK_URL=http://api.ipify.org/ip
# Timeout for each proxy check request (e.g. 5s).
PROXY_CHECK_TIMEOUT=5s
# How often to re-check proxies in the pool (e.g. 5s or 1m).
PROXY_CHECK_INTERVAL=5s
# Max concurrent validations when building/refreshing the pool.
PROXY_VALIDATE_CONCURRENCY=20
# ------------------------------------------------------------------------------
# HTTP client
# ------------------------------------------------------------------------------
# User-Agent sent with scraper and proxy-check requests.
USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
# ------------------------------------------------------------------------------
# Server (override config.yml; used when running the server)
# ------------------------------------------------------------------------------
# HTTP listen address (e.g. 0.0.0.0:8080).
# ADDRESS=0.0.0.0:8080
# Path for monitoring/health endpoint.
# MONITORING_PATH=/monitoring
# ------------------------------------------------------------------------------
# Redis (override config.yml; used by server for asynq and run store)
# ------------------------------------------------------------------------------
# Redis server address (host:port).
# REDIS_ADDR=localhost:6379
# Redis network (tcp or unix).
# REDIS_NETWORK=tcp
# Redis username (optional, for ACL).
# REDIS_USERNAME=
# Redis password (optional).
# REDIS_PASSWORD=
# Redis DB index (0-15 when the Redis server uses its default of 16 databases).
# REDIS_DB=0
# Dial timeout (e.g. 5s).
# REDIS_DIAL_TIMEOUT=5s
# Read/write timeouts (e.g. 3s).
# REDIS_READ_TIMEOUT=3s
# REDIS_WRITE_TIMEOUT=3s
# Connection pool size; 0 = default.
# REDIS_POOL_SIZE=0