Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 64 additions & 3 deletions adata/common/utils/sunrequests.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import threading
import time
from urllib.parse import urlparse

import requests

Expand Down Expand Up @@ -45,6 +46,62 @@ class SunRequests(object):
def __init__(self, sun_proxy: SunProxy = None) -> None:
    """
    Set up the request helper and its per-domain rate-limit bookkeeping.

    :param sun_proxy: proxy provider kept on the instance as ``sun_proxy``; may be None
    """
    super().__init__()
    # Thread lock protecting the rate-limit dictionaries below
    self._lock = threading.Lock()
    # Default number of requests allowed per minute
    self._default_limit = 30
    # Per-domain rate-limit configuration: {domain: limit_per_minute}
    self._domain_limits = dict()
    # Per-domain request counters: {domain: {'count': int, 'current_time': int}}
    self._domain_requests = dict()
    self.sun_proxy = sun_proxy

def set_rate_limit(self, domain, limit):
    """
    Register the request-rate cap for a single domain.

    :param domain: domain the cap applies to
    :param limit: number of requests allowed per minute
    """
    override = {domain: limit}
    with self._lock:
        self._domain_limits.update(override)

def _check_rate_limit(self, url):
    """
    Enforce the per-minute request limit for the domain of ``url``.

    Requests are counted per domain in fixed one-minute windows aligned to
    the wall clock. When the current window is already full, the caller
    blocks until the next window starts and then checks again. The lock is
    held only while the counters are read or updated, never while sleeping,
    so a throttled domain does not stall requests to other domains or calls
    to ``set_rate_limit``.

    :param url: request URL; its ``netloc`` component is used as the domain key
    """
    domain = urlparse(url).netloc

    while True:
        with self._lock:
            # Re-read the limit on every attempt so a change made while this
            # caller was waiting is honoured. A non-positive limit could never
            # admit a request, so it is treated as one request per minute.
            limit = max(self._domain_limits.get(domain, self._default_limit), 1)

            now = time.time()
            minute = int(now // 60)  # index of the current one-minute window

            # Create the record for a domain seen for the first time
            record = self._domain_requests.setdefault(
                domain, {'count': 0, 'current_time': minute}
            )

            # A new window has started: reset the counter
            if record['current_time'] != minute:
                record['count'] = 0
                record['current_time'] = minute

            # Room left in this window: count the request and let it through
            if record['count'] < limit:
                record['count'] += 1
                return

            # Window is full: time remaining until the next minute, plus a
            # small margin so the wake-up lands inside the new window
            wait_time = 60 - (now % 60) + 0.1

        # Sleep with the lock released, then loop to re-check the new window
        time.sleep(wait_time)

def request(self, method='get', url=None, times=3, retry_wait_time=1588, proxies=None, wait_time=None, **kwargs):
"""
Expand All @@ -58,9 +115,13 @@ def request(self, method='get', url=None, times=3, retry_wait_time=1588, proxies
:param kwargs: 其它 requests 参数,用法相同
:return: res
"""
# 1. 获取设置代理
# 1. 检查频率限制
self._check_rate_limit(url)

# 2. 获取设置代理
proxies = self.__get_proxies(proxies)
# 2. 请求数据结果

# 3. 请求数据结果
res = None
for i in range(times):
if wait_time:
Expand Down Expand Up @@ -90,4 +151,4 @@ def __get_proxies(self, proxies):
return proxies


# Module-level default SunRequests instance (created once).
sun_requests = SunRequests()