From 3bfcece9880f261997db82f5e9256e8b774af62c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E9=A2=96?= <2450800388@qq.com> Date: Sun, 15 Mar 2026 20:38:16 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E4=B8=BAsunrequests.py=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E8=AF=B7=E6=B1=82=E9=A2=91=E7=8E=87=E9=99=90=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adata/common/utils/sunrequests.py | 63 +++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/adata/common/utils/sunrequests.py b/adata/common/utils/sunrequests.py index 9fec0ed..9c011bf 100644 --- a/adata/common/utils/sunrequests.py +++ b/adata/common/utils/sunrequests.py @@ -10,6 +10,7 @@ import threading import time +from urllib.parse import urlparse import requests @@ -45,6 +46,58 @@ class SunRequests(object): def __init__(self, sun_proxy: SunProxy = None) -> None: super().__init__() self.sun_proxy = sun_proxy + # 域名请求频率限制 + self._rate_limit = {} + # 默认每分钟请求次数限制 + self._default_limit = 30 + # 线程锁 + self._lock = threading.Lock() + + def set_rate_limit(self, domain, limit): + """ + 设置域名的请求频率限制 + :param domain: 域名 + :param limit: 每分钟请求次数 + """ + with self._lock: + self._rate_limit[domain] = limit + + def _check_rate_limit(self, url): + """ + 检查并处理请求频率限制 + :param url: 请求URL + """ + parsed_url = urlparse(url) + domain = parsed_url.netloc + current_time = int(time.time() // 60) # 当前分钟 + + with self._lock: + # 获取该域名的限制次数 + limit = self._rate_limit.get(domain, self._default_limit) + + # 初始化或更新域名的请求记录 + if domain not in self._rate_limit: + self._rate_limit[domain] = { + 'count': 0, + 'current_time': current_time + } + + # 如果时间已经过了一分钟,重置计数器 + if self._rate_limit[domain]['current_time'] != current_time: + self._rate_limit[domain]['count'] = 0 + self._rate_limit[domain]['current_time'] = current_time + + # 检查是否超过限制 + if self._rate_limit[domain]['count'] >= limit: + # 等待到下一分钟 + wait_time = 60 - (time.time() % 60) + 0.1 + time.sleep(wait_time) + # 重置计数器 + self._rate_limit[domain]['count'] = 0 + self._rate_limit[domain]['current_time'] = int(time.time() // 60) + + # 增加计数器 + self._rate_limit[domain]['count'] += 1 def request(self, method='get', url=None, times=3, retry_wait_time=1588, proxies=None, wait_time=None, **kwargs): """ @@ -58,9 +111,13 @@ def request(self, method='get', url=None, times=3, retry_wait_time=1588, proxies :param kwargs: 其它 requests 参数,用法相同 :return: res """ - # 1. 获取设置代理 + # 1. 检查频率限制 + self._check_rate_limit(url) + + # 2. 获取设置代理 proxies = self.__get_proxies(proxies) - # 2. 请求数据结果 + + # 3. 请求数据结果 res = None for i in range(times): if wait_time: @@ -90,4 +147,4 @@ def __get_proxies(self, proxies): return proxies -sun_requests = SunRequests() +sun_requests = SunRequests() \ No newline at end of file From fa0aef154e569132cc8c59f3a0e3287ffd0959ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E9=A2=96?= <2450800388@qq.com> Date: Sun, 15 Mar 2026 21:16:12 +0800 Subject: [PATCH 2/2] =?UTF-8?q?feat:=20=E4=B8=BAsunrequests.py=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E5=9F=9F=E5=90=8D=E7=BA=A7=E8=AF=B7=E6=B1=82=E9=A2=91?= =?UTF-8?q?=E7=8E=87=E9=99=90=E5=88=B6=EF=BC=88Doubao=20Seed=20Code=202.0?= =?UTF-8?q?=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adata/common/utils/sunrequests.py | 32 +++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/adata/common/utils/sunrequests.py b/adata/common/utils/sunrequests.py index 9c011bf..501d9f7 100644 --- a/adata/common/utils/sunrequests.py +++ b/adata/common/utils/sunrequests.py @@ -46,8 +46,10 @@ class SunRequests(object): def __init__(self, sun_proxy: SunProxy = None) -> None: super().__init__() self.sun_proxy = sun_proxy - # 域名请求频率限制 - self._rate_limit = {} + # 域名请求频率限制配置: {domain: limit_per_minute} + self._domain_limits = {} + # 域名请求计数: {domain: {'count': int, 'current_time': int}} + self._domain_requests = {} # 默认每分钟请求次数限制 self._default_limit = 30 # 线程锁 @@ -60,7 +62,7 @@ def set_rate_limit(self, domain, limit): :param limit: 每分钟请求次数 """ with self._lock: - self._rate_limit[domain] = limit + self._domain_limits[domain] = limit def _check_rate_limit(self, url): """ @@ -69,35 +71,37 @@ def _check_rate_limit(self, url): """ parsed_url = urlparse(url) domain = parsed_url.netloc - current_time = int(time.time() // 60) # 当前分钟 with self._lock: # 获取该域名的限制次数 - limit = self._rate_limit.get(domain, self._default_limit) + limit = self._domain_limits.get(domain, self._default_limit) + current_time = int(time.time() // 60) # 当前分钟 # 初始化或更新域名的请求记录 - if domain not in self._rate_limit: - self._rate_limit[domain] = { + if domain not in self._domain_requests: + self._domain_requests[domain] = { 'count': 0, 'current_time': current_time } + requests_record = self._domain_requests[domain] + # 如果时间已经过了一分钟,重置计数器 - if self._rate_limit[domain]['current_time'] != current_time: - self._rate_limit[domain]['count'] = 0 - self._rate_limit[domain]['current_time'] = current_time + if requests_record['current_time'] != current_time: + requests_record['count'] = 0 + requests_record['current_time'] = current_time # 检查是否超过限制 - if self._rate_limit[domain]['count'] >= limit: + if requests_record['count'] >= limit: # 等待到下一分钟 wait_time = 60 - (time.time() % 60) + 0.1 time.sleep(wait_time) # 重置计数器 - self._rate_limit[domain]['count'] = 0 - self._rate_limit[domain]['current_time'] = int(time.time() // 60) + requests_record['count'] = 0 + requests_record['current_time'] = int(time.time() // 60) # 增加计数器 - self._rate_limit[domain]['count'] += 1 + requests_record['count'] += 1 def request(self, method='get', url=None, times=3, retry_wait_time=1588, proxies=None, wait_time=None, **kwargs): """