diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index a656730a1..50c57d932 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -1,6 +1,7 @@ import asyncio import io import os +import ssl import time from collections import deque from collections.abc import Iterable @@ -509,34 +510,62 @@ def _download_file_bytes(self, file_meta: dict[str, Any]) -> bytes: # Binary download (get_media also doesn't accept the Drive flags) request = self.service.files().get_media(fileId=file_id) - # Download the file with error handling for misclassified Google Docs + # Download the file with error handling for misclassified Google Docs and + # transient SSL/network errors. Each SSL retry recreates the downloader + # from scratch to avoid partial-buffer state. + _max_retries = 3 + for _attempt in range(_max_retries): fh = io.BytesIO() downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024) done = False - + _should_retry = False + _delay = 0.0 + _err: Exception | None = None try: - while not done: - status, done = downloader.next_chunk() - # Optional: you can log progress via status.progress() - except HttpError as e: - # If download fails with "fileNotDownloadable", it's a Docs Editor file - # that wasn't properly detected. Retry with export_media. - if "fileNotDownloadable" in str(e) and mime_type not in exportable_types: - logger.warning( - f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. " - f"Retrying with export_media (file might be a Google Doc)" - ) - export_mime = "application/pdf" - request = self.service.files().export_media( - fileId=file_id, mimeType=export_mime - ) - fh = io.BytesIO() - downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024) - done = False + try: while not done: status, done = downloader.next_chunk() - else: - raise + except HttpError as e: + # If download fails with "fileNotDownloadable", it's a Docs Editor file + # that wasn't properly detected. Retry with export_media. + if "fileNotDownloadable" in str(e) and mime_type not in exportable_types: + logger.warning( + f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. " + f"Retrying with export_media (file might be a Google Doc)" + ) + export_mime = "application/pdf" + with self._lock: + request = self.service.files().export_media( + fileId=file_id, mimeType=export_mime + ) + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024) + done = False + while not done: + status, done = downloader.next_chunk() + else: + raise + break # download succeeded + except (ssl.SSLError, ConnectionResetError, TimeoutError) as e: + _should_retry = _attempt < _max_retries - 1 + _delay = 2.0**_attempt + _err = e + + if _should_retry and _err is not None: + logger.warning( + "[GoogleDrive] Transient network error for %s (attempt %d/%d), " + "retrying in %.1fs: %s", + file_id, + _attempt + 1, + _max_retries, + _delay, + _err, + ) + time.sleep(_delay) + continue + + if _err is not None: + raise _err data = fh.getvalue() logger.debug("[GoogleDrive] _download_file_bytes: done, %d bytes", len(data))