From 95d166fd19b03cf99d5804f3acada6bc527430c4 Mon Sep 17 00:00:00 2001
From: George Panchuk <george.panchuk@qdrant.tech>
Date: Tue, 20 May 2025 00:27:40 +0400
Subject: [PATCH] fix: propagate local_files_only and specific_model_path to
 parallel workers

---
 fastembed/image/onnx_embedding.py                        | 5 ++++-
 fastembed/image/onnx_image_model.py                      | 4 ++++
 fastembed/late_interaction/colbert.py                    | 5 ++++-
 fastembed/late_interaction/token_embeddings.py           | 2 +-
 fastembed/late_interaction_multimodal/colpali.py         | 7 ++++++-
 .../late_interaction_multimodal/onnx_multimodal_model.py | 8 ++++++++
 .../rerank/cross_encoder/onnx_text_cross_encoder.py      | 5 ++++-
 fastembed/rerank/cross_encoder/onnx_text_model.py        | 4 ++++
 fastembed/sparse/bm25.py                                 | 9 ++++++++-
 fastembed/sparse/bm42.py                                 | 5 ++++-
 fastembed/sparse/minicoil.py                             | 9 ++++++++-
 fastembed/sparse/splade_pp.py                            | 5 ++++-
 fastembed/text/onnx_embedding.py                         | 5 ++++-
 fastembed/text/onnx_text_model.py                        | 4 ++++
 14 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/fastembed/image/onnx_embedding.py b/fastembed/image/onnx_embedding.py
index f82fbae3d..3b83b2483 100644
--- a/fastembed/image/onnx_embedding.py
+++ b/fastembed/image/onnx_embedding.py
@@ -112,11 +112,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -177,6 +178,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
diff --git a/fastembed/image/onnx_image_model.py b/fastembed/image/onnx_image_model.py
index f26ee9e08..a345f024c 100644
--- a/fastembed/image/onnx_image_model.py
+++ b/fastembed/image/onnx_image_model.py
@@ -97,6 +97,8 @@ def _embed_images(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -123,6 +125,8 @@ def _embed_images(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 
diff --git a/fastembed/late_interaction/colbert.py b/fastembed/late_interaction/colbert.py
index cdbf9f509..9d1ccf9a2 100644
--- a/fastembed/late_interaction/colbert.py
+++ b/fastembed/late_interaction/colbert.py
@@ -169,11 +169,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
         self.mask_token_id: Optional[int] = None
         self.pad_token_id: Optional[int] = None
@@ -233,6 +234,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
diff --git a/fastembed/late_interaction/token_embeddings.py b/fastembed/late_interaction/token_embeddings.py
index 1d4ebc9c1..ec4844ba5 100644
--- a/fastembed/late_interaction/token_embeddings.py
+++ b/fastembed/late_interaction/token_embeddings.py
@@ -9,7 +9,7 @@
 )
 from fastembed.text.onnx_embedding import OnnxTextEmbedding
 from fastembed.text.onnx_text_model import TextEmbeddingWorker
-import numpy as np
+
 
 supported_token_embeddings_models = [
     DenseModelDescription(
diff --git a/fastembed/late_interaction_multimodal/colpali.py b/fastembed/late_interaction_multimodal/colpali.py
index c43ff9d00..c6b98e23e 100644
--- a/fastembed/late_interaction_multimodal/colpali.py
+++ b/fastembed/late_interaction_multimodal/colpali.py
@@ -95,11 +95,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
         self.mask_token_id = None
         self.pad_token_id = None
@@ -235,6 +236,8 @@ def embed_text(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
@@ -268,6 +271,8 @@ def embed_image(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
diff --git a/fastembed/late_interaction_multimodal/onnx_multimodal_model.py b/fastembed/late_interaction_multimodal/onnx_multimodal_model.py
index 089ba1b75..83706a2b4 100644
--- a/fastembed/late_interaction_multimodal/onnx_multimodal_model.py
+++ b/fastembed/late_interaction_multimodal/onnx_multimodal_model.py
@@ -120,6 +120,8 @@ def _embed_documents(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -146,6 +148,8 @@ def _embed_documents(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 
@@ -183,6 +187,8 @@ def _embed_images(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -209,6 +215,8 @@ def _embed_images(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 
diff --git a/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py b/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py
index 238f3acc7..a171afa40 100644
--- a/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py
+++ b/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py
@@ -131,11 +131,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -189,6 +190,8 @@ def rerank_pairs(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
diff --git a/fastembed/rerank/cross_encoder/onnx_text_model.py b/fastembed/rerank/cross_encoder/onnx_text_model.py
index de022e172..3fc4e81c4 100644
--- a/fastembed/rerank/cross_encoder/onnx_text_model.py
+++ b/fastembed/rerank/cross_encoder/onnx_text_model.py
@@ -94,6 +94,8 @@ def _rerank_pairs(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[float]:
         is_small = False
@@ -120,6 +122,8 @@ def _rerank_pairs(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 
diff --git a/fastembed/sparse/bm25.py b/fastembed/sparse/bm25.py
index f6806e72d..b6ac59fda 100644
--- a/fastembed/sparse/bm25.py
+++ b/fastembed/sparse/bm25.py
@@ -115,11 +115,12 @@ def __init__(
         model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         self.token_max_length = token_max_length
@@ -160,6 +161,8 @@ def _embed_documents(
         documents: Union[str, Iterable[str]],
         batch_size: int = 256,
         parallel: Optional[int] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
     ) -> Iterable[SparseEmbedding]:
         is_small = False
 
@@ -188,6 +191,8 @@ def _embed_documents(
                 "language": self.language,
                 "token_max_length": self.token_max_length,
                 "disable_stemmer": self.disable_stemmer,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
             }
             pool = ParallelWorkerPool(
                 num_workers=parallel or 1,
@@ -226,6 +231,8 @@ def embed(
             documents=documents,
             batch_size=batch_size,
             parallel=parallel,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
         )
 
     def _stem(self, tokens: list[str]) -> list[str]:
diff --git a/fastembed/sparse/bm42.py b/fastembed/sparse/bm42.py
index 5d72d6c7a..5fb90d712 100644
--- a/fastembed/sparse/bm42.py
+++ b/fastembed/sparse/bm42.py
@@ -110,11 +110,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         self.invert_vocab: dict[int, str] = {}
@@ -301,6 +302,8 @@ def embed(
             cuda=self.cuda,
             device_ids=self.device_ids,
             alpha=self.alpha,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
         )
 
     @classmethod
diff --git a/fastembed/sparse/minicoil.py b/fastembed/sparse/minicoil.py
index 6ade9dfca..475cc9d88 100644
--- a/fastembed/sparse/minicoil.py
+++ b/fastembed/sparse/minicoil.py
@@ -127,11 +127,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -211,6 +212,9 @@ def embed(
             b=self.b,
             avg_len=self.avg_len,
             is_query=False,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
+            **kwargs,
         )
 
     def query_embed(
@@ -230,6 +234,9 @@ def query_embed(
             b=self.b,
             avg_len=self.avg_len,
             is_query=True,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
+            **kwargs,
         )
 
     @classmethod
diff --git a/fastembed/sparse/splade_pp.py b/fastembed/sparse/splade_pp.py
index b1354b7c0..d2c4af38a 100644
--- a/fastembed/sparse/splade_pp.py
+++ b/fastembed/sparse/splade_pp.py
@@ -114,11 +114,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -165,6 +166,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py
index 8f145b3e5..4cc892f59 100644
--- a/fastembed/text/onnx_embedding.py
+++ b/fastembed/text/onnx_embedding.py
@@ -247,11 +247,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -288,6 +289,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
diff --git a/fastembed/text/onnx_text_model.py b/fastembed/text/onnx_text_model.py
index 45a9dc0e9..c939b21d5 100644
--- a/fastembed/text/onnx_text_model.py
+++ b/fastembed/text/onnx_text_model.py
@@ -108,6 +108,8 @@ def _embed_documents(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -136,6 +138,8 @@ def _embed_documents(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }