qdrant · joein · May 20, 2025 · May 19, 2025
diff --git a/fastembed/image/onnx_embedding.py b/fastembed/image/onnx_embedding.py
@@ -112,11 +112,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -177,6 +178,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 

diff --git a/fastembed/image/onnx_image_model.py b/fastembed/image/onnx_image_model.py
@@ -97,6 +97,8 @@ def _embed_images(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -123,6 +125,8 @@ def _embed_images(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 

diff --git a/fastembed/late_interaction/colbert.py b/fastembed/late_interaction/colbert.py
@@ -169,11 +169,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
         self.mask_token_id: Optional[int] = None
         self.pad_token_id: Optional[int] = None
@@ -233,6 +234,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 

diff --git a/fastembed/late_interaction/token_embeddings.py b/fastembed/late_interaction/token_embeddings.py
@@ -9,7 +9,7 @@
 )
 from fastembed.text.onnx_embedding import OnnxTextEmbedding
 from fastembed.text.onnx_text_model import TextEmbeddingWorker
-import numpy as np
+
 
 supported_token_embeddings_models = [
     DenseModelDescription(

diff --git a/fastembed/late_interaction_multimodal/colpali.py b/fastembed/late_interaction_multimodal/colpali.py
@@ -95,11 +95,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
         self.mask_token_id = None
         self.pad_token_id = None
@@ -235,6 +236,8 @@ def embed_text(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 
@@ -268,6 +271,8 @@ def embed_image(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 

diff --git a/fastembed/late_interaction_multimodal/onnx_multimodal_model.py b/fastembed/late_interaction_multimodal/onnx_multimodal_model.py
@@ -120,6 +120,8 @@ def _embed_documents(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -146,6 +148,8 @@ def _embed_documents(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 
@@ -183,6 +187,8 @@ def _embed_images(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -209,6 +215,8 @@ def _embed_images(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 

diff --git a/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py b/fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py
@@ -131,11 +131,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -189,6 +190,8 @@ def rerank_pairs(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 

diff --git a/fastembed/rerank/cross_encoder/onnx_text_model.py b/fastembed/rerank/cross_encoder/onnx_text_model.py
@@ -94,6 +94,8 @@ def _rerank_pairs(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[float]:
         is_small = False
@@ -120,6 +122,8 @@ def _rerank_pairs(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }
 

diff --git a/fastembed/sparse/bm25.py b/fastembed/sparse/bm25.py
@@ -115,11 +115,12 @@ def __init__(
         model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         self.token_max_length = token_max_length
@@ -160,6 +161,8 @@ def _embed_documents(
         documents: Union[str, Iterable[str]],
         batch_size: int = 256,
         parallel: Optional[int] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
     ) -> Iterable[SparseEmbedding]:
         is_small = False
 
@@ -188,6 +191,8 @@ def _embed_documents(
                 "language": self.language,
                 "token_max_length": self.token_max_length,
                 "disable_stemmer": self.disable_stemmer,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
             }
             pool = ParallelWorkerPool(
                 num_workers=parallel or 1,
@@ -226,6 +231,8 @@ def embed(
             documents=documents,
             batch_size=batch_size,
             parallel=parallel,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
         )
 
     def _stem(self, tokens: list[str]) -> list[str]:

diff --git a/fastembed/sparse/bm42.py b/fastembed/sparse/bm42.py
@@ -110,11 +110,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         self.invert_vocab: dict[int, str] = {}
@@ -301,6 +302,8 @@ def embed(
             cuda=self.cuda,
             device_ids=self.device_ids,
             alpha=self.alpha,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
         )
 
     @classmethod

diff --git a/fastembed/sparse/minicoil.py b/fastembed/sparse/minicoil.py
@@ -127,11 +127,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -211,6 +212,9 @@ def embed(
             b=self.b,
             avg_len=self.avg_len,
             is_query=False,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
+            **kwargs,
         )
 
     def query_embed(
@@ -230,6 +234,9 @@ def query_embed(
             b=self.b,
             avg_len=self.avg_len,
             is_query=True,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
+            **kwargs,
         )
 
     @classmethod

diff --git a/fastembed/sparse/splade_pp.py b/fastembed/sparse/splade_pp.py
@@ -114,11 +114,12 @@ def __init__(
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
 
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -165,6 +166,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 

diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py
@@ -247,11 +247,12 @@ def __init__(
 
         self.model_description = self._get_model_description(model_name)
         self.cache_dir = str(define_cache_dir(cache_dir))
+        self._specific_model_path = specific_model_path
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
             local_files_only=self._local_files_only,
-            specific_model_path=specific_model_path,
+            specific_model_path=self._specific_model_path,
         )
 
         if not self.lazy_load:
@@ -288,6 +289,8 @@ def embed(
             providers=self.providers,
             cuda=self.cuda,
             device_ids=self.device_ids,
+            local_files_only=self._local_files_only,
+            specific_model_path=self._specific_model_path,
             **kwargs,
         )
 

diff --git a/fastembed/text/onnx_text_model.py b/fastembed/text/onnx_text_model.py
@@ -108,6 +108,8 @@ def _embed_documents(
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
         device_ids: Optional[list[int]] = None,
+        local_files_only: bool = False,
+        specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -136,6 +138,8 @@ def _embed_documents(
                 "model_name": model_name,
                 "cache_dir": cache_dir,
                 "providers": providers,
+                "local_files_only": local_files_only,
+                "specific_model_path": specific_model_path,
                 **kwargs,
             }