Skip to content

Commit 277a436

Browse files
authored
v7.8.0
1 parent 3f27182 commit 277a436

File tree

3 files changed

+147
-97
lines changed

3 files changed

+147
-97
lines changed

src/constants.py

Lines changed: 84 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@
121121
"chardet==5.2.0",
122122
"charset-normalizer==3.4.2", # requests requires <4
123123
"chattts==0.2.3",
124-
"click==8.2.0",
124+
"click==8.1.8",
125125
"cloudpickle==3.1.1", # only required by tiledb-cloud and 3+ is only supported by tiledb-cloud 0.13+
126126
"colorama==0.4.6",
127127
"coloredlogs==15.0.1",
@@ -230,8 +230,8 @@
230230
"pyarrow==20.0.0",
231231
"pybase16384==0.3.8", # only required by chattts
232232
"pycparser==2.22",
233-
"pydantic==2.11.4", # unstructured-client==0.34.0 requires pydantic>=2.11.2
234-
"pydantic_core==2.33.2", # pydantic 2.11.4 requires pydantic_core==2.33.2
233+
"pydantic==2.11.3", # unstructured-client==0.35.0 requires pydantic>=2.11.2
234+
"pydantic_core==2.33.1", # pydantic 2.11.3 requires pydantic_core==2.33.1
235235
"pydantic-settings==2.9.1", # langchain-community==0.3.23 requires pydantic-settings>=2.4.0,<3.0.0
236236
"Pygments==2.19.1",
237237
"PyOpenGL==3.1.9",
@@ -489,37 +489,14 @@
489489
VECTOR_MODELS = {
490490
'Alibaba-NLP': [
491491
{
492-
'name': 'Alibaba-gte-base',
493-
'dimensions': 768,
492+
'name': 'gte-Qwen2-1.5B-instruct',
493+
'dimensions': 1536,
494494
'max_sequence': 8192,
495-
'size_mb': 547,
496-
'repo_id': 'Alibaba-NLP/gte-base-en-v1.5',
497-
'cache_dir': 'Alibaba-NLP--gte-base-en-v1.5',
495+
'size_mb': 7100,
496+
'repo_id': 'Alibaba-NLP/gte-Qwen2-1.5B-instruct',
497+
'cache_dir': 'Alibaba-NLP--gte-Qwen2-1.5B-instruct',
498498
'type': 'vector',
499-
'parameters': '137m',
500-
'precision': 'float32'
501-
},
502-
# compiles with triton and search requires cuda
503-
{
504-
'name': 'Alibaba-gte-modernbert-base',
505-
'dimensions': 768,
506-
'max_sequence': 8192,
507-
'size_mb': 298,
508-
'repo_id': 'Alibaba-NLP/gte-modernbert-base',
509-
'cache_dir': 'Alibaba-NLP--gte-modernbert-base',
510-
'type': 'vector',
511-
'parameters': '149m',
512-
'precision': 'float16'
513-
},
514-
{
515-
'name': 'Alibaba-gte-large',
516-
'dimensions': 1024,
517-
'max_sequence': 8192,
518-
'size_mb': 1740,
519-
'repo_id': 'Alibaba-NLP/gte-large-en-v1.5',
520-
'cache_dir': 'Alibaba-NLP--gte-large-en-v1.5',
521-
'type': 'vector',
522-
'parameters': '434m',
499+
'parameters': '1780m',
523500
'precision': 'float32'
524501
},
525502
],
@@ -557,6 +534,17 @@
557534
'parameters': '335m',
558535
'precision': 'float32'
559536
},
537+
# {
538+
# 'name': 'bge-code-v1',
539+
# 'dimensions': 1536,
540+
# 'max_sequence': 4096,
541+
# 'size_mb': 1340,
542+
# 'repo_id': 'BAAI/bge-code-v1',
543+
# 'cache_dir': 'BAAI--bge-code-v1',
544+
# 'type': 'vector',
545+
# 'parameters': '1540m',
546+
# 'precision': 'float32'
547+
# },
560548
],
561549
'IBM': [
562550
{
@@ -582,6 +570,30 @@
582570
'precision': 'bfloat16'
583571
},
584572
],
573+
'infly': [
574+
{
575+
'name': 'infly-retriever-v1-1.5b',
576+
'dimensions': 1536,
577+
'max_sequence': 8192,
578+
'size_mb': 3090,
579+
'repo_id': 'infly/inf-retriever-v1-1.5b',
580+
'cache_dir': 'infly--inf-retriever-v1-1.5b',
581+
'type': 'vector',
582+
'parameters': '1540m',
583+
'precision': 'bfloat16'
584+
},
585+
{
586+
'name': 'infly-retriever-v1-7b',
587+
'dimensions': 3584,
588+
'max_sequence': 8192,
589+
'size_mb': 14130,
590+
'repo_id': 'infly/inf-retriever-v1',
591+
'cache_dir': 'infly--inf-retriever-v1',
592+
'type': 'vector',
593+
'parameters': '7070m',
594+
'precision': 'bfloat16'
595+
},
596+
],
585597
'intfloat': [
586598
{
587599
'name': 'e5-small-v2',
@@ -617,35 +629,35 @@
617629
'precision': 'float32'
618630
},
619631
],
620-
'NovaSearch': [
621-
{
622-
'name': 'stella_en_1.5B_v5',
623-
'dimensions': 1024,
624-
'max_sequence': 131072,
625-
'size_mb': 6170,
626-
'repo_id': 'NovaSearch/stella_en_1.5B_v5',
627-
'cache_dir': 'NovaSearch--stella_en_1.5B_v5',
628-
'type': 'vector',
629-
'parameters': '1540m',
630-
'precision': 'float32'
631-
},
632-
{
633-
'name': 'stella_en_400M_v5',
634-
'dimensions': 1024,
635-
'max_sequence': 8192,
636-
'size_mb': 1740,
637-
'repo_id': 'NovaSearch/stella_en_400M_v5',
638-
'cache_dir': 'NovaSearch--stella_en_400M_v5',
639-
'type': 'vector',
640-
'parameters': '435m',
641-
'precision': 'float32'
642-
},
643-
],
632+
# 'NovaSearch': [
633+
# {
634+
# 'name': 'stella_en_1.5B_v5',
635+
# 'dimensions': 1024,
636+
# 'max_sequence': 512,
637+
# 'size_mb': 6170,
638+
# 'repo_id': 'NovaSearch/stella_en_1.5B_v5',
639+
# 'cache_dir': 'NovaSearch--stella_en_1.5B_v5',
640+
# 'type': 'vector',
641+
# 'parameters': '1540m',
642+
# 'precision': 'float32'
643+
# },
644+
# {
645+
# 'name': 'stella_en_400M_v5',
646+
# 'dimensions': 1024,
647+
# 'max_sequence': 512,
648+
# 'size_mb': 1740,
649+
# 'repo_id': 'NovaSearch/stella_en_400M_v5',
650+
# 'cache_dir': 'NovaSearch--stella_en_400M_v5',
651+
# 'type': 'vector',
652+
# 'parameters': '435m',
653+
# 'precision': 'float32'
654+
# },
655+
# ],
644656
'Snowflake': [
645657
{
646658
'name': 'arctic-embed-m-v2.0',
647659
'dimensions': 768,
648-
'max_sequence': 8192,
660+
'max_sequence':8192,
649661
'size_mb': 1220,
650662
'repo_id': 'Snowflake/snowflake-arctic-embed-m-v2.0',
651663
'cache_dir': 'Snowflake--snowflake-arctic-embed-m-v2.0',
@@ -2959,19 +2971,21 @@ def _generate_button_style(cls, color_values):
29592971
************************
29602972
29612973
# https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
2962-
+-------+----------------------------+----------------------------------------+----------------------------+
2963-
| Torch | Python | Stable | Experimental |
2964-
+-------+----------------------------+----------------------------------------+----------------------------+
2965-
| 2.6 | >=3.9, <=3.13 | CUDA 11.8, 12.4 + CUDNN 9.1.0.70 | CUDA 12.6 + CUDNN 9.5.1.17 | ***
2966-
+-------+----------------------------+----------------------------------------+----------------------------+
2967-
| 2.5 | >=3.9, <=3.12, (3.13 exp.) | CUDA 11.8, 12.1, 12.4 + CUDNN 9.1.0.70 | None |
2968-
+-------+----------------------------+----------------------------------------+----------------------------+
2969-
| 2.4 | >=3.8, <=3.12 | CUDA 11.8, 12.1 + CUDNN 9.1.0.70 | CUDA 12.4 + CUDNN 9.1.0.70 |
2970-
+-------+----------------------------+----------------------------------------+----------------------------+
2971-
| 2.3 | >=3.8, <=3.11, (3.12 exp.) | CUDA 11.8 + CUDNN 8.7.0.84 | CUDA 12.1 + CUDNN 8.9.2.26 |
2972-
+-------+----------------------------+----------------------------------------+----------------------------+
2973-
| 2.2 | >=3.8, <=3.11, (3.12 exp.) | CUDA 11.8 + CUDNN 8.7.0.84 | CUDA 12.1 + CUDNN 8.9.2.26 |
2974-
+-------+----------------------------+----------------------------------------+----------------------------+
2974+
+-------+----------------------------+---------------------------------------------------+----------------------------+
2975+
| Torch | Python | Stable | Experimental |
2976+
+-------+----------------------------+---------------------------------------------------+----------------------------+
2977+
| 2.7 | >=3.9, <=3.13 | CUDA 11.8 (CUDNN 9.1.0.70), 12.6 (CUDNN 9.5.1.17) | CUDA 12.8 (CUDNN 9.7.1.26) | ***
2978+
+-------+----------------------------+---------------------------------------------------+----------------------------+
2979+
| 2.6 | >=3.9, <=3.13 | CUDA 11.8, 12.4 + CUDNN 9.1.0.70 | CUDA 12.6 + CUDNN 9.5.1.17 | ***
2980+
+-------+----------------------------+---------------------------------------------------+----------------------------+
2981+
| 2.5 | >=3.9, <=3.12, (3.13 exp.) | CUDA 11.8, 12.1, 12.4 + CUDNN 9.1.0.70 | None |
2982+
+-------+----------------------------+---------------------------------------------------+----------------------------+
2983+
| 2.4 | >=3.8, <=3.12 | CUDA 11.8, 12.1 + CUDNN 9.1.0.70 | CUDA 12.4 + CUDNN 9.1.0.70 |
2984+
+-------+----------------------------+---------------------------------------------------+----------------------------+
2985+
| 2.3 | >=3.8, <=3.11, (3.12 exp.) | CUDA 11.8 + CUDNN 8.7.0.84 | CUDA 12.1 + CUDNN 8.9.2.26 |
2986+
+-------+----------------------------+---------------------------------------------------+----------------------------+
2987+
| 2.2 | >=3.8, <=3.11, (3.12 exp.) | CUDA 11.8 + CUDNN 8.7.0.84 | CUDA 12.1 + CUDNN 8.9.2.26 |
2988+
+-------+----------------------------+---------------------------------------------------+----------------------------+
29752989
29762990
***********************
29772991
Torch & Python & Triton

src/database_interactions.py

Lines changed: 60 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -214,12 +214,26 @@ def prepare_encode_kwargs(self):
214214
# 1) add the custom prompt formatting if a query is being embedded
215215
if self.is_query:
216216
encode_kwargs["prompt"] = (
217-
f"<instruct>{self.DEFAULT_INSTRUCTION}\n<query>"
217+
f"<instruct>{self.DEFAULT_INSTRUCTION} <query>"
218218
)
219219

220220
return encode_kwargs
221221

222222

223+
class InflyEmbedding(BaseEmbeddingModel):
224+
def prepare_kwargs(self):
225+
# 1) inherit all kwargs from the base class
226+
infly_kwargs = super().prepare_kwargs()
227+
228+
# 2) update tokenizer_kwargs
229+
tok_kw = infly_kwargs.setdefault("tokenizer_kwargs", {})
230+
tok_kw.update({
231+
"max_length": 8192,
232+
})
233+
234+
return infly_kwargs
235+
236+
223237
def create_vector_db_in_process(database_name):
224238
create_vector_db = CreateVectorDB(database_name=database_name)
225239
create_vector_db.run()
@@ -270,6 +284,7 @@ def initialize_vector_model(self, embedding_model_name, config_data):
270284
model_kwargs = {
271285
"device": compute_device,
272286
"trust_remote_code": True,
287+
"similarity_fn_name": "euclidean", # (str, optional); "cosine" (default), "dot", "euclidean", "manhattan"
273288
"model_kwargs": {
274289
"torch_dtype": torch_dtype if torch_dtype is not None else None
275290
}
@@ -294,6 +309,10 @@ def initialize_vector_model(self, embedding_model_name, config_data):
294309
'gte-base': 14,
295310
'arctic-embed-m': 14,
296311
'stella_en_400M_v5': 20,
312+
'bge-code': 2,
313+
'infly-retriever-v1-1.5b': 4,
314+
'infly-retriever-v1-7b': 2,
315+
'stella_en_1.5b_v5': 4,
297316
}
298317

299318
for key, value in batch_size_mapping.items():
@@ -311,13 +330,19 @@ def initialize_vector_model(self, embedding_model_name, config_data):
311330
model = SnowflakeEmbedding(embedding_model_name, model_kwargs, encode_kwargs).create()
312331
elif "alibaba" in embedding_model_name.lower():
313332
logger.debug("Matched Alibaba condition")
314-
model = AlibabaEmbedding(embedding_model_name, model_kwargs, encode_kwargs).create()
333+
model = InflyEmbedding(embedding_model_name, model_kwargs, encode_kwargs).create()
315334
elif "400m" in embedding_model_name.lower():
316335
logger.debug("Matched Stella 400m condition")
317336
model = Stella400MEmbedding(embedding_model_name, model_kwargs, encode_kwargs).create()
318-
elif "1.5b" in embedding_model_name.lower():
337+
elif "stella_en_1.5b_v5" in embedding_model_name.lower():
319338
logger.debug("Matched Stella 1.5B condition")
320339
model = StellaEmbedding(embedding_model_name, model_kwargs, encode_kwargs).create()
340+
elif "bge-code" in embedding_model_name.lower():
341+
logger.debug("Matches bge-code condition")
342+
model = BgeCodeEmbedding(embedding_model_name, model_kwargs, encode_kwargs).create()
343+
elif "infly" in embedding_model_name.lower():
344+
logger.debug("Matches infly condition")
345+
model = InflyEmbedding(embedding_model_name, model_kwargs, encode_kwargs).create()
321346
else:
322347
logger.debug("No conditions matched - using base model")
323348
model = BaseEmbeddingModel(embedding_model_name, model_kwargs, encode_kwargs).create()
@@ -359,7 +384,7 @@ def create_database(self, texts, embeddings):
359384
tiledb_id = str(random.randint(0, MAX_UINT64 - 1))
360385

361386
text_str = str(doc.page_content or "").strip()
362-
if not text_str: # silently drop zero-length chunks
387+
if not text_str: # silently drop zero-length chunks
363388
continue
364389
all_texts.append(text_str)
365390

@@ -383,7 +408,7 @@ def create_database(self, texts, embeddings):
383408
with open(self.ROOT_DIRECTORY / "config.yaml", 'r', encoding='utf-8') as config_file:
384409
config_data = yaml.safe_load(config_file)
385410

386-
# pre‑compute vectors, then write DB
411+
# precompute vectors, then write DB
387412
vectors = embeddings.embed_documents(all_texts)
388413
text_embed_pairs = [
389414
(txt, np.asarray(vec, dtype=np.float32))
@@ -470,7 +495,6 @@ def create_metadata_db(self, documents, hash_id_mappings):
470495
finally:
471496
conn.close()
472497

473-
474498
def load_audio_documents(self, source_dir: Path = None) -> list:
475499
if source_dir is None:
476500
source_dir = self.SOURCE_DIRECTORY
@@ -598,39 +622,50 @@ def load_configuration(self):
598622
raise
599623

600624
@torch.inference_mode()
601-
def initialize_vector_model(self):
602-
model_path = self.config['created_databases'][self.selected_database]['model']
625+
def initialize_vector_model(self):
626+
model_path = self.config['created_databases'][self.selected_database]['model']
603627
self.model_name = os.path.basename(model_path)
604-
compute_device = self.config['Compute_Device']['database_query']
628+
compute_device = self.config['Compute_Device']['database_query']
605629

630+
# ── outer kwargs passed to SentenceTransformer ──────────────
606631
model_kwargs = {
607-
"device": compute_device,
632+
"device": compute_device,
608633
"trust_remote_code": True,
609-
"model_kwargs": {}
634+
"similarity_fn_name": "euclidean", # (str, optional); "cosine" (default), "dot", "euclidean", "manhattan"
635+
"model_kwargs": {
636+
"trust_remote_code": True,
637+
},
638+
"tokenizer_kwargs": {
639+
"use_fast": True,
640+
"trust_remote_code": True,
641+
},
610642
}
611-
# encode_kwargs = {'normalize_embeddings': True}
612643

613-
if "snowflake" in model_path.lower():
614-
logger.debug("Matched Snowflake condition")
644+
encode_kwargs = {"batch_size": 1}
645+
646+
mp_lower = model_path.lower()
647+
if "snowflake" in mp_lower:
615648
embeddings = SnowflakeEmbedding(model_path, model_kwargs, encode_kwargs, is_query=True).create()
616-
elif "alibaba" in model_path.lower():
617-
logger.debug("Matched Alibaba condition")
618-
embeddings = AlibabaEmbedding(model_path, model_kwargs, encode_kwargs, is_query=True).create()
619-
elif "400m" in model_path.lower():
620-
logger.debug("Matched Stella 400m condition")
649+
elif "alibaba" in mp_lower:
650+
embeddings = InflyEmbedding(model_path, model_kwargs, encode_kwargs, is_query=True).create()
651+
elif "400m" in mp_lower:
621652
embeddings = Stella400MEmbedding(model_path, model_kwargs, encode_kwargs, is_query=True).create()
622-
elif "1.5b" in model_path.lower():
623-
logger.debug("Matched Stella 1.5B condition")
653+
elif "stella_en_1.5b_v5" in mp_lower:
624654
embeddings = StellaEmbedding(model_path, model_kwargs, encode_kwargs, is_query=True).create()
655+
elif "infly" in mp_lower:
656+
embeddings = InflyEmbedding(model_path, model_kwargs, encode_kwargs, is_query=True).create()
657+
elif "bge-code" in mp_lower:
658+
embeddings = BgeCodeEmbedding(model_path, model_kwargs, encode_kwargs, is_query=True).create()
625659
else:
626-
if "bge" in model_path.lower():
627-
logger.debug("Matched BGE condition - setting prompt in encode_kwargs")
628-
encode_kwargs["prompt"] = "Represent this sentence for searching relevant passages: "
629-
logger.debug("No specific condition matched - using base model")
660+
if "bge" in mp_lower:
661+
encode_kwargs["prompt"] = (
662+
"Represent this sentence for searching relevant passages: "
663+
)
630664
embeddings = BaseEmbeddingModel(model_path, model_kwargs, encode_kwargs, is_query=True).create()
631665

632666
return embeddings
633667

668+
634669
def initialize_database(self):
635670
persist_directory = Path(__file__).resolve().parent / "Vector_DB" / self.selected_database
636671

src/gui_tabs_models.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ def __init__(self, parent=None):
2222
self.model_radiobuttons.setExclusive(True)
2323
self.stretch_factors = {
2424
'BAAI': 4,
25-
'NovaSearch': 3,
25+
# 'NovaSearch': 3,
2626
'intfloat': 4,
27-
'Alibaba-NLP': 4,
27+
'Alibaba-NLP': 2,
2828
'IBM': 3,
29+
'infly': 3,
2930
'Snowflake': 3,
3031
}
3132

0 commit comments

Comments
 (0)