@@ -131,8 +131,8 @@ def initialize_model_and_tokenizer(self):
         save_dir = VISION_MODELS[chosen_model]["cache_dir"]
         cache_dir = CACHE_DIR / save_dir
         cache_dir.mkdir(parents=True, exist_ok=True)
-        model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True, low_cpu_mem_usage=True, cache_dir=cache_dir).eval()
-        processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True, cache_dir=cache_dir)
+        model = AutoModelForCausalLM.from_pretrained(repo_id, token=False, trust_remote_code=True, low_cpu_mem_usage=True, cache_dir=cache_dir).eval()
+        processor = AutoProcessor.from_pretrained(repo_id, token=False, trust_remote_code=True, cache_dir=cache_dir)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if self.device.type == "cuda":
             if torch.cuda.get_device_capability()[0] >= 8:
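The one functional change in this commit is threading token=False through every from_pretrained call. In transformers, token accepts a string or a bool: False tells huggingface_hub not to send the token saved by huggingface-cli login, so these public checkpoints are fetched anonymously and a stale or revoked local token cannot break the download. A minimal sketch of the pattern (the repo id here is illustrative, not taken from the diff):

from transformers import AutoProcessor

# token=False: skip any locally cached Hugging Face auth token and
# download anonymously; the repo id below is a placeholder example.
processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-base",
    token=False,
    trust_remote_code=True,
)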
@@ -176,8 +176,8 @@ def initialize_model_and_tokenizer(self):
         dtype = torch.bfloat16 if use_bf16 else torch.float16
         quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=dtype)
         AutoConfig.from_pretrained(model_id, cache_dir=cache_dir, trust_remote_code=True).vision_config.update(image_size=448)
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, low_cpu_mem_usage=True, trust_remote_code=True, quantization_config=quant_config, cache_dir=cache_dir).eval()
-        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, cache_dir=cache_dir)
+        model = AutoModelForCausalLM.from_pretrained(model_id, token=False, torch_dtype=dtype, low_cpu_mem_usage=True, trust_remote_code=True, quantization_config=quant_config, cache_dir=cache_dir).eval()
+        tokenizer = AutoTokenizer.from_pretrained(model_id, token=False, trust_remote_code=True, cache_dir=cache_dir)
         prec = "bfloat16" if use_bf16 else "float16"
         my_cprint(f"Running {chosen_model} on CUDA in {prec}", "green")
         return model, tokenizer, None
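For context on the use_bf16 flag above: bfloat16 needs a GPU with compute capability 8.0 or higher (Ampere and newer), which is what the get_device_capability check in the first hunk tests for; older CUDA devices fall back to float16. A minimal sketch of that selection:

import torch

# bfloat16 is only natively supported on compute capability >= 8.0
# (Ampere and newer); older CUDA GPUs get float16 instead.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
dtype = torch.bfloat16 if use_bf16 else torch.float16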
@@ -200,9 +200,9 @@ def initialize_model_and_tokenizer(self):
         source = info.get('model_path') or info['repo_id']
         cache_dir = CACHE_DIR / info.get('cache_dir', '')
         cache_dir.mkdir(parents=True, exist_ok=True)
-        self.processor = AutoProcessor.from_pretrained(source, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
+        self.processor = AutoProcessor.from_pretrained(source, token=False, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
         quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True)
-        self.model = AutoModelForCausalLM.from_pretrained(source, trust_remote_code=True, quantization_config=quant_config, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
+        self.model = AutoModelForCausalLM.from_pretrained(source, token=False, trust_remote_code=True, quantization_config=quant_config, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
         self.model.model.vision_backbone = self.model.model.vision_backbone.to(torch.float32)
         self.model.eval()
         my_cprint(f"{chosen_model} vision model loaded into memory", "green")
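Several branches load weights through the same bitsandbytes 4-bit recipe seen above: nf4 is the 4-bit NormalFloat quantization type, bnb_4bit_compute_dtype is the dtype the dequantized matmuls run in, and bnb_4bit_use_double_quant additionally quantizes the quantization constants for a small extra memory saving. A standalone sketch of that config:

import torch
from transformers import BitsAndBytesConfig

# 4-bit NF4 weights, bfloat16 compute, double-quantized constants --
# the recipe reused across the branches in this file.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)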
@@ -338,7 +338,8 @@ def initialize_model_and_tokenizer(self):
             torch_dtype=self.dtype,
             trust_remote_code=True,
             multimodal_max_length=8192,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         ).to(self.device)

         # # Print model layers precision before eval
@@ -468,12 +469,14 @@ def initialize_model_and_tokenizer(self):
             torch_dtype=torch.bfloat16,
             low_cpu_mem_usage=True,
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         ).eval()
         tokenizer = AutoTokenizer.from_pretrained(
             info['repo_id'],
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         my_cprint("InternVL2.5 vision model loaded into memory", "green")
         return model, tokenizer, None
@@ -564,14 +567,16 @@ def initialize_model_and_tokenizer(self):
         processor = AutoProcessor.from_pretrained(
             model_id,
             use_fast=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         model = AutoModelForVision2Seq.from_pretrained(
             model_id,
             quantization_config=config,
             torch_dtype=torch.bfloat16,
             low_cpu_mem_usage=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         ).eval()
         my_cprint("Granite Vision model loaded into memory", "green")
         return model, None, processor
@@ -645,15 +650,17 @@ def initialize_model_and_tokenizer(self):
             min_pixels=28 * 28,
             max_pixels=1280 * 28 * 28,
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             model_id,
             quantization_config=quantization_config,
             torch_dtype=torch.bfloat16,
             low_cpu_mem_usage=True,
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         model = model.to(self.device)
         model.eval()
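A note on the Qwen2.5-VL processor arguments above: min_pixels and max_pixels bound the area an input image is resized to, and since the model maps each 28x28-pixel region to one visual token, max_pixels = 1280 * 28 * 28 caps an image at roughly 1280 visual tokens. Illustrative usage (the repo id is a placeholder, not from the diff):

from transformers import AutoProcessor

# Bound the resized image area; with 28x28-pixel regions mapping to one
# visual token each, this allows roughly 1 to 1280 tokens per image.
processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2.5-VL-3B-Instruct",  # placeholder repo id
    min_pixels=28 * 28,
    max_pixels=1280 * 28 * 28,
    token=False,
)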