@@ -131,8 +131,8 @@ def initialize_model_and_tokenizer(self):
         save_dir = VISION_MODELS[chosen_model]["cache_dir"]
         cache_dir = CACHE_DIR / save_dir
         cache_dir.mkdir(parents=True, exist_ok=True)
-        model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True, low_cpu_mem_usage=True, cache_dir=cache_dir).eval()
-        processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True, cache_dir=cache_dir)
+        model = AutoModelForCausalLM.from_pretrained(repo_id, token=False, trust_remote_code=True, low_cpu_mem_usage=True, cache_dir=cache_dir).eval()
+        processor = AutoProcessor.from_pretrained(repo_id, token=False, trust_remote_code=True, cache_dir=cache_dir)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if self.device.type == "cuda":
             if torch.cuda.get_device_capability()[0] >= 8:
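The one functional change in this commit is threading token=False through every from_pretrained call. In transformers, token accepts a string or a bool: False tells huggingface_hub not to send the token saved by huggingface-cli login, so these public checkpoints are fetched anonymously and a stale or revoked local token cannot break the download. A minimal sketch of the pattern (the repo id here is illustrative, not taken from the diff):

from transformers import AutoProcessor

# token=False: skip any locally cached Hugging Face auth token and
# download anonymously; the repo id below is a placeholder example.
processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-base",
    token=False,
    trust_remote_code=True,
)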
@@ -176,8 +176,8 @@ def initialize_model_and_tokenizer(self):
         dtype = torch.bfloat16 if use_bf16 else torch.float16
         quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=dtype)
         AutoConfig.from_pretrained(model_id, cache_dir=cache_dir, trust_remote_code=True).vision_config.update(image_size=448)
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, low_cpu_mem_usage=True, trust_remote_code=True, quantization_config=quant_config, cache_dir=cache_dir).eval()
-        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, cache_dir=cache_dir)
+        model = AutoModelForCausalLM.from_pretrained(model_id, token=False, torch_dtype=dtype, low_cpu_mem_usage=True, trust_remote_code=True, quantization_config=quant_config, cache_dir=cache_dir).eval()
+        tokenizer = AutoTokenizer.from_pretrained(model_id, token=False, trust_remote_code=True, cache_dir=cache_dir)
         prec = "bfloat16" if use_bf16 else "float16"
         my_cprint(f"Running {chosen_model} on CUDA in {prec}", "green")
         return model, tokenizer, None
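For context on the use_bf16 flag above: bfloat16 needs a GPU with compute capability 8.0 or higher (Ampere and newer), which is what the get_device_capability check in the first hunk tests for; older CUDA devices fall back to float16. A minimal sketch of that selection:

import torch

# bfloat16 is only natively supported on compute capability >= 8.0
# (Ampere and newer); older CUDA GPUs get float16 instead.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
dtype = torch.bfloat16 if use_bf16 else torch.float16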
@@ -200,9 +200,9 @@ def initialize_model_and_tokenizer(self):
         source = info.get('model_path') or info['repo_id']
         cache_dir = CACHE_DIR / info.get('cache_dir', '')
         cache_dir.mkdir(parents=True, exist_ok=True)
-        self.processor = AutoProcessor.from_pretrained(source, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
+        self.processor = AutoProcessor.from_pretrained(source, token=False, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
         quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True)
-        self.model = AutoModelForCausalLM.from_pretrained(source, trust_remote_code=True, quantization_config=quant_config, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
+        self.model = AutoModelForCausalLM.from_pretrained(source, token=False, trust_remote_code=True, quantization_config=quant_config, torch_dtype=torch.bfloat16, device_map='auto', cache_dir=cache_dir)
         self.model.model.vision_backbone = self.model.model.vision_backbone.to(torch.float32)
         self.model.eval()
         my_cprint(f"{chosen_model} vision model loaded into memory", "green")
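Several branches load weights through the same bitsandbytes 4-bit recipe seen above: nf4 is the 4-bit NormalFloat quantization type, bnb_4bit_compute_dtype is the dtype the dequantized matmuls run in, and bnb_4bit_use_double_quant additionally quantizes the quantization constants for a small extra memory saving. A standalone sketch of that config:

import torch
from transformers import BitsAndBytesConfig

# 4-bit NF4 weights, bfloat16 compute, double-quantized constants --
# the recipe reused across the branches in this file.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)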
@@ -338,7 +338,8 @@ def initialize_model_and_tokenizer(self):
             torch_dtype=self.dtype,
             trust_remote_code=True,
             multimodal_max_length=8192,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         ).to(self.device)

         # # Print model layers precision before eval
@@ -468,12 +469,14 @@ def initialize_model_and_tokenizer(self):
             torch_dtype=torch.bfloat16,
             low_cpu_mem_usage=True,
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         ).eval()
         tokenizer = AutoTokenizer.from_pretrained(
             info['repo_id'],
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         my_cprint("InternVL2.5 vision model loaded into memory", "green")
         return model, tokenizer, None
@@ -564,14 +567,16 @@ def initialize_model_and_tokenizer(self):
         processor = AutoProcessor.from_pretrained(
             model_id,
             use_fast=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         model = AutoModelForVision2Seq.from_pretrained(
             model_id,
             quantization_config=config,
             torch_dtype=torch.bfloat16,
             low_cpu_mem_usage=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         ).eval()
         my_cprint("Granite Vision model loaded into memory", "green")
         return model, None, processor
@@ -645,15 +650,17 @@ def initialize_model_and_tokenizer(self):
             min_pixels=28 * 28,
             max_pixels=1280 * 28 * 28,
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             model_id,
             quantization_config=quantization_config,
             torch_dtype=torch.bfloat16,
             low_cpu_mem_usage=True,
             trust_remote_code=True,
-            cache_dir=cache_dir
+            cache_dir=cache_dir,
+            token=False
         )
         model = model.to(self.device)
         model.eval()
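A note on the Qwen2.5-VL processor arguments above: min_pixels and max_pixels bound the area an input image is resized to, and since the model maps each 28x28-pixel region to one visual token, max_pixels = 1280 * 28 * 28 caps an image at roughly 1280 visual tokens. Illustrative usage (the repo id is a placeholder, not from the diff):

from transformers import AutoProcessor

# Bound the resized image area; with 28x28-pixel regions mapping to one
# visual token each, this allows roughly 1 to 1280 tokens per image.
processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2.5-VL-3B-Instruct",  # placeholder repo id
    min_pixels=28 * 28,
    max_pixels=1280 * 28 * 28,
    token=False,
)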