@@ -311,18 +311,18 @@ def __init__(self, config):
311311 if self .device == "cuda" :
312312 if native in ("float32" , "bfloat16" ) and has_bfloat16_support ():
313313 self .dtype = torch .bfloat16
314- print (f"OVIS: Selected bfloat16 precision based on native={ native } " )
314+ # print(f"OVIS: Selected bfloat16 precision based on native={native}")
315315 elif native == "float32" :
316316 self .dtype = torch .float16
317- print (f"OVIS: Selected float16 precision based on native={ native } " )
317+ # print(f"OVIS: Selected float16 precision based on native={native}")
318318 else :
319319 self .dtype = torch .float16
320- print (f"OVIS: Selected float16 precision based on native={ native } " )
320+ # print(f"OVIS: Selected float16 precision based on native={native}")
321321 else :
322322 self .dtype = torch .float32
323- print (f"OVIS: Selected float32 precision for CPU based on native={ native } " )
323+ # print(f"OVIS: Selected float32 precision for CPU based on native={native}")
324324
325- print (f"OVIS: Device={ self .device } , Initial dtype selection={ self .dtype } " )
325+ # print(f"OVIS: Device={self.device}, Initial dtype selection={self.dtype}")
326326
327327 def initialize_model_and_tokenizer (self ):
328328 chosen_model = self .config ["vision" ]["chosen_model" ]
@@ -341,50 +341,50 @@ def initialize_model_and_tokenizer(self):
341341 cache_dir = cache_dir
342342 ).to (self .device )
343343
344- # Print model layers precision before eval
345- print ("OVIS: Model layer precisions after loading:" )
346- for name , module in model .named_modules ():
347- if isinstance (module , (torch .nn .Linear , torch .nn .Conv2d , torch .nn .LayerNorm )):
348- if hasattr (module , "weight" ) and module .weight is not None :
349- print (f" Layer { name } : { module .weight .dtype } " )
344+ # # Print model layers precision before eval
345+ # print("OVIS: Model layer precisions after loading:")
346+ # for name, module in model.named_modules():
347+ # if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d, torch.nn.LayerNorm)):
348+ # if hasattr(module, "weight") and module.weight is not None:
349+ # print(f" Layer {name}: {module.weight.dtype}")
350350
351351 model .eval ()
352352
353- # Print model layers precision after eval
354- print ("OVIS: Model layer precisions after eval():" )
355- for name , module in model .named_modules ():
356- if isinstance (module , (torch .nn .Linear , torch .nn .Conv2d , torch .nn .LayerNorm )):
357- if hasattr (module , "weight" ) and module .weight is not None :
358- print (f" Layer { name } : { module .weight .dtype } " )
353+ # # Print model layers precision after eval
354+ # print("OVIS: Model layer precisions after eval():")
355+ # for name, module in model.named_modules():
356+ # if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d, torch.nn.LayerNorm)):
357+ # if hasattr(module, "weight") and module.weight is not None:
358+ # print(f" Layer {name}: {module.weight.dtype}")
359359
360360 text_tokenizer = model .get_text_tokenizer ()
361361 visual_tokenizer = model .get_visual_tokenizer ()
362362
363- # Print visual tokenizer layer info before conversion
364- print ("OVIS: Visual tokenizer layer precisions before conversion:" )
365- for name , module in visual_tokenizer .named_modules ():
366- if isinstance (module , torch .nn .Linear ):
367- if hasattr (module , "weight" ) and module .weight is not None :
368- print (f" VT Layer { name } : { module .weight .dtype } " )
363+ # # Print visual tokenizer layer info before conversion
364+ # print("OVIS: Visual tokenizer layer precisions before conversion:")
365+ # for name, module in visual_tokenizer.named_modules():
366+ # if isinstance(module, torch.nn.Linear):
367+ # if hasattr(module, "weight") and module.weight is not None:
368+ # print(f" VT Layer {name}: {module.weight.dtype}")
369369
370- # Count modules before conversion
371- linear_count = sum (1 for module in visual_tokenizer .modules ()
372- if isinstance (module , torch .nn .Linear ))
373- print (f"OVIS: Found { linear_count } Linear modules in visual_tokenizer" )
374-
375- for module in visual_tokenizer .modules ():
376- if isinstance (module , torch .nn .Linear ):
377- old_dtype = module .weight .dtype if hasattr (module , "weight" ) else "unknown"
378- module .to (device = self .device , dtype = self .dtype )
379- new_dtype = module .weight .dtype if hasattr (module , "weight" ) else "unknown"
380- print (f"OVIS: Converting module from { old_dtype } to { self .dtype } , result={ new_dtype } " )
370+ # # Count modules before conversion
371+ # linear_count = sum(1 for module in visual_tokenizer.modules()
372+ # if isinstance(module, torch.nn.Linear))
373+ # print(f"OVIS: Found {linear_count} Linear modules in visual_tokenizer")
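374+ # NOTE(review): the loop commented out below did more than log - it called module.to(device=self.device, dtype=self.dtype) on every visual_tokenizer Linear layer; confirm that dropping this conversion is intentional.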
375+ # for module in visual_tokenizer.modules():
376+ # if isinstance(module, torch.nn.Linear):
377+ # old_dtype = module.weight.dtype if hasattr(module, "weight") else "unknown"
378+ # module.to(device=self.device, dtype=self.dtype)
379+ # new_dtype = module.weight.dtype if hasattr(module, "weight") else "unknown"
380+ # print(f"OVIS: Converting module from {old_dtype} to {self.dtype}, result={new_dtype}")
381381
382- # Print visual tokenizer layer info after conversion
383- print ("OVIS: Visual tokenizer layer precisions after conversion:" )
384- for name , module in visual_tokenizer .named_modules ():
385- if isinstance (module , torch .nn .Linear ):
386- if hasattr (module , "weight" ) and module .weight is not None :
387- print (f" VT Layer { name } : { module .weight .dtype } " )
382+ # # Print visual tokenizer layer info after conversion
383+ # print("OVIS: Visual tokenizer layer precisions after conversion:")
384+ # for name, module in visual_tokenizer.named_modules():
385+ # if isinstance(module, torch.nn.Linear):
386+ # if hasattr(module, "weight") and module.weight is not None:
387+ # print(f" VT Layer {name}: {module.weight.dtype}")
388388
389389 # Save model for process_single_image
390390 self .model = model
@@ -399,29 +399,29 @@ def process_single_image(self, raw_image):
399399 )
400400 query = f"<image>\n { prompt } "
401401
402- print ("OVIS: Starting image processing" )
402+ # print("OVIS: Starting image processing")
403403 _ , input_ids , pixel_values = self .model .preprocess_inputs (query , [raw_image ])
404- print (f"OVIS: After preprocess_inputs - pixel_values dtype={ pixel_values .dtype } " )
404+ # print(f"OVIS: After preprocess_inputs - pixel_values dtype={pixel_values.dtype}")
405405
406406 attention_mask = torch .ne (input_ids , self .tokenizer .pad_token_id )
407407
408408 # Batchify and move to the correct device & dtype
409409 input_ids = input_ids .unsqueeze (0 ).to (self .device )
410410 attention_mask = attention_mask .unsqueeze (0 ).to (self .device )
411411
412- print (f"OVIS: Before pixel_values conversion - dtype={ pixel_values .dtype } " )
412+ # print(f"OVIS: Before pixel_values conversion - dtype={pixel_values.dtype}")
413413 pixel_values = pixel_values .to (device = self .device , dtype = self .dtype )
414- print (f"OVIS: After pixel_values conversion - dtype={ pixel_values .dtype } " )
414+ # print(f"OVIS: After pixel_values conversion - dtype={pixel_values.dtype}")
415415
416416 pixel_values = [pixel_values ] # wrap in list for generate()
417417
418- # Check model precision during inference
419- print ("OVIS: Model layer precisions during inference:" )
420- for name , module in self .model .named_modules ():
421- if isinstance (module , (torch .nn .Linear , torch .nn .Conv2d )):
422- if hasattr (module , "weight" ) and module .weight is not None :
423- if name .startswith ("transformer" ) or name .startswith ("lm_head" ):
424- print (f" Inference layer { name } : { module .weight .dtype } " )
418+ # # Check model precision during inference
419+ # print("OVIS: Model layer precisions during inference:")
420+ # for name, module in self.model.named_modules():
421+ # if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d)):
422+ # if hasattr(module, "weight") and module.weight is not None:
423+ # if name.startswith("transformer") or name.startswith("lm_head"):
424+ # print(f" Inference layer {name}: {module.weight.dtype}")
425425
426426 gen_kwargs = {
427427 "max_new_tokens" : 1024 ,
0 commit comments