1 file changed: +11 -9 lines changed

Columns: Original file line number | Diff line number | Diff line change

@@ -135,15 +135,17 @@ def __init__(
135135 self .unspecial_piece_to_id = {}
136136
137137 tokenizer_json_path = os .path .join (self .config .model_dir , "tokenizer.json" )
138- if os .path .exists (tokenizer_json_path ):
139- with open (tokenizer_json_path , encoding = "utf8" ) as f :
140- tokenizer_json = json .load (f )
141- if "added_tokens" in tokenizer_json :
142- for v in tokenizer_json ["added_tokens" ]:
143- if v ["special" ]:
144- self .extended_piece_to_id [v ["content" ]] = v ["id" ]
145- else :
146- self .unspecial_piece_to_id [v ["content" ]] = v ["id" ]
138+ if not os .path .exists (tokenizer_json_path ):
139+ raise ValueError (" ## Model does not include a tokenizer.json file. SentencePiece-only tokenizers are no longer supported" )
140+
141+ with open (tokenizer_json_path , encoding = "utf8" ) as f :
142+ tokenizer_json = json .load (f )
143+ if "added_tokens" in tokenizer_json :
144+ for v in tokenizer_json ["added_tokens" ]:
145+ if v ["special" ]:
146+ self .extended_piece_to_id [v ["content" ]] = v ["id" ]
147+ else :
148+ self .unspecial_piece_to_id [v ["content" ]] = v ["id" ]
147149
148150 # Attempt to load tokenizer_config.json
149151
You can’t perform that action at this time.
0 commit comments