@@ -178,8 +178,7 @@ while True:
         break
     b_prompt = "[INST]{} [/INST]".format(prompt)  # prompt template for llama2
     inputs = tokenizer(b_prompt, return_tensors="pt").input_ids
-    outputs = model.generate(inputs, streamer=streamer, interactive=True, ignore_prompt=True,
-                             num_beams=1, max_new_tokens=-1, ctx_size=1024, do_sample=True, threads=28, repetition_penalty=1.1)
+    outputs = model.generate(inputs, streamer=streamer, interactive=True, ignore_prompt=True, do_sample=True)
 ```
 
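This change (and the matching ChatGLM2 change below) drops the explicit generation parameters from the example and falls back to the runtime's defaults. The removed keyword arguments are presumably still accepted; a minimal sketch of the fully spelled-out call, with every value taken from the removed lines and the comments reflecting the usual llama.cpp-style meanings rather than documented behavior:

```python
# Explicit variant of the simplified call above; all values come from the
# lines removed in this diff, under the assumption the signature still holds.
outputs = model.generate(
    inputs,
    streamer=streamer,
    interactive=True,        # multi-turn chat mode
    ignore_prompt=True,      # do not stream the prompt back out
    num_beams=1,             # no beam search
    max_new_tokens=-1,       # -1: no fixed cap on generated tokens
    ctx_size=1024,           # context window, in tokens
    do_sample=True,          # sample instead of greedy decoding
    threads=28,              # CPU threads; tune to your machine
    repetition_penalty=1.1,  # mildly discourage repeated tokens
)
```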
 Chat with ChatGLM2:
@@ -199,10 +198,28 @@ while True:
         break
     prompt = tokenizer.build_prompt(prompt)  # prompt template for chatglm2
     inputs = tokenizer([prompt], return_tensors="pt").input_ids
-    outputs = model.generate(inputs, streamer=streamer, interactive=True, ignore_prompt=True,
-                             num_beams=1, max_new_tokens=-1, ctx_size=1024, do_sample=True, threads=28, repetition_penalty=1.1, n_keep=2)
+    outputs = model.generate(inputs, streamer=streamer, interactive=True, ignore_prompt=True, do_sample=True, n_keep=2)
 ```
 
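The extra `n_keep=2` presumably mirrors the llama.cpp-style option of the same name, which pins the first n prompt tokens in place when the context window fills and older tokens are evicted; for ChatGLM2 that would plausibly cover the two special tokens its tokenizer prepends to every prompt.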
+Chat with Qwen:
+```python
+from transformers import AutoTokenizer, TextStreamer
+from intel_extension_for_transformers.transformers import AutoModelForCausalLM, WeightOnlyQuantConfig
+
+model_name = "Qwen/Qwen-7B-Chat"  # or local path to model
+woq_config = WeightOnlyQuantConfig(compute_dtype="int8", weight_dtype="int4")
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+streamer = TextStreamer(tokenizer)
+model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=woq_config, trust_remote_code=True)
+
+while True:
+    prompt = input("> ").strip()
+    if prompt == "quit":
+        break
+    prompt = "\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n".format(prompt)  # prompt template for qwen
+    inputs = tokenizer([prompt], return_tensors="pt").input_ids
+    outputs = model.generate(inputs, streamer=streamer, interactive=True, ignore_prompt=True, do_sample=True)
+```
 
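The Qwen template above covers a single turn of the ChatML format. For multi-turn chat, the conversation history would typically be folded into one prompt before each `generate` call; a minimal sketch, where `build_chatml` and the `history` list are hypothetical helpers rather than part of either library, and only the tag layout is taken from the single-turn template above:

```python
def build_chatml(history, user_msg):
    # Hypothetical helper: assemble a ChatML prompt from past
    # (user, assistant) turn pairs plus the new user message.
    parts = []
    for user, assistant in history:
        parts.append("\n<|im_start|>user\n{}<|im_end|>".format(user))
        parts.append("\n<|im_start|>assistant\n{}<|im_end|>".format(assistant))
    parts.append("\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n".format(user_msg))
    return "".join(parts)

# Usage: history = [("Hi", "Hello! How can I help?")]
#        prompt = build_chatml(history, "Tell me a joke")
```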
 ## How to use: Python script
 Install from binary