
Commit 5abef3c

fix model name in examples

1 parent 39b98a2 commit 5abef3c

7 files changed: 28 additions, 28 deletions

README.md

Lines changed: 2 additions & 2 deletions
@@ -53,15 +53,15 @@ vec-inf list
 
 You can also view the default setup for a specific supported model by providing the model name, for example `Meta-Llama-3.1-70B-Instruct`:
 ```bash
-vec-inf list Meta-Llama-3.1-70B-Instruct
+vec-inf list Meta-Llama-3.1-8B-Instruct
 ```
 <img width="400" alt="list_model_img" src="https://github.com/user-attachments/assets/5dec7a33-ba6b-490d-af47-4cf7341d0b42">
 
 `launch`, `list`, and `status` command supports `--json-mode`, where the command output would be structured as a JSON string.
 
 ## Send inference requests
 Once the inference server is ready, you can start sending in inference requests. We provide example scripts for sending inference requests in [`examples`](examples) folder. Make sure to update the model server URL and the model weights location in the scripts. For example, you can run `python examples/inference/llm/completions.py`, and you should expect to see an output like the following:
-> {"id":"cmpl-bdf43763adf242588af07af88b070b62","object":"text_completion","created":2983960,"model":"/model-weights/Llama-2-7b-hf","choices":[{"index":0,"text":"\nCanada is close to the actual continent of North America. Aside from the Arctic islands","logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":8,"total_tokens":28,"completion_tokens":20}}
+> {"id":"cmpl-c08d8946224747af9cce9f4d9f36ceb3","object":"text_completion","created":1725394970,"model":"Meta-Llama-3.1-8B-Instruct","choices":[{"index":0,"text":" is a question that many people may wonder. The answer is, of course, Ottawa. But if","logprobs":null,"finish_reason":"length","stop_reason":null}],"usage":{"prompt_tokens":8,"total_tokens":28,"completion_tokens":20}}
 
 **NOTE**: For multimodal models, currently only `ChatCompletion` is available, and only one image can be provided for each prompt.
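The README text above also mentions that `launch`, `list`, and `status` support `--json-mode`. A minimal sketch of how that flag might be combined with the updated example command (the flag name and model come from the README; the exact argument order is an assumption):

```bash
# Sketch only: list the default setup for the example model as a JSON string.
# Argument order is assumed, not taken from the repository docs.
vec-inf list Meta-Llama-3.1-8B-Instruct --json-mode
```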

examples/inference/llm/chat_completions.py

Lines changed: 9 additions & 6 deletions
@@ -5,11 +5,14 @@
 
 # Update the model path accordingly
 completion = client.chat.completions.create(
-    model="/model-weights/Meta-Llama-3-8B-Instruct",
-    messages=[
-        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
-        {"role": "user", "content": "Who are you?"},
-    ]
+    model="Meta-Llama-3-8B-Instruct",
+    messages=[
+        {
+            "role": "system",
+            "content": "You are a pirate chatbot who always responds in pirate speak!",
+        },
+        {"role": "user", "content": "Who are you?"},
+    ],
 )
 
-print(completion)
+print(completion)
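Not part of the commit, but for readers following this example: with the standard OpenAI Python client response shape, the assistant's reply can be read off the first choice. A small hedged sketch, assuming `completion` is the object created above:

```python
# Assumes `completion` is the ChatCompletion returned by the call above.
# The first choice holds the assistant message; print only its text.
reply = completion.choices[0].message.content
print(reply)
```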

examples/inference/llm/completions.py

Lines changed: 2 additions & 2 deletions
@@ -5,9 +5,9 @@
 
 # Update the model path accordingly
 completion = client.completions.create(
-    model="/model-weights/Meta-Llama-3-8B",
+    model="Meta-Llama-3.1-8B-Instruct",
     prompt="Where is the capital of Canada?",
     max_tokens=20,
 )
 
-print(completion)
+print(completion)
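Pieced together from this file and the client construction visible in `examples/logits/logits.py` further down, a self-contained sketch of the updated completions example might look as follows. The `gpuXXX:XXXX` placeholder is kept from the repository's examples; replace it with your actual server URL.

```python
from openai import OpenAI

# Point the client at the running inference server; the placeholder URL
# mirrors the other example scripts in this commit.
client = OpenAI(base_url="http://gpuXXX:XXXX/v1", api_key="EMPTY")

completion = client.completions.create(
    model="Meta-Llama-3.1-8B-Instruct",  # model name as fixed by this commit
    prompt="Where is the capital of Canada?",
    max_tokens=20,
)

print(completion)
```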

examples/inference/llm/completions.sh

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ export API_BASE_URL=http://gpuXXX:XXXX/v1
 curl ${API_BASE_URL}/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "/model-weights/Meta-Llama-3-8B",
+    "model": "Meta-Llama-3-8B",
     "prompt": "What is the capital of Canada?",
     "max_tokens": 20
   }'
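As a usage note (not part of the commit): if `jq` is installed, piping the response through it makes the returned JSON easier to read.

```bash
# Same request as the script above, pretty-printed; assumes jq is available.
curl -s ${API_BASE_URL}/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Meta-Llama-3-8B",
    "prompt": "What is the capital of Canada?",
    "max_tokens": 20
  }' | jq .
```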

examples/inference/vlm/vision_completions.py

Lines changed: 1 addition & 2 deletions
@@ -5,7 +5,7 @@
 
 # Update the model path accordingly
 completion = client.chat.completions.create(
-    model="/model-weights/llava-1.5-13b-hf",
+    model="llava-1.5-13b-hf",
     messages=[
         {
             "role": "user",
@@ -24,4 +24,3 @@
 )
 
 print(completion)
-
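The hunk above elides the body of the user message (lines 9–23 of the file). Purely as an illustration of the typical OpenAI-style vision payload, and not a reproduction of the elided lines, a single-image request usually has roughly this shape (the prompt text and image URL below are made up):

```python
# Illustrative only: a typical chat-completions message list with one image.
# The actual content of the elided lines in the example file may differ.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": "https://example.com/some-image.jpg"},
            },
        ],
    }
]
```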

examples/logits/logits.py

Lines changed: 2 additions & 2 deletions
@@ -4,10 +4,10 @@
 client = OpenAI(base_url="http://gpuXXX:XXXXX/v1", api_key="EMPTY")
 
 completion = client.completions.create(
-    model="/model-weights/Meta-Llama-3-8B",
+    model="Meta-Llama-3-8B",
     prompt="Where is the capital of Canada?",
     max_tokens=1,
-    logprobs=32000 # Set to model vocab size to get logits
+    logprobs=32000,  # Set to model vocab size to get logits
 )
 
 print(completion.choices[0].logprobs)
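The comment in this file says the large `logprobs` value is meant to recover per-token logits. A hedged sketch of inspecting the result, assuming the legacy completions `logprobs` fields (`tokens`, `top_logprobs`) exposed by the OpenAI Python client:

```python
# Assumes `completion` was created as in the example above.
logprobs = completion.choices[0].logprobs
print(logprobs.tokens[0])             # the single generated token
print(len(logprobs.top_logprobs[0]))  # size of the token -> log-probability map
```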

profile/gen.py

Lines changed: 11 additions & 13 deletions
@@ -1,15 +1,16 @@
-import requests
 import time
 
+import requests
+
 # Change the ENDPOINT and MODEL_PATH to match your setup
-ENDPOINT = "http://gpuXXX:XXXX/v1"
+ENDPOINT = "http://gpuXXX:XXXX/v1"
 MODEL_PATH = "Meta-Llama-3-70B"
 
 # Configuration
-API_KEY = 'EMPTY'
+API_KEY = "EMPTY"
 HEADERS = {
-    'Authorization': f'Bearer {API_KEY}',
-    'Content-Type': 'application/json',
+    "Authorization": f"Bearer {API_KEY}",
+    "Content-Type": "application/json",
 }
 
 # Sample prompts for testing
@@ -66,15 +67,12 @@
     "What are the ethical implications of cloning?",
     "Explain the significance of the Pyramids of Giza.",
     "Describe the process of making wine.",
-    "How does the GPS system work?"
+    "How does the GPS system work?",
 ]
 
+
 def send_request(prompt):
-    data = {
-        'model': f"/model-weights/{MODEL_PATH}",
-        'prompt': prompt,
-        'max_tokens': 100
-    }
+    data = {"model": f"{MODEL_PATH}", "prompt": prompt, "max_tokens": 100}
     start_time = time.time()
     response = requests.post(f"{ENDPOINT}/completions", headers=HEADERS, json=data)
     duration = time.time() - start_time
@@ -83,13 +81,13 @@ def send_request(prompt):
     else:
         return None
 
+
 def main():
     for i in range(10):
         print("Sending 20x requests 0-52...")
         send_request(PROMPTS * 20)
     print("Done!")
 
 
-
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
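A usage assumption, not stated in the commit: after editing `ENDPOINT` and `MODEL_PATH` at the top of the file, the profiling script is presumably run directly.

```bash
# Run the profiling script against the configured endpoint.
python profile/gen.py
```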
