diff --git a/README.md b/README.md index fda3a6eb..2445b8f0 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ client.prompts.log( messages=[{"role": "user", "content": "What really happened at Roswell?"}], inputs={"person": "Trump"}, created_at=datetime.datetime.fromisoformat( - "2024-07-19 00:29:35.178000+00:00", + "2024-07-18 23:29:35.178000+00:00", ), provider_latency=6.5931549072265625, output_message={ @@ -88,7 +88,7 @@ async def main() -> None: ], inputs={"person": "Trump"}, created_at=datetime.datetime.fromisoformat( - "2024-07-19 00:29:35.178000+00:00", + "2024-07-18 23:29:35.178000+00:00", ), provider_latency=6.5931549072265625, output_message={ @@ -165,7 +165,6 @@ response = client.prompts.call_stream( ), source_datapoint_id="string", trace_parent_id="string", - batch_id="string", user="string", prompts_call_stream_request_environment="string", save=True, diff --git a/pyproject.toml b/pyproject.toml index 279764ab..a62db965 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "humanloop" -version = "0.8.8" +version = "0.8.9" description = "" readme = "README.md" authors = [] diff --git a/reference.md b/reference.md index aae70be1..27cd7ce4 100644 --- a/reference.md +++ b/reference.md @@ -56,7 +56,7 @@ client.prompts.log( messages=[{"role": "user", "content": "What really happened at Roswell?"}], inputs={"person": "Trump"}, created_at=datetime.datetime.fromisoformat( - "2024-07-19 00:29:35.178000+00:00", + "2024-07-18 23:29:35.178000+00:00", ), provider_latency=6.5931549072265625, output_message={ @@ -100,7 +100,7 @@ client.prompts.log(
-**evaluation_id:** `typing.Optional[str]` — Unique identifier for the Evaluation Report to associate the Log to. +**run_id:** `typing.Optional[str]` — Unique identifier for the Run to associate the Log to.
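A minimal sketch of the renamed parameter in use; the `id`, `version_id`, and `run_id` values below are placeholders (a real `run_id` is returned by `evaluations.create_run`, documented further down):

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Attach a Prompt Log to an Evaluation Run via `run_id`
# (replaces the removed `evaluation_id`/`batch_id` pair).
client.prompts.log(
    id="prompt_id",           # placeholder Prompt File ID
    version_id="version_id",  # placeholder Version ID
    inputs={"person": "Trump"},
    output="...",
    run_id="run_id",          # placeholder Run ID
)
```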
@@ -314,14 +314,6 @@ Controls how the model uses tools. The following options are supported:
-**batch_id:** `typing.Optional[str]` — Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - -
-
- -
-
- **user:** `typing.Optional[str]` — End-user ID related to the Log.
@@ -682,7 +674,6 @@ response = client.prompts.call_stream( ), source_datapoint_id="string", trace_parent_id="string", - batch_id="string", user="string", prompts_call_stream_request_environment="string", save=True, @@ -836,14 +827,6 @@ Controls how the model uses tools. The following options are supported:
-**batch_id:** `typing.Optional[str]` — Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - -
-
- -
-
- **user:** `typing.Optional[str]` — End-user ID related to the Log.
@@ -1102,14 +1085,6 @@ Controls how the model uses tools. The following options are supported:
-**batch_id:** `typing.Optional[str]` — Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - -
-
- -
-
- **user:** `typing.Optional[str]` — End-user ID related to the Log.
@@ -2525,14 +2500,6 @@ client.tools.log(
-**batch_id:** `typing.Optional[str]` — Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - -
-
- -
-
- **user:** `typing.Optional[str]` — End-user ID related to the Log.
@@ -4497,6 +4464,14 @@ client.datasets.list_versions(
+**include_datapoints:** `typing.Optional[typing.Literal["latest_committed"]]` — If set to 'latest_committed', include the Datapoints for the latest committed version. Defaults to `None`. + +
+
+ +
+
+ **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
@@ -5157,14 +5132,6 @@ client.evaluators.log(
-**batch_id:** `typing.Optional[str]` — Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - -
-
- -
-
- **user:** `typing.Optional[str]` — End-user ID related to the Log.
@@ -6258,10 +6225,10 @@ client.flows.log( output="The patient is likely experiencing a myocardial infarction. Immediate medical attention is required.", trace_status="incomplete", start_time=datetime.datetime.fromisoformat( - "2024-07-08 22:40:35+00:00", + "2024-07-08 21:40:35+00:00", ), end_time=datetime.datetime.fromisoformat( - "2024-07-08 22:40:39+00:00", + "2024-07-08 21:40:39+00:00", ), ) @@ -6295,7 +6262,7 @@ client.flows.log(
-**evaluation_id:** `typing.Optional[str]` — Unique identifier for the Evaluation Report to associate the Log to. +**run_id:** `typing.Optional[str]` — Unique identifier for the Run to associate the Log to.
@@ -6431,14 +6398,6 @@ client.flows.log(
-**batch_id:** `typing.Optional[str]` — Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - -
-
- -
-
- **user:** `typing.Optional[str]` — End-user ID related to the Log.
@@ -8212,16 +8171,10 @@ for page in response.iter_pages(): Create an Evaluation. -Create a new Evaluation by specifying the Dataset, versions to be -evaluated (Evaluatees), and which Evaluators to provide judgments. +Create an Evaluation by specifying the File to evaluate, and a name +for the Evaluation. -Humanloop will automatically start generating Logs and running Evaluators where -`orchestrated=true`. If you own the runtime for the Evaluatee or Evaluator, you -can set `orchestrated=false` and then generate and submit the required logs using -your runtime. - -To keep updated on the progress of the Evaluation, you can poll the Evaluation using -the `GET /evaluations/:id` endpoint and check its status. +You can then add Runs to this Evaluation using the `POST /evaluations/{id}/runs` endpoint.
@@ -8242,11 +8195,7 @@ client = Humanloop( api_key="YOUR_API_KEY", ) client.evaluations.create( - dataset={"version_id": "dsv_6L78pqrdFi2xa"}, - evaluatees=[ - {"version_id": "prv_7ZlQREDScH0xkhUwtXruN", "orchestrated": False} - ], - evaluators=[{"version_id": "evv_012def", "orchestrated": False}], + evaluators=[{}], ) ``` @@ -8263,7 +8212,7 @@ client.evaluations.create(
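The generated example above passes an empty Evaluator specification. A fuller sketch of the new create-then-run flow, using placeholder IDs and a placeholder Evaluator `path` (mirroring what `eval_utils.py` now does internally):

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Create the Evaluation against the File whose Logs you want to evaluate.
evaluation = client.evaluations.create(
    name="My evaluation",                         # placeholder name
    file={"id": "file_id"},                       # placeholder File ID
    evaluators=[{"path": "Evals/My Evaluator"}],  # placeholder Evaluator path
)

# Runs are added separately; see `create_run` further down this reference.
run = client.evaluations.create_run(id=evaluation.id)
```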
-**dataset:** `EvaluationsDatasetRequestParams` — Dataset to use in this Evaluation. +**evaluators:** `typing.Sequence[EvaluationsRequestParams]` — The Evaluators used to evaluate.
@@ -8271,7 +8220,7 @@ client.evaluations.create(
-**evaluators:** `typing.Sequence[EvaluationsRequestParams]` — The Evaluators used to evaluate. +**file:** `typing.Optional[FileRequestParams]` — The File to associate with the Evaluation. This File contains the Logs you're evaluating.
@@ -8279,7 +8228,7 @@ client.evaluations.create(
-**evaluatees:** `typing.Optional[typing.Sequence[EvaluateeRequestParams]]` — Unique identifiers for the Prompt/Tool Versions to include in the Evaluation. Can be left unpopulated if you wish to add Evaluatees to this Evaluation by specifying `evaluation_id` in Log calls. +**name:** `typing.Optional[str]` — Name of the Evaluation to help identify it. Must be unique within the associated File.
@@ -8287,15 +8236,163 @@ client.evaluations.create(
-**name:** `typing.Optional[str]` — Name of the Evaluation to help identify it. Must be unique within the associated File. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. +
+
+ +
+ + + +
+ + +
client.evaluations.add_evaluators(...) +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Add Evaluators to an Evaluation. + +Add new Evaluators to an Evaluation. The Evaluators will be run on the Logs +generated for the Evaluation. +
+
+#### 🔌 Usage +
-**file:** `typing.Optional[FileRequestParams]` — The File to associate with the Evaluation. +
+
+ +```python +from humanloop import Humanloop + +client = Humanloop( + api_key="YOUR_API_KEY", +) +client.evaluations.add_evaluators( + id="id", + evaluators=[{}], +) + +``` +
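A sketch with a `path`-based Evaluator specification in place of the empty `{}` above; the path and Evaluation ID are placeholders:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

client.evaluations.add_evaluators(
    id="ev_567yza",                               # placeholder Evaluation ID
    evaluators=[{"path": "Evals/My Evaluator"}],  # placeholder Evaluator path
)
```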
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**id:** `str` — Unique identifier for Evaluation. + +
+
+ +
+
+ +**evaluators:** `typing.Sequence[EvaluationsRequestParams]` — The Evaluators to add to this Evaluation. + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+
+
+ + +
+
+
+ +
client.evaluations.remove_evaluator(...) +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Remove an Evaluator from an Evaluation. + +Remove an Evaluator from an Evaluation. The Evaluator will no longer be run on the Logs +generated for the Evaluation. +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from humanloop import Humanloop + +client = Humanloop( + api_key="YOUR_API_KEY", +) +client.evaluations.remove_evaluator( + id="id", + evaluator_version_id="evaluator_version_id", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**id:** `str` — Unique identifier for Evaluation. + +
+
+ +
+
+ +**evaluator_version_id:** `str` — Unique identifier for Evaluator Version.
@@ -8458,7 +8555,7 @@ client.evaluations.delete(
-
client.evaluations.update_setup(...) +
client.evaluations.list_runs_for_evaluation(...)
@@ -8470,10 +8567,7 @@ client.evaluations.delete(
-Update an Evaluation. - -Update the setup of an Evaluation by specifying the Dataset, versions to be -evaluated (Evaluatees), and which Evaluators to provide judgments. +List all Runs for an Evaluation.
@@ -8493,13 +8587,8 @@ from humanloop import Humanloop client = Humanloop( api_key="YOUR_API_KEY", ) -client.evaluations.update_setup( - id="ev_567yza", - dataset={"version_id": "dsv_6L78pqrdFi2xa"}, - evaluatees=[ - {"version_id": "prv_7ZlQREDScH0xkhUwtXruN", "orchestrated": False} - ], - evaluators=[{"version_id": "evv_012def", "orchestrated": False}], +client.evaluations.list_runs_for_evaluation( + id="id", ) ``` @@ -8524,7 +8613,87 @@ client.evaluations.update_setup(
-**dataset:** `typing.Optional[EvaluationsDatasetRequestParams]` — Dataset to use in this Evaluation. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+ +
+ + + +
+ + +
client.evaluations.create_run(...) +
+
+ +#### 📝 Description + +
+
+ +
+
+
+Create an Evaluation Run.
+
+Create a new Evaluation Run. Optionally specify the Dataset and version to be
+evaluated.
+
+Humanloop will automatically start generating Logs and running Evaluators where
+`orchestrated=true`. If you are generating Logs yourself, you can set `orchestrated=false`
+and then generate and submit the required Logs via the API.
+
+The `logs` parameter controls which Logs are associated with the Run. Defaults to `dynamic`
+if `dataset` and `version` are provided. This means that Logs will automatically be retrieved
+if they're associated with the specified Version and have `source_datapoint_id` referencing
+a datapoint in the specified Dataset.
+If `logs` is set to `fixed`, no existing Logs will be automatically associated with the Run.
+You can then add Logs to the Run using the `POST /evaluations/{id}/runs/{run_id}/logs` endpoint,
+or by adding `run_id` to your `POST /prompts/logs` requests.
+
+To keep updated on the progress of the Run, you can poll the Run using
+the `GET /evaluations/{id}/runs` endpoint and check its status.
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from humanloop import Humanloop + +client = Humanloop( + api_key="YOUR_API_KEY", +) +client.evaluations.create_run( + id="id", +) + +``` +
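The generated snippet passes only the required `id`. A sketch of the fixed-Logs flow described above, with placeholder IDs; every call used here appears elsewhere in this changeset:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Create a Run that will not pull in existing Logs automatically.
run = client.evaluations.create_run(
    id="ev_567yza",                     # placeholder Evaluation ID
    dataset={"file_id": "dataset_id"},  # placeholder Dataset File ID
    logs="fixed",
    orchestrated=False,
)

# Either reference the Run when logging...
client.prompts.log(
    id="prompt_id",  # placeholder Prompt File ID
    inputs={"person": "Trump"},
    output="...",
    run_id=run.id,
)

# ...or attach existing Logs afterwards.
client.evaluations.add_logs_to_run(
    id="ev_567yza",
    run_id=run.id,
    log_ids=["log_id"],  # placeholder Log IDs
)
```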
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**id:** `str` — Unique identifier for Evaluation.
@@ -8532,7 +8701,7 @@ client.evaluations.update_setup(
-**evaluatees:** `typing.Optional[typing.Sequence[EvaluateeRequestParams]]` — Unique identifiers for the Prompt/Tool Versions to include in the Evaluation. Can be left unpopulated if you wish to add evaluatees to this Evaluation by specifying `evaluation_id` in Log calls. +**dataset:** `typing.Optional[EvaluationsDatasetRequestParams]` — Dataset to use in this Run.
@@ -8540,7 +8709,7 @@ client.evaluations.update_setup(
-**evaluators:** `typing.Optional[typing.Sequence[EvaluationsRequestParams]]` — The Evaluators used to evaluate. +**version:** `typing.Optional[VersionSpecificationParams]` — Version to use in this Run.
@@ -8548,7 +8717,7 @@ client.evaluations.update_setup(
-**name:** `typing.Optional[str]` — Name of the Evaluation to help identify it. Must be unique within the associated File. +**orchestrated:** `typing.Optional[bool]` — Whether the Run is orchestrated by Humanloop. If `True`, Humanloop will generate Logs for the Run; `dataset` and `version` must be provided. If `False`, a log for the Prompt/Tool should be submitted by the user via the API.
@@ -8556,7 +8725,7 @@ client.evaluations.update_setup(
-**file:** `typing.Optional[FileRequestParams]` — The File to associate with the Evaluation. +**logs:** `typing.Optional[LogsAssociationType]` — How the Logs are associated with the Run. If `dynamic`, the latest relevant Logs will be inferred from the Dataset and Version. If `fixed`, the Logs will be explicitly associated. You can provide a list of Log IDs to associate with the Run, or add them to the Run later. Defaults to `dynamic` if `dataset` and `version` are provided; otherwise, defaults to `fixed`.
@@ -8576,7 +8745,7 @@ client.evaluations.update_setup(
-
client.evaluations.update_status(...) +
client.evaluations.add_existing_run(...)
@@ -8588,10 +8757,7 @@ client.evaluations.update_setup(
-Update the status of an Evaluation. - -Can be used to cancel a running Evaluation, or mark an Evaluation that uses -external or human evaluators as completed. +Add an existing Run to an Evaluation.
@@ -8611,9 +8777,9 @@ from humanloop import Humanloop client = Humanloop( api_key="YOUR_API_KEY", ) -client.evaluations.update_status( +client.evaluations.add_existing_run( id="id", - status="pending", + run_id="run_id", ) ``` @@ -8638,7 +8804,7 @@ client.evaluations.update_status(
-**status:** `EvaluationStatus` +**run_id:** `str` — Unique identifier for Run.
@@ -8658,7 +8824,7 @@ client.evaluations.update_status(
-
client.evaluations.get_stats(...) +
client.evaluations.remove_run_from_evaluation(...)
@@ -8670,11 +8836,10 @@ client.evaluations.update_status(
-Get Evaluation Stats. +Remove a Run from an Evaluation. -Retrieve aggregate stats for the specified Evaluation. -This includes the number of generated Logs for each evaluated version and the -corresponding Evaluator statistics (such as the mean and percentiles). +Remove a Run from an Evaluation. The Logs and Versions used in the Run will not be deleted. +If this Run is used in any other Evaluations, it will still be available in those Evaluations.
@@ -8694,8 +8859,9 @@ from humanloop import Humanloop client = Humanloop( api_key="YOUR_API_KEY", ) -client.evaluations.get_stats( +client.evaluations.remove_run_from_evaluation( id="id", + run_id="run_id", ) ``` @@ -8720,6 +8886,14 @@ client.evaluations.get_stats(
+**run_id:** `str` — Unique identifier for Run. + +
+
+ +
+
+ **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
@@ -8732,7 +8906,7 @@ client.evaluations.get_stats(
-
client.evaluations.get_logs(...) +
client.evaluations.update_evaluation_run(...)
@@ -8744,10 +8918,9 @@ client.evaluations.get_stats(
-Get the Logs associated to a specific Evaluation. +Update an Evaluation Run. -Each Datapoint in your Dataset will have a corresponding Log for each File version evaluated. -e.g. If you have 50 Datapoints and are evaluating 2 Prompts, there will be 100 Logs associated with the Evaluation. +Update the Dataset and version to be evaluated for an existing Run.
@@ -8767,8 +8940,10 @@ from humanloop import Humanloop client = Humanloop( api_key="YOUR_API_KEY", ) -client.evaluations.get_logs( +client.evaluations.update_evaluation_run( id="id", + run_id="run_id", + control=True, ) ``` @@ -8785,7 +8960,7 @@ client.evaluations.get_logs(
-**id:** `str` — String ID of evaluation. Starts with `ev_` or `evr_`. +**id:** `str` — Unique identifier for Evaluation.
@@ -8793,7 +8968,7 @@ client.evaluations.get_logs(
-**page:** `typing.Optional[int]` — Page number for pagination. +**run_id:** `str` — Unique identifier for Run.
@@ -8801,7 +8976,7 @@ client.evaluations.get_logs(
-**size:** `typing.Optional[int]` — Page size for pagination. Number of Logs to fetch. +**control:** `bool` — If `True`, this Run will be used as the control in the Evaluation. Stats for other Runs will be compared to this Run. This will replace any existing control Run.
@@ -8821,7 +8996,7 @@ client.evaluations.get_logs(
-
client.evaluations.pin_evaluatee(...) +
client.evaluations.add_logs_to_run(...)
@@ -8833,10 +9008,11 @@ client.evaluations.get_logs(
-Pin the specified Evaluatee. +Add Logs to an Evaluation Run. -Pinned Evaluatees are always displayed in the Evaluation Overview, -and serve as the baseline for comparison with other Evaluatees. +This is supported only for Runs that have a fixed set of Logs. +(Runs can either have a fixed set of Logs, or can be set to dynamically retrieve the latest Logs +if a Dataset and Version are provided.)
@@ -8856,8 +9032,10 @@ from humanloop import Humanloop client = Humanloop( api_key="YOUR_API_KEY", ) -client.evaluations.pin_evaluatee( +client.evaluations.add_logs_to_run( id="id", + run_id="run_id", + log_ids=["log_ids"], ) ``` @@ -8882,7 +9060,7 @@ client.evaluations.pin_evaluatee(
-**version_id:** `typing.Optional[str]` — Unique identifier for the File Version. If provided, none of the other fields should be specified. +**run_id:** `str` — Unique identifier for Run.
@@ -8890,7 +9068,7 @@ client.evaluations.pin_evaluatee(
-**path:** `typing.Optional[str]` — Path identifying a File. Provide either this or `file_id` if you want to specify a File. +**log_ids:** `typing.Sequence[str]` — The IDs of the Logs to add to the Run.
@@ -8898,15 +9076,74 @@ client.evaluations.pin_evaluatee(
-**file_id:** `typing.Optional[str]` — Unique identifier for the File. Provide either this or `path` if you want to specify a File. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+ +
+ + + +
+ + +
client.evaluations.get_stats(...) +
+
+ +#### 📝 Description + +
+
-**environment:** `typing.Optional[str]` — Name of the Environment a Version is deployed to. Only provide this when specifying a File. If not provided (and a File is specified), the default Environment is used. +Get Evaluation Stats. + +Retrieve aggregate stats for the specified Evaluation. + +This includes the number of generated Logs for each Run and the +corresponding Evaluator statistics (such as the mean and percentiles). +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from humanloop import Humanloop + +client = Humanloop( + api_key="YOUR_API_KEY", +) +client.evaluations.get_stats( + id="id", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**id:** `str` — Unique identifier for Evaluation.
@@ -8914,15 +9151,85 @@ client.evaluations.pin_evaluatee(
-**batch_id:** `typing.Optional[str]` — Unique identifier for the batch of Logs to include in the Evaluation Report. +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
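A sketch of consuming the per-Run stats; the `run_stats`, `evaluator_stats`, and `report` fields are those used by the `eval_utils.py` changes later in this diff, and the Evaluation ID is a placeholder:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

stats = client.evaluations.get_stats(
    id="ev_567yza",  # placeholder Evaluation ID
)

# Aggregate stats are now keyed by Run rather than by evaluated version.
for run_stat in stats.run_stats:
    evaluator_ids = [e.evaluator_version_id for e in run_stat.evaluator_stats]
    print(run_stat.run_id, evaluator_ids)

# Human-readable summary, as logged by the eval utilities.
print(stats.report)
```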
+
+
+ + +
+
+
+
client.evaluations.get_logs(...)
-**orchestrated:** `typing.Optional[bool]` — Whether the Prompt/Tool is orchestrated by Humanloop. Default is `True`. If `False`, a log for the Prompt/Tool should be submitted by the user via the API. +#### 📝 Description + +
+
+ +
+
+Get the Logs associated with a specific Evaluation.
+
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from humanloop import Humanloop + +client = Humanloop( + api_key="YOUR_API_KEY", +) +client.evaluations.get_logs( + id="id", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**id:** `str` — String ID of evaluation. Starts with `ev_` or `evr_`. + +
+
+ +
+
+ +**page:** `typing.Optional[int]` — Page number for pagination. + +
+
+ +
+
+ +**size:** `typing.Optional[int]` — Page size for pagination. Number of Logs to fetch.
diff --git a/src/humanloop/__init__.py b/src/humanloop/__init__.py index f782b29d..ac2e2567 100644 --- a/src/humanloop/__init__.py +++ b/src/humanloop/__init__.py @@ -27,12 +27,13 @@ DirectoryWithParentsAndChildrenResponseFilesItem, EnvironmentResponse, EnvironmentTag, - EvaluatedVersionResponse, EvaluateeRequest, EvaluateeResponse, EvaluationEvaluatorResponse, - EvaluationReportLogResponse, + EvaluationLogResponse, EvaluationResponse, + EvaluationRunResponse, + EvaluationRunsResponse, EvaluationStats, EvaluationStatus, EvaluationsDatasetRequest, @@ -77,6 +78,7 @@ ListTools, LlmEvaluatorRequest, LogResponse, + LogsAssociationType, ModelEndpoints, ModelProviders, MonitoringEvaluatorEnvironmentRequest, @@ -86,7 +88,7 @@ NumericEvaluatorStatsResponse, ObservabilityStatus, OverallStats, - PaginatedDataEvaluationReportLogResponse, + PaginatedDataEvaluationLogResponse, PaginatedDataEvaluatorResponse, PaginatedDataFlowResponse, PaginatedDataLogResponse, @@ -116,6 +118,9 @@ ProviderApiKeys, ResponseFormat, ResponseFormatType, + RunStatsResponse, + RunStatsResponseEvaluatorStatsItem, + RunVersionResponse, SelectEvaluatorStatsResponse, SortOrder, TextChatContent, @@ -139,6 +144,7 @@ VersionIdResponse, VersionIdResponseVersion, VersionReferenceResponse, + VersionSpecification, VersionStatsResponse, VersionStatsResponseEvaluatorVersionStatsItem, VersionStatus, @@ -191,12 +197,13 @@ DirectoryWithParentsAndChildrenResponseFilesItemParams, DirectoryWithParentsAndChildrenResponseParams, EnvironmentResponseParams, - EvaluatedVersionResponseParams, EvaluateeRequestParams, EvaluateeResponseParams, EvaluationEvaluatorResponseParams, - EvaluationReportLogResponseParams, + EvaluationLogResponseParams, EvaluationResponseParams, + EvaluationRunResponseParams, + EvaluationRunsResponseParams, EvaluationStatsParams, EvaluationsDatasetRequestParams, EvaluationsRequestParams, @@ -238,7 +245,7 @@ MonitoringEvaluatorVersionRequestParams, NumericEvaluatorStatsResponseParams, OverallStatsParams, - PaginatedDataEvaluationReportLogResponseParams, + PaginatedDataEvaluationLogResponseParams, PaginatedDataEvaluatorResponseParams, PaginatedDataFlowResponseParams, PaginatedDataLogResponseParams, @@ -263,6 +270,9 @@ PromptResponseTemplateParams, ProviderApiKeysParams, ResponseFormatParams, + RunStatsResponseEvaluatorStatsItemParams, + RunStatsResponseParams, + RunVersionResponseParams, SelectEvaluatorStatsResponseParams, TextChatContentParams, TextEvaluatorStatsResponseParams, @@ -279,6 +289,7 @@ VersionIdResponseParams, VersionIdResponseVersionParams, VersionReferenceResponseParams, + VersionSpecificationParams, VersionStatsResponseEvaluatorVersionStatsItemParams, VersionStatsResponseParams, ) @@ -337,18 +348,20 @@ "EnvironmentResponse", "EnvironmentResponseParams", "EnvironmentTag", - "EvaluatedVersionResponse", - "EvaluatedVersionResponseParams", "EvaluateeRequest", "EvaluateeRequestParams", "EvaluateeResponse", "EvaluateeResponseParams", "EvaluationEvaluatorResponse", "EvaluationEvaluatorResponseParams", - "EvaluationReportLogResponse", - "EvaluationReportLogResponseParams", + "EvaluationLogResponse", + "EvaluationLogResponseParams", "EvaluationResponse", "EvaluationResponseParams", + "EvaluationRunResponse", + "EvaluationRunResponseParams", + "EvaluationRunsResponse", + "EvaluationRunsResponseParams", "EvaluationStats", "EvaluationStatsParams", "EvaluationStatus", @@ -431,6 +444,7 @@ "LlmEvaluatorRequestParams", "LogResponse", "LogResponseParams", + "LogsAssociationType", "ModelEndpoints", "ModelProviders", 
"MonitoringEvaluatorEnvironmentRequest", @@ -445,8 +459,8 @@ "ObservabilityStatus", "OverallStats", "OverallStatsParams", - "PaginatedDataEvaluationReportLogResponse", - "PaginatedDataEvaluationReportLogResponseParams", + "PaginatedDataEvaluationLogResponse", + "PaginatedDataEvaluationLogResponseParams", "PaginatedDataEvaluatorResponse", "PaginatedDataEvaluatorResponseParams", "PaginatedDataFlowResponse", @@ -512,6 +526,12 @@ "ResponseFormat", "ResponseFormatParams", "ResponseFormatType", + "RunStatsResponse", + "RunStatsResponseEvaluatorStatsItem", + "RunStatsResponseEvaluatorStatsItemParams", + "RunStatsResponseParams", + "RunVersionResponse", + "RunVersionResponseParams", "SelectEvaluatorStatsResponse", "SelectEvaluatorStatsResponseParams", "SortOrder", @@ -554,6 +574,8 @@ "VersionIdResponseVersionParams", "VersionReferenceResponse", "VersionReferenceResponseParams", + "VersionSpecification", + "VersionSpecificationParams", "VersionStatsResponse", "VersionStatsResponseEvaluatorVersionStatsItem", "VersionStatsResponseEvaluatorVersionStatsItemParams", diff --git a/src/humanloop/core/client_wrapper.py b/src/humanloop/core/client_wrapper.py index 04653533..4282222b 100644 --- a/src/humanloop/core/client_wrapper.py +++ b/src/humanloop/core/client_wrapper.py @@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "humanloop", - "X-Fern-SDK-Version": "0.8.8", + "X-Fern-SDK-Version": "0.8.9", } headers["X-API-KEY"] = self.api_key return headers diff --git a/src/humanloop/datasets/client.py b/src/humanloop/datasets/client.py index a4e5dd99..ddfede4d 100644 --- a/src/humanloop/datasets/client.py +++ b/src/humanloop/datasets/client.py @@ -602,6 +602,7 @@ def list_versions( id: str, *, status: typing.Optional[VersionStatus] = None, + include_datapoints: typing.Optional[typing.Literal["latest_committed"]] = None, request_options: typing.Optional[RequestOptions] = None, ) -> ListDatasets: """ @@ -615,6 +616,9 @@ def list_versions( status : typing.Optional[VersionStatus] Filter versions by status: 'uncommitted', 'committed'. If no status is provided, all versions are returned. + include_datapoints : typing.Optional[typing.Literal["latest_committed"]] + If set to 'latest_committed', include the Datapoints for the latest committed version. Defaults to `None`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -640,6 +644,7 @@ def list_versions( method="GET", params={ "status": status, + "include_datapoints": include_datapoints, }, request_options=request_options, ) @@ -1647,6 +1652,7 @@ async def list_versions( id: str, *, status: typing.Optional[VersionStatus] = None, + include_datapoints: typing.Optional[typing.Literal["latest_committed"]] = None, request_options: typing.Optional[RequestOptions] = None, ) -> ListDatasets: """ @@ -1660,6 +1666,9 @@ async def list_versions( status : typing.Optional[VersionStatus] Filter versions by status: 'uncommitted', 'committed'. If no status is provided, all versions are returned. + include_datapoints : typing.Optional[typing.Literal["latest_committed"]] + If set to 'latest_committed', include the Datapoints for the latest committed version. Defaults to `None`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -1693,6 +1702,7 @@ async def main() -> None: method="GET", params={ "status": status, + "include_datapoints": include_datapoints, }, request_options=request_options, ) diff --git a/src/humanloop/eval_utils.py b/src/humanloop/eval_utils.py index e5112d19..c6bc3b98 100644 --- a/src/humanloop/eval_utils.py +++ b/src/humanloop/eval_utils.py @@ -18,7 +18,6 @@ from typing_extensions import NotRequired, TypedDict import time import sys -import uuid from concurrent.futures import ThreadPoolExecutor, as_completed from .client import BaseHumanloop @@ -41,11 +40,13 @@ from .types import ToolKernelRequest as Tool from .types import BooleanEvaluatorStatsResponse as BooleanStats from .types import NumericEvaluatorStatsResponse as NumericStats -from .types import UpdateDatesetAction as UpdateDatasetAction # TODO: fix original type typo +from .types import ( + UpdateDatesetAction as UpdateDatasetAction, +) # TODO: fix original type typo from .types import DatapointResponse as Datapoint from .types import ( EvaluationStats, - VersionStatsResponse, + RunStatsResponse, EvaluatorArgumentsType, EvaluatorReturnTypeEnum, EvaluationResponse, @@ -61,7 +62,9 @@ if not logger.hasHandlers(): logger.addHandler(console_handler) -EvaluatorDict = Union[CodeEvaluatorDict, LLMEvaluatorDict, HumanEvaluatorDict, ExternalEvaluator] +EvaluatorDict = Union[ + CodeEvaluatorDict, LLMEvaluatorDict, HumanEvaluatorDict, ExternalEvaluator +] Version = Union[FlowDict, PromptDict, ToolDict, EvaluatorDict] FileType = Literal["flow", "prompt", "tool", "evaluator"] @@ -202,9 +205,13 @@ def _run_eval( function_ = file.pop("callable") except KeyError as _: if type_ == "flow": - raise ValueError("You must provide a `callable` for your Flow `file` to run a local eval.") + raise ValueError( + "You must provide a `callable` for your Flow `file` to run a local eval." + ) else: - logger.info(f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop.") + logger.info( + f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop." + ) custom_logger = file.pop("custom_logger", None) file_dict = {**file, **version} @@ -222,7 +229,9 @@ def _run_eval( try: _ = Prompt.parse_obj(version) except ValidationError as error_: - logger.error(msg=f"Invalid Prompt `version` in your `file` request. \n\nValidation error: \n)") + logger.error( + msg=f"Invalid Prompt `version` in your `file` request. \n\nValidation error: \n)" + ) raise error_ hl_file = client.prompts.upsert(**file_dict) @@ -230,7 +239,9 @@ def _run_eval( try: _ = Tool.parse_obj(version) except ValidationError as error_: - logger.error(msg=f"Invalid Tool `version` in your `file` request. \n\nValidation error: \n)") + logger.error( + msg=f"Invalid Tool `version` in your `file` request. 
\n\nValidation error: \n)" + ) raise error_ hl_file = client.tools.upsert(**file_dict) @@ -263,7 +274,9 @@ def _run_eval( attributes={"code": inspect.getsource(eval_function)}, evaluator_type="external", ) - _ = client.evaluators.upsert(id=evaluator.get("id"), path=evaluator.get("path"), spec=spec) + _ = client.evaluators.upsert( + id=evaluator.get("id"), path=evaluator.get("path"), spec=spec + ) # Validate upfront that the local Evaluators and Dataset fit requires_target = False @@ -286,7 +299,6 @@ def _run_eval( try: evaluation = client.evaluations.create( name=name, - dataset={"file_id": hl_dataset.id}, evaluators=[{"path": e["path"]} for e in evaluators], file={"id": hl_file.id}, ) @@ -301,15 +313,22 @@ def _run_eval( if not evaluation: raise ValueError(f"Evaluation with name {name} not found.") - # Every run will generate a new batch of logs - batch_id = uuid.uuid4().hex[:10] # ignore risk of collision + # Create a new Run + run = client.evaluations.create_run( + id=evaluation.id, + dataset={"file_id": hl_dataset.id}, + logs="fixed", + orchestrated=False, + ) + + # Every Run will generate a new batch of Logs + run_id = run.id log_func = _get_log_func( client=client, type_=type_, file_id=hl_file.id, version_id=hl_file.version_id, - evaluation_id=evaluation.id, - batch_id=batch_id, + run_id=run_id, ) # Define the function to execute your function in parallel and Log to Humanloop @@ -318,7 +337,9 @@ def process_datapoint(datapoint: Datapoint): datapoint_dict = datapoint.dict() try: if "messages" in datapoint_dict: - output = function_(**datapoint_dict["inputs"], messages=datapoint_dict["messages"]) + output = function_( + **datapoint_dict["inputs"], messages=datapoint_dict["messages"] + ) else: output = function_(**datapoint_dict["inputs"]) if custom_logger: @@ -343,7 +364,9 @@ def process_datapoint(datapoint: Datapoint): start_time=start_time, end_time=datetime.now(), ) - logger.warning(msg=f"\nYour {type_}'s `callable` failed for Datapoint: {datapoint.id}. \n Error: {str(e)}") + logger.warning( + msg=f"\nYour {type_}'s `callable` failed for Datapoint: {datapoint.id}. 
\n Error: {str(e)}" + ) # Apply local Evaluators for local_evaluator in local_evaluators: @@ -376,28 +399,35 @@ def process_datapoint(datapoint: Datapoint): start_time=start_time, end_time=datetime.now(), ) - logger.warning(f"\nEvaluator {local_evaluator['path']} failed with error {str(e)}") + logger.warning( + f"\nEvaluator {local_evaluator['path']} failed with error {str(e)}" + ) # Execute the function and send the logs to Humanloop in parallel total_datapoints = len(hl_dataset.datapoints) logger.info(f"\n{CYAN}Navigate to your Evaluation:{RESET}\n{evaluation.url}\n") logger.info(f"{CYAN}{type_.capitalize()} Version ID: {hl_file.version_id}{RESET}") - logger.info(f"{CYAN}Run ID: {batch_id}{RESET}") + logger.info(f"{CYAN}Run ID: {run_id}{RESET}") # Generate locally if a file `callable` is provided if function_: logger.info( - f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name} using {workers} workers{RESET} " + f"{CYAN}\nRunning '{hl_file.name}' over the Dataset '{hl_dataset.name}' using {workers} workers{RESET} " ) completed_tasks = 0 with ThreadPoolExecutor(max_workers=workers) as executor: - futures = [executor.submit(process_datapoint, datapoint) for datapoint in hl_dataset.datapoints] + futures = [ + executor.submit(process_datapoint, datapoint) + for datapoint in hl_dataset.datapoints + ] for _ in as_completed(futures): completed_tasks += 1 _progress_bar(total_datapoints, completed_tasks) else: # TODO: trigger run when updated API is available - logger.info(f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name}{RESET}") + logger.info( + f"{CYAN}\nRunning '{hl_file.name}' over the Dataset '{hl_dataset.name}'{RESET}" + ) # Wait for the Evaluation to complete then print the results complete = False @@ -413,39 +443,43 @@ def process_datapoint(datapoint: Datapoint): logger.info(stats.report) checks: List[EvaluatorCheck] = [] - if all(evaluator.get("threshold") is None for evaluator in evaluators) and len(stats.version_stats) == 1: - # Skip `check_evaluation_improvement` if no thresholds were provided and there is only one run. - # (Or the logs would not be helpful) - return checks - for evaluator in evaluators: - _, score, delta = check_evaluation_improvement( - evaluation=evaluation, - stats=stats, - evaluator_path=evaluator["path"], - batch_id=batch_id, - ) - threshold_check = None - threshold = evaluator.get("threshold") - if threshold is not None: - threshold_check = check_evaluation_threshold( + + # Skip `check_evaluation_improvement` if no thresholds were provided and there is only one run. 
+ # (Or the logs would not be helpful) + if ( + any(evaluator.get("threshold") is not None for evaluator in evaluators) + or len(stats.run_stats) > 1 + ): + for evaluator in evaluators: + _, score, delta = check_evaluation_improvement( evaluation=evaluation, stats=stats, evaluator_path=evaluator["path"], - threshold=threshold, - batch_id=batch_id, + run_id=run_id, ) - checks.append( - EvaluatorCheck( - path=evaluator["path"], - # TODO: Add back in with number valence on Evaluators - # improvement_check=improvement_check, - score=score, - delta=delta, - threshold=threshold, - threshold_check=threshold_check, - evaluation_id=evaluation.id, + threshold_check = None + threshold = evaluator.get("threshold") + if threshold is not None: + threshold_check = check_evaluation_threshold( + evaluation=evaluation, + stats=stats, + evaluator_path=evaluator["path"], + threshold=threshold, + run_id=run_id, + ) + checks.append( + EvaluatorCheck( + path=evaluator["path"], + # TODO: Add back in with number valence on Evaluators + # improvement_check=improvement_check, + score=score, + delta=delta, + threshold=threshold, + threshold_check=threshold_check, + evaluation_id=evaluation.id, + ) ) - ) + logger.info(f"\n{CYAN}View your Evaluation:{RESET}\n{evaluation.url}\n") return checks @@ -455,8 +489,7 @@ def _get_log_func( type_: FileType, file_id: str, version_id: str, - evaluation_id: str, - batch_id: str, + run_id: str, ) -> Callable: """Returns the appropriate log function pre-filled with common parameters.""" log_request = { @@ -464,8 +497,7 @@ def _get_log_func( # Why are both `id` and `version_id` needed in the API? "id": file_id, "version_id": version_id, - "evaluation_id": evaluation_id, - "batch_id": batch_id, + "run_id": run_id, } if type_ == "flow": return partial(client.flows.log, **log_request, trace_status="complete") @@ -479,7 +511,9 @@ def _get_log_func( raise NotImplementedError(f"Unsupported File version: {type_}") -def get_score_from_evaluator_stat(stat: Union[NumericStats, BooleanStats]) -> Union[float, None]: +def get_score_from_evaluator_stat( + stat: Union[NumericStats, BooleanStats], +) -> Union[float, None]: """Get the score from an Evaluator Stat.""" score = None if isinstance(stat, BooleanStats): @@ -526,14 +560,18 @@ def _progress_bar(total: int, progress: int): def get_evaluator_stats_by_path( - stat: VersionStatsResponse, evaluation: EvaluationResponse + stat: RunStatsResponse, evaluation: EvaluationResponse ) -> Dict[str, Union[NumericStats, BooleanStats]]: """Get the Evaluator stats by path.""" # TODO: Update the API so this is not necessary - evaluators_by_id = {evaluator.version.version_id: evaluator for evaluator in evaluation.evaluators} + evaluators_by_id = { + evaluator.version.version_id: evaluator for evaluator in evaluation.evaluators + } evaluator_stats_by_path = { - evaluators_by_id[evaluator_stat.evaluator_version_id].version.path: evaluator_stat - for evaluator_stat in stat.evaluator_version_stats + evaluators_by_id[ + evaluator_stat.evaluator_version_id + ].version.path: evaluator_stat + for evaluator_stat in stat.evaluator_stats } return evaluator_stats_by_path @@ -543,12 +581,13 @@ def check_evaluation_threshold( stats: EvaluationStats, evaluator_path: str, threshold: float, - batch_id: str, + run_id: str, ) -> bool: """Checks if the latest version has an average Evaluator result above a threshold.""" # TODO: Update the API so this is not necessary evaluator_stats_by_path = get_evaluator_stats_by_path( - stat=next((stat for stat in stats.version_stats if 
stat.batch_id == batch_id), None), evaluation=evaluation + stat=next((stat for stat in stats.run_stats if stat.run_id == run_id), None), + evaluation=evaluation, ) if evaluator_path in evaluator_stats_by_path: evaluator_stat = evaluator_stats_by_path[evaluator_path] @@ -571,7 +610,7 @@ def check_evaluation_improvement( evaluation: EvaluationResponse, evaluator_path: str, stats: EvaluationStats, - batch_id: str, + run_id: str, ) -> Tuple[bool, float, float]: """ Check the latest version has improved across for a specific Evaluator. @@ -581,24 +620,34 @@ def check_evaluation_improvement( # TODO: Update the API so this is not necessary latest_evaluator_stats_by_path = get_evaluator_stats_by_path( - stat=next((stat for stat in stats.version_stats if stat.batch_id == batch_id), None), evaluation=evaluation + stat=next((stat for stat in stats.run_stats if stat.run_id == run_id), None), + evaluation=evaluation, ) - if len(stats.version_stats) == 1: + if len(stats.run_stats) == 1: logger.info(f"{YELLOW}⚠️ No previous versions to compare with.{RESET}") return True, 0, 0 - previous_evaluator_stats_by_path = get_evaluator_stats_by_path(stat=stats.version_stats[-2], evaluation=evaluation) - if evaluator_path in latest_evaluator_stats_by_path and evaluator_path in previous_evaluator_stats_by_path: + previous_evaluator_stats_by_path = get_evaluator_stats_by_path( + stat=stats.run_stats[-2], evaluation=evaluation + ) + if ( + evaluator_path in latest_evaluator_stats_by_path + and evaluator_path in previous_evaluator_stats_by_path + ): latest_evaluator_stat = latest_evaluator_stats_by_path[evaluator_path] previous_evaluator_stat = previous_evaluator_stats_by_path[evaluator_path] latest_score = get_score_from_evaluator_stat(stat=latest_evaluator_stat) previous_score = get_score_from_evaluator_stat(stat=previous_evaluator_stat) diff = round(latest_score - previous_score, 2) if diff >= 0: - logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}") + logger.info( + f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}" + ) return True, latest_score, diff else: - logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}") + logger.info( + f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}" + ) return False, latest_score, diff else: raise ValueError(f"Evaluator {evaluator_path} not found in the stats.") diff --git a/src/humanloop/evaluations/client.py b/src/humanloop/evaluations/client.py index 92cf4033..c2190762 100644 --- a/src/humanloop/evaluations/client.py +++ b/src/humanloop/evaluations/client.py @@ -11,15 +11,17 @@ from ..types.http_validation_error import HttpValidationError from json.decoder import JSONDecodeError from ..core.api_error import ApiError -from ..requests.evaluations_dataset_request import EvaluationsDatasetRequestParams from ..requests.evaluations_request import EvaluationsRequestParams -from ..requests.evaluatee_request import EvaluateeRequestParams from ..requests.file_request import FileRequestParams from ..core.serialization import convert_and_respect_annotation_metadata from ..core.jsonable_encoder import jsonable_encoder -from ..types.evaluation_status import EvaluationStatus +from ..types.evaluation_runs_response import EvaluationRunsResponse +from ..requests.evaluations_dataset_request import EvaluationsDatasetRequestParams +from ..requests.version_specification import VersionSpecificationParams +from ..types.logs_association_type import LogsAssociationType +from ..types.evaluation_run_response import 
EvaluationRunResponse from ..types.evaluation_stats import EvaluationStats -from ..types.paginated_data_evaluation_report_log_response import PaginatedDataEvaluationReportLogResponse +from ..types.paginated_data_evaluation_log_response import PaginatedDataEvaluationLogResponse from ..core.client_wrapper import AsyncClientWrapper from ..core.pagination import AsyncPager @@ -127,44 +129,30 @@ def list( def create( self, *, - dataset: EvaluationsDatasetRequestParams, evaluators: typing.Sequence[EvaluationsRequestParams], - evaluatees: typing.Optional[typing.Sequence[EvaluateeRequestParams]] = OMIT, - name: typing.Optional[str] = OMIT, file: typing.Optional[FileRequestParams] = OMIT, + name: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> EvaluationResponse: """ Create an Evaluation. - Create a new Evaluation by specifying the Dataset, versions to be - evaluated (Evaluatees), and which Evaluators to provide judgments. + Create an Evaluation by specifying the File to evaluate, and a name + for the Evaluation. - Humanloop will automatically start generating Logs and running Evaluators where - `orchestrated=true`. If you own the runtime for the Evaluatee or Evaluator, you - can set `orchestrated=false` and then generate and submit the required logs using - your runtime. - - To keep updated on the progress of the Evaluation, you can poll the Evaluation using - the `GET /evaluations/:id` endpoint and check its status. + You can then add Runs to this Evaluation using the `POST /evaluations/{id}/runs` endpoint. Parameters ---------- - dataset : EvaluationsDatasetRequestParams - Dataset to use in this Evaluation. - evaluators : typing.Sequence[EvaluationsRequestParams] The Evaluators used to evaluate. - evaluatees : typing.Optional[typing.Sequence[EvaluateeRequestParams]] - Unique identifiers for the Prompt/Tool Versions to include in the Evaluation. Can be left unpopulated if you wish to add Evaluatees to this Evaluation by specifying `evaluation_id` in Log calls. + file : typing.Optional[FileRequestParams] + The File to associate with the Evaluation. This File contains the Logs you're evaluating. name : typing.Optional[str] Name of the Evaluation to help identify it. Must be unique within the associated File. - file : typing.Optional[FileRequestParams] - The File to associate with the Evaluation. - request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -181,30 +169,20 @@ def create( api_key="YOUR_API_KEY", ) client.evaluations.create( - dataset={"version_id": "dsv_6L78pqrdFi2xa"}, - evaluatees=[ - {"version_id": "prv_7ZlQREDScH0xkhUwtXruN", "orchestrated": False} - ], - evaluators=[{"version_id": "evv_012def", "orchestrated": False}], + evaluators=[{}], ) """ _response = self._client_wrapper.httpx_client.request( "evaluations", method="POST", json={ - "dataset": convert_and_respect_annotation_metadata( - object_=dataset, annotation=EvaluationsDatasetRequestParams, direction="write" - ), - "evaluatees": convert_and_respect_annotation_metadata( - object_=evaluatees, annotation=typing.Sequence[EvaluateeRequestParams], direction="write" + "file": convert_and_respect_annotation_metadata( + object_=file, annotation=FileRequestParams, direction="write" ), + "name": name, "evaluators": convert_and_respect_annotation_metadata( object_=evaluators, annotation=typing.Sequence[EvaluationsRequestParams], direction="write" ), - "name": name, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=FileRequestParams, direction="write" - ), }, request_options=request_options, omit=OMIT, @@ -233,15 +211,27 @@ def create( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EvaluationResponse: + def add_evaluators( + self, + id: str, + *, + evaluators: typing.Sequence[EvaluationsRequestParams], + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationResponse: """ - Get an Evaluation. + Add Evaluators to an Evaluation. + + Add new Evaluators to an Evaluation. The Evaluators will be run on the Logs + generated for the Evaluation. Parameters ---------- id : str Unique identifier for Evaluation. + evaluators : typing.Sequence[EvaluationsRequestParams] + The Evaluators to add to this Evaluation. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -257,14 +247,21 @@ def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = Non client = Humanloop( api_key="YOUR_API_KEY", ) - client.evaluations.get( - id="ev_567yza", + client.evaluations.add_evaluators( + id="id", + evaluators=[{}], ) """ _response = self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}", - method="GET", + f"evaluations/{jsonable_encoder(id)}/evaluators", + method="POST", + json={ + "evaluators": convert_and_respect_annotation_metadata( + object_=evaluators, annotation=typing.Sequence[EvaluationsRequestParams], direction="write" + ), + }, request_options=request_options, + omit=OMIT, ) try: if 200 <= _response.status_code < 300: @@ -290,24 +287,30 @@ def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = Non raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: + def remove_evaluator( + self, id: str, evaluator_version_id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> EvaluationResponse: """ - Delete an Evaluation. + Remove an Evaluator from an Evaluation. - Remove an Evaluation from Humanloop. The Logs and Versions used in the Evaluation - will not be deleted. + Remove an Evaluator from an Evaluation. 
The Evaluator will no longer be run on the Logs + generated for the Evaluation. Parameters ---------- id : str Unique identifier for Evaluation. + evaluator_version_id : str + Unique identifier for Evaluator Version. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - None + EvaluationResponse + Successful Response Examples -------- @@ -316,18 +319,25 @@ def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = client = Humanloop( api_key="YOUR_API_KEY", ) - client.evaluations.delete( - id="ev_567yza", + client.evaluations.remove_evaluator( + id="id", + evaluator_version_id="evaluator_version_id", ) """ _response = self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}", + f"evaluations/{jsonable_encoder(id)}/evaluators/{jsonable_encoder(evaluator_version_id)}", method="DELETE", request_options=request_options, ) try: if 200 <= _response.status_code < 300: - return + return typing.cast( + EvaluationResponse, + construct_type( + type_=EvaluationResponse, # type: ignore + object_=_response.json(), + ), + ) if _response.status_code == 422: raise UnprocessableEntityError( typing.cast( @@ -343,43 +353,15 @@ def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_setup( - self, - id: str, - *, - dataset: typing.Optional[EvaluationsDatasetRequestParams] = OMIT, - evaluatees: typing.Optional[typing.Sequence[EvaluateeRequestParams]] = OMIT, - evaluators: typing.Optional[typing.Sequence[EvaluationsRequestParams]] = OMIT, - name: typing.Optional[str] = OMIT, - file: typing.Optional[FileRequestParams] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> EvaluationResponse: + def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EvaluationResponse: """ - Update an Evaluation. - - Update the setup of an Evaluation by specifying the Dataset, versions to be - evaluated (Evaluatees), and which Evaluators to provide judgments. + Get an Evaluation. Parameters ---------- id : str Unique identifier for Evaluation. - dataset : typing.Optional[EvaluationsDatasetRequestParams] - Dataset to use in this Evaluation. - - evaluatees : typing.Optional[typing.Sequence[EvaluateeRequestParams]] - Unique identifiers for the Prompt/Tool Versions to include in the Evaluation. Can be left unpopulated if you wish to add evaluatees to this Evaluation by specifying `evaluation_id` in Log calls. - - evaluators : typing.Optional[typing.Sequence[EvaluationsRequestParams]] - The Evaluators used to evaluate. - - name : typing.Optional[str] - Name of the Evaluation to help identify it. Must be unique within the associated File. - - file : typing.Optional[FileRequestParams] - The File to associate with the Evaluation. - request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -395,35 +377,14 @@ def update_setup( client = Humanloop( api_key="YOUR_API_KEY", ) - client.evaluations.update_setup( + client.evaluations.get( id="ev_567yza", - dataset={"version_id": "dsv_6L78pqrdFi2xa"}, - evaluatees=[ - {"version_id": "prv_7ZlQREDScH0xkhUwtXruN", "orchestrated": False} - ], - evaluators=[{"version_id": "evv_012def", "orchestrated": False}], ) """ _response = self._client_wrapper.httpx_client.request( f"evaluations/{jsonable_encoder(id)}", - method="PATCH", - json={ - "dataset": convert_and_respect_annotation_metadata( - object_=dataset, annotation=EvaluationsDatasetRequestParams, direction="write" - ), - "evaluatees": convert_and_respect_annotation_metadata( - object_=evaluatees, annotation=typing.Sequence[EvaluateeRequestParams], direction="write" - ), - "evaluators": convert_and_respect_annotation_metadata( - object_=evaluators, annotation=typing.Sequence[EvaluationsRequestParams], direction="write" - ), - "name": name, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=FileRequestParams, direction="write" - ), - }, + method="GET", request_options=request_options, - omit=OMIT, ) try: if 200 <= _response.status_code < 300: @@ -449,29 +410,24 @@ def update_setup( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def update_status( - self, id: str, *, status: EvaluationStatus, request_options: typing.Optional[RequestOptions] = None - ) -> EvaluationResponse: + def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: """ - Update the status of an Evaluation. + Delete an Evaluation. - Can be used to cancel a running Evaluation, or mark an Evaluation that uses - external or human evaluators as completed. + Remove an Evaluation from Humanloop. The Logs and Versions used in the Evaluation + will not be deleted. Parameters ---------- id : str Unique identifier for Evaluation. - status : EvaluationStatus - request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - EvaluationResponse - Successful Response + None Examples -------- @@ -480,29 +436,18 @@ def update_status( client = Humanloop( api_key="YOUR_API_KEY", ) - client.evaluations.update_status( - id="id", - status="pending", + client.evaluations.delete( + id="ev_567yza", ) """ _response = self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}/status", - method="PATCH", - json={ - "status": status, - }, + f"evaluations/{jsonable_encoder(id)}", + method="DELETE", request_options=request_options, - omit=OMIT, ) try: if 200 <= _response.status_code < 300: - return typing.cast( - EvaluationResponse, - construct_type( - type_=EvaluationResponse, # type: ignore - object_=_response.json(), - ), - ) + return if _response.status_code == 422: raise UnprocessableEntityError( typing.cast( @@ -518,13 +463,11 @@ def update_status( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_stats(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EvaluationStats: + def list_runs_for_evaluation( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> EvaluationRunsResponse: """ - Get Evaluation Stats. - - Retrieve aggregate stats for the specified Evaluation. 
- This includes the number of generated Logs for each evaluated version and the - corresponding Evaluator statistics (such as the mean and percentiles). + List all Runs for an Evaluation. Parameters ---------- @@ -536,7 +479,7 @@ def get_stats(self, id: str, *, request_options: typing.Optional[RequestOptions] Returns ------- - EvaluationStats + EvaluationRunsResponse Successful Response Examples @@ -546,21 +489,21 @@ def get_stats(self, id: str, *, request_options: typing.Optional[RequestOptions] client = Humanloop( api_key="YOUR_API_KEY", ) - client.evaluations.get_stats( + client.evaluations.list_runs_for_evaluation( id="id", ) """ _response = self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}/stats", + f"evaluations/{jsonable_encoder(id)}/runs", method="GET", request_options=request_options, ) try: if 200 <= _response.status_code < 300: return typing.cast( - EvaluationStats, + EvaluationRunsResponse, construct_type( - type_=EvaluationStats, # type: ignore + type_=EvaluationRunsResponse, # type: ignore object_=_response.json(), ), ) @@ -579,37 +522,60 @@ def get_stats(self, id: str, *, request_options: typing.Optional[RequestOptions] raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def get_logs( + def create_run( self, id: str, *, - page: typing.Optional[int] = None, - size: typing.Optional[int] = None, + dataset: typing.Optional[EvaluationsDatasetRequestParams] = OMIT, + version: typing.Optional[VersionSpecificationParams] = OMIT, + orchestrated: typing.Optional[bool] = OMIT, + logs: typing.Optional[LogsAssociationType] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> PaginatedDataEvaluationReportLogResponse: + ) -> EvaluationRunResponse: """ - Get the Logs associated to a specific Evaluation. + Create an Evaluation Run. - Each Datapoint in your Dataset will have a corresponding Log for each File version evaluated. - e.g. If you have 50 Datapoints and are evaluating 2 Prompts, there will be 100 Logs associated with the Evaluation. + Create a new Evaluation Run. Optionally specify the Dataset and version to be + evaluated. + + Humanloop will automatically start generating Logs and running Evaluators where + `orchestrated=true`. If you are generating Logs yourself, you can set `orchestrated=false` + and then generate and submit the required Logs via the API. + + The `logs` parameter controls which Logs are associated with the Run. Defaults to `dynamic` + if `dataset` and `version` are provided. This means that Logs will automatically be retrieved + if they're associated with the specified Version and has `source_datapoint_id` referencing + a datapoint in the specified Dataset. + If `logs` is set to `fixed`, no existing Logs will be automatically associated with the Run. + You can then add Logs to the Run using the `POST /evaluations/{id}/runs/{run_id}/logs` endpoint, + or by adding `run_id` to your `POST /prompts/logs` requests. + + To keep updated on the progress of the Run, you can poll the Run using + the `GET /evaluations/{id}/runs` endpoint and check its status. Parameters ---------- id : str - String ID of evaluation. Starts with `ev_` or `evr_`. + Unique identifier for Evaluation. - page : typing.Optional[int] - Page number for pagination. + dataset : typing.Optional[EvaluationsDatasetRequestParams] + Dataset to use in this Run. - size : typing.Optional[int] - Page size for pagination. Number of Logs to fetch. 
+ version : typing.Optional[VersionSpecificationParams] + Version to use in this Run. + + orchestrated : typing.Optional[bool] + Whether the Run is orchestrated by Humanloop. If `True`, Humanloop will generate Logs for the Run; `dataset` and `version` must be provided. If `False`, a log for the Prompt/Tool should be submitted by the user via the API. + + logs : typing.Optional[LogsAssociationType] + How the Logs are associated with the Run. If `dynamic`, the latest relevant Logs will be inferred from the Dataset and Version. If `fixed`, the Logs will be explicitly associated. You can provide a list of Log IDs to associate with the Run, or add them to the Run later. Defaults to `dynamic` if `dataset` and `version` are provided; otherwise, defaults to `fixed`. request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - PaginatedDataEvaluationReportLogResponse + EvaluationRunResponse Successful Response Examples @@ -619,25 +585,32 @@ def get_logs( client = Humanloop( api_key="YOUR_API_KEY", ) - client.evaluations.get_logs( + client.evaluations.create_run( id="id", ) """ _response = self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}/logs", - method="GET", - params={ - "page": page, - "size": size, + f"evaluations/{jsonable_encoder(id)}/runs", + method="POST", + json={ + "dataset": convert_and_respect_annotation_metadata( + object_=dataset, annotation=EvaluationsDatasetRequestParams, direction="write" + ), + "version": convert_and_respect_annotation_metadata( + object_=version, annotation=VersionSpecificationParams, direction="write" + ), + "orchestrated": orchestrated, + "logs": logs, }, request_options=request_options, + omit=OMIT, ) try: if 200 <= _response.status_code < 300: return typing.cast( - PaginatedDataEvaluationReportLogResponse, + EvaluationRunResponse, construct_type( - type_=PaginatedDataEvaluationReportLogResponse, # type: ignore + type_=EvaluationRunResponse, # type: ignore object_=_response.json(), ), ) @@ -656,53 +629,26 @@ def get_logs( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - def pin_evaluatee( - self, - id: str, - *, - version_id: typing.Optional[str] = OMIT, - path: typing.Optional[str] = OMIT, - file_id: typing.Optional[str] = OMIT, - environment: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, - orchestrated: typing.Optional[bool] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> EvaluationResponse: + def add_existing_run( + self, id: str, run_id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> typing.Optional[typing.Any]: """ - Pin the specified Evaluatee. - - Pinned Evaluatees are always displayed in the Evaluation Overview, - and serve as the baseline for comparison with other Evaluatees. + Add an existing Run to an Evaluation. Parameters ---------- id : str Unique identifier for Evaluation. - version_id : typing.Optional[str] - Unique identifier for the File Version. If provided, none of the other fields should be specified. - - path : typing.Optional[str] - Path identifying a File. Provide either this or `file_id` if you want to specify a File. - - file_id : typing.Optional[str] - Unique identifier for the File. Provide either this or `path` if you want to specify a File. - - environment : typing.Optional[str] - Name of the Environment a Version is deployed to. Only provide this when specifying a File. 
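Taken together, `create_run` and `list_runs_for_evaluation` replace the removed `update_setup`/`update_status` flow. A minimal sketch of the orchestrated path, using placeholder IDs and assuming the `{"version_id": ...}` dict shape also applies to `VersionSpecificationParams`:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Start a Run on an existing Evaluation. With orchestrated=True, Humanloop
# generates the Logs and runs the Evaluators, so dataset and version are required.
run = client.evaluations.create_run(
    id="ev_567yza",                                       # placeholder Evaluation ID
    dataset={"version_id": "dsv_6L78pqrdFi2xa"},          # placeholder Dataset version
    version={"version_id": "prv_7ZlQREDScH0xkhUwtXruN"},  # assumed VersionSpecificationParams shape
    orchestrated=True,
)

# Poll the Evaluation's Runs to track progress.
runs = client.evaluations.list_runs_for_evaluation(id="ev_567yza")
print(runs)
```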
If not provided (and a File is specified), the default Environment is used. - - batch_id : typing.Optional[str] - Unique identifier for the batch of Logs to include in the Evaluation Report. - - orchestrated : typing.Optional[bool] - Whether the Prompt/Tool is orchestrated by Humanloop. Default is `True`. If `False`, a log for the Prompt/Tool should be submitted by the user via the API. + run_id : str + Unique identifier for Run. request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - EvaluationResponse + typing.Optional[typing.Any] Successful Response Examples @@ -712,30 +658,22 @@ def pin_evaluatee( client = Humanloop( api_key="YOUR_API_KEY", ) - client.evaluations.pin_evaluatee( + client.evaluations.add_existing_run( id="id", + run_id="run_id", ) """ _response = self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}/pin-evaluatee", + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}", method="POST", - json={ - "version_id": version_id, - "path": path, - "file_id": file_id, - "environment": environment, - "batch_id": batch_id, - "orchestrated": orchestrated, - }, request_options=request_options, - omit=OMIT, ) try: if 200 <= _response.status_code < 300: return typing.cast( - EvaluationResponse, + typing.Optional[typing.Any], construct_type( - type_=EvaluationResponse, # type: ignore + type_=typing.Optional[typing.Any], # type: ignore object_=_response.json(), ), ) @@ -754,97 +692,889 @@ def pin_evaluatee( raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - -class AsyncEvaluationsClient: - def __init__(self, *, client_wrapper: AsyncClientWrapper): - self._client_wrapper = client_wrapper - - async def list( - self, - *, - file_id: str, - page: typing.Optional[int] = None, - size: typing.Optional[int] = None, - request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncPager[EvaluationResponse]: + def remove_run_from_evaluation( + self, id: str, run_id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> None: """ - List all Evaluations for the specified `file_id`. + Remove a Run from an Evaluation. - Retrieve a list of Evaluations that evaluate versions of the specified File. + Remove a Run from an Evaluation. The Logs and Versions used in the Run will not be deleted. + If this Run is used in any other Evaluations, it will still be available in those Evaluations. Parameters ---------- - file_id : str - Filter by File ID. Only Evaluations for the specified File will be returned. - - page : typing.Optional[int] - Page number for pagination. + id : str + Unique identifier for Evaluation. - size : typing.Optional[int] - Page size for pagination. Number of Evaluations to fetch. + run_id : str + Unique identifier for Run. request_options : typing.Optional[RequestOptions] Request-specific configuration. 
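Runs are now first-class objects, so the same Run can be attached to several Evaluations and detached again without touching its Logs. A small sketch with placeholder IDs:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Attach a Run that was created under another Evaluation.
client.evaluations.add_existing_run(id="ev_567yza", run_id="run_id")

# Detach it again; the Run's Logs and Versions are kept, and the Run stays
# available in any other Evaluations that reference it.
client.evaluations.remove_run_from_evaluation(id="ev_567yza", run_id="run_id")
```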
Returns ------- - AsyncPager[EvaluationResponse] - Successful Response + None Examples -------- - import asyncio - - from humanloop import AsyncHumanloop + from humanloop import Humanloop - client = AsyncHumanloop( + client = Humanloop( api_key="YOUR_API_KEY", ) - - - async def main() -> None: - response = await client.evaluations.list( - file_id="pr_30gco7dx6JDq4200GVOHa", - size=1, - ) - async for item in response: - yield item - # alternatively, you can paginate page-by-page - async for page in response.iter_pages(): - yield page - - - asyncio.run(main()) + client.evaluations.remove_run_from_evaluation( + id="id", + run_id="run_id", + ) """ - page = page if page is not None else 1 - _response = await self._client_wrapper.httpx_client.request( - "evaluations", - method="GET", - params={ - "file_id": file_id, - "page": page, - "size": size, - }, + _response = self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}", + method="DELETE", request_options=request_options, ) try: if 200 <= _response.status_code < 300: - _parsed_response = typing.cast( - PaginatedEvaluationResponse, + return + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def update_evaluation_run( + self, id: str, run_id: str, *, control: bool, request_options: typing.Optional[RequestOptions] = None + ) -> EvaluationRunResponse: + """ + Update an Evaluation Run. + + Update the Dataset and version to be evaluated for an existing Run. + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + run_id : str + Unique identifier for Run. + + control : bool + If `True`, this Run will be used as the control in the Evaluation. Stats for other Runs will be compared to this Run. This will replace any existing control Run. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
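The `control` flag on `update_evaluation_run` takes over the role of the removed `pin_evaluatee` endpoint: one Run becomes the baseline that every other Run's stats are compared against. A minimal sketch with placeholder IDs:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Make this Run the control; stats for the Evaluation's other Runs are compared
# against it, replacing any previously set control Run.
client.evaluations.update_evaluation_run(
    id="ev_567yza",   # placeholder Evaluation ID
    run_id="run_id",  # placeholder Run ID
    control=True,
)
```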
+ + Returns + ------- + EvaluationRunResponse + Successful Response + + Examples + -------- + from humanloop import Humanloop + + client = Humanloop( + api_key="YOUR_API_KEY", + ) + client.evaluations.update_evaluation_run( + id="id", + run_id="run_id", + control=True, + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}", + method="PATCH", + json={ + "control": control, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationRunResponse, + construct_type( + type_=EvaluationRunResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def add_logs_to_run( + self, + id: str, + run_id: str, + *, + log_ids: typing.Sequence[str], + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRunResponse: + """ + Add Logs to an Evaluation Run. + + This is supported only for Runs that have a fixed set of Logs. + (Runs can either have a fixed set of Logs, or can be set to dynamically retrieve the latest Logs + if a Dataset and Version are provided.) + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + run_id : str + Unique identifier for Run. + + log_ids : typing.Sequence[str] + The IDs of the Logs to add to the Run. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRunResponse + Successful Response + + Examples + -------- + from humanloop import Humanloop + + client = Humanloop( + api_key="YOUR_API_KEY", + ) + client.evaluations.add_logs_to_run( + id="id", + run_id="run_id", + log_ids=["log_ids"], + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}/logs", + method="POST", + json={ + "log_ids": log_ids, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationRunResponse, + construct_type( + type_=EvaluationRunResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def get_stats(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EvaluationStats: + """ + Get Evaluation Stats. + + Retrieve aggregate stats for the specified Evaluation. + + This includes the number of generated Logs for each Run and the + corresponding Evaluator statistics (such as the mean and percentiles). + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
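For the non-orchestrated path, a Run keeps a fixed set of Logs that you populate yourself. A sketch with placeholder IDs; Logs can also be attached at creation time by passing `run_id` to the `log` endpoints, as shown further down in this diff:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# No dataset/version is given, so `logs` defaults to "fixed" and nothing is
# associated automatically; orchestrated=False means we submit the Logs ourselves.
run = client.evaluations.create_run(
    id="ev_567yza",  # placeholder Evaluation ID
    orchestrated=False,
)

# Explicitly associate Logs we have already created with the Run.
client.evaluations.add_logs_to_run(
    id="ev_567yza",
    run_id="run_id",                   # placeholder Run ID
    log_ids=["log_id_1", "log_id_2"],  # placeholder Log IDs
)
```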
+ + Returns + ------- + EvaluationStats + Successful Response + + Examples + -------- + from humanloop import Humanloop + + client = Humanloop( + api_key="YOUR_API_KEY", + ) + client.evaluations.get_stats( + id="id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/stats", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationStats, + construct_type( + type_=EvaluationStats, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def get_logs( + self, + id: str, + *, + page: typing.Optional[int] = None, + size: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> PaginatedDataEvaluationLogResponse: + """ + Get the Logs associated to a specific Evaluation. + + Parameters + ---------- + id : str + String ID of evaluation. Starts with `ev_` or `evr_`. + + page : typing.Optional[int] + Page number for pagination. + + size : typing.Optional[int] + Page size for pagination. Number of Logs to fetch. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + PaginatedDataEvaluationLogResponse + Successful Response + + Examples + -------- + from humanloop import Humanloop + + client = Humanloop( + api_key="YOUR_API_KEY", + ) + client.evaluations.get_logs( + id="id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/logs", + method="GET", + params={ + "page": page, + "size": size, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + PaginatedDataEvaluationLogResponse, + construct_type( + type_=PaginatedDataEvaluationLogResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + +class AsyncEvaluationsClient: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._client_wrapper = client_wrapper + + async def list( + self, + *, + file_id: str, + page: typing.Optional[int] = None, + size: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncPager[EvaluationResponse]: + """ + List all Evaluations for the specified `file_id`. + + Retrieve a list of Evaluations that evaluate versions of the specified File. + + Parameters + ---------- + file_id : str + Filter by File ID. Only Evaluations for the specified File will be returned. + + page : typing.Optional[int] + Page number for pagination. + + size : typing.Optional[int] + Page size for pagination. Number of Evaluations to fetch. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
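Once a Run has Logs, `get_stats` returns the per-Run aggregate view and the renamed paginated `get_logs` returns the underlying Logs. The exact fields of `EvaluationStats` and `PaginatedDataEvaluationLogResponse` are not spelled out in this diff, so `records` below is assumed by analogy with `PaginatedEvaluationResponse`:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Aggregate Evaluator statistics per Run (Log counts, means, percentiles).
stats = client.evaluations.get_stats(id="ev_567yza")
print(stats)

# Page through the individual Logs behind those stats.
page = client.evaluations.get_logs(id="ev_567yza", page=1, size=50)
for log in page.records:  # `records` is an assumed field name
    print(log)
```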
+ + Returns + ------- + AsyncPager[EvaluationResponse] + Successful Response + + Examples + -------- + import asyncio + + from humanloop import AsyncHumanloop + + client = AsyncHumanloop( + api_key="YOUR_API_KEY", + ) + + + async def main() -> None: + response = await client.evaluations.list( + file_id="pr_30gco7dx6JDq4200GVOHa", + size=1, + ) + async for item in response: + yield item + # alternatively, you can paginate page-by-page + async for page in response.iter_pages(): + yield page + + + asyncio.run(main()) + """ + page = page if page is not None else 1 + _response = await self._client_wrapper.httpx_client.request( + "evaluations", + method="GET", + params={ + "file_id": file_id, + "page": page, + "size": size, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _parsed_response = typing.cast( + PaginatedEvaluationResponse, + construct_type( + type_=PaginatedEvaluationResponse, # type: ignore + object_=_response.json(), + ), + ) + _has_next = True + _get_next = lambda: self.list( + file_id=file_id, + page=page + 1, + size=size, + request_options=request_options, + ) + _items = _parsed_response.records + return AsyncPager(has_next=_has_next, items=_items, get_next=_get_next) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def create( + self, + *, + evaluators: typing.Sequence[EvaluationsRequestParams], + file: typing.Optional[FileRequestParams] = OMIT, + name: typing.Optional[str] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationResponse: + """ + Create an Evaluation. + + Create an Evaluation by specifying the File to evaluate, and a name + for the Evaluation. + + You can then add Runs to this Evaluation using the `POST /evaluations/{id}/runs` endpoint. + + Parameters + ---------- + evaluators : typing.Sequence[EvaluationsRequestParams] + The Evaluators used to evaluate. + + file : typing.Optional[FileRequestParams] + The File to associate with the Evaluation. This File contains the Logs you're evaluating. + + name : typing.Optional[str] + Name of the Evaluation to help identify it. Must be unique within the associated File. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
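Note that an `async def` containing `yield` is an async generator and cannot be passed to `asyncio.run` directly; to drain the pager inside a script, a runnable variant of the same pattern collects the items instead:

```python
import asyncio

from humanloop import AsyncHumanloop

client = AsyncHumanloop(api_key="YOUR_API_KEY")


async def main() -> None:
    response = await client.evaluations.list(
        file_id="pr_30gco7dx6JDq4200GVOHa",
        size=1,
    )
    # Iterating the AsyncPager walks every page; collect the items into a list.
    evaluations = [item async for item in response]
    print(len(evaluations))


asyncio.run(main())
```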
+ + Returns + ------- + EvaluationResponse + Successful Response + + Examples + -------- + import asyncio + + from humanloop import AsyncHumanloop + + client = AsyncHumanloop( + api_key="YOUR_API_KEY", + ) + + + async def main() -> None: + await client.evaluations.create( + evaluators=[{}], + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "evaluations", + method="POST", + json={ + "file": convert_and_respect_annotation_metadata( + object_=file, annotation=FileRequestParams, direction="write" + ), + "name": name, + "evaluators": convert_and_respect_annotation_metadata( + object_=evaluators, annotation=typing.Sequence[EvaluationsRequestParams], direction="write" + ), + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationResponse, + construct_type( + type_=EvaluationResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def add_evaluators( + self, + id: str, + *, + evaluators: typing.Sequence[EvaluationsRequestParams], + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationResponse: + """ + Add Evaluators to an Evaluation. + + Add new Evaluators to an Evaluation. The Evaluators will be run on the Logs + generated for the Evaluation. + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + evaluators : typing.Sequence[EvaluationsRequestParams] + The Evaluators to add to this Evaluation. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationResponse + Successful Response + + Examples + -------- + import asyncio + + from humanloop import AsyncHumanloop + + client = AsyncHumanloop( + api_key="YOUR_API_KEY", + ) + + + async def main() -> None: + await client.evaluations.add_evaluators( + id="id", + evaluators=[{}], + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/evaluators", + method="POST", + json={ + "evaluators": convert_and_respect_annotation_metadata( + object_=evaluators, annotation=typing.Sequence[EvaluationsRequestParams], direction="write" + ), + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationResponse, + construct_type( + type_=EvaluationResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def remove_evaluator( + self, id: str, evaluator_version_id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> EvaluationResponse: + """ + Remove an Evaluator from an Evaluation. 
+ + Remove an Evaluator from an Evaluation. The Evaluator will no longer be run on the Logs + generated for the Evaluation. + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + evaluator_version_id : str + Unique identifier for Evaluator Version. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationResponse + Successful Response + + Examples + -------- + import asyncio + + from humanloop import AsyncHumanloop + + client = AsyncHumanloop( + api_key="YOUR_API_KEY", + ) + + + async def main() -> None: + await client.evaluations.remove_evaluator( + id="id", + evaluator_version_id="evaluator_version_id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/evaluators/{jsonable_encoder(evaluator_version_id)}", + method="DELETE", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationResponse, + construct_type( + type_=EvaluationResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EvaluationResponse: + """ + Get an Evaluation. + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationResponse + Successful Response + + Examples + -------- + import asyncio + + from humanloop import AsyncHumanloop + + client = AsyncHumanloop( + api_key="YOUR_API_KEY", + ) + + + async def main() -> None: + await client.evaluations.get( + id="ev_567yza", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationResponse, + construct_type( + type_=EvaluationResponse, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: + """ + Delete an Evaluation. + + Remove an Evaluation from Humanloop. The Logs and Versions used in the Evaluation + will not be deleted. + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
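Evaluators can now be attached to and detached from an Evaluation after it has been created. A sketch on the async client, reusing the `{"version_id": ...}` shape from the earlier evaluator examples; all IDs are placeholders:

```python
import asyncio

from humanloop import AsyncHumanloop

client = AsyncHumanloop(api_key="YOUR_API_KEY")


async def main() -> None:
    # Attach an extra Evaluator; it will run on the Evaluation's Logs.
    await client.evaluations.add_evaluators(
        id="ev_567yza",
        evaluators=[{"version_id": "evv_012def"}],
    )

    # Detach it again; it will no longer run on Logs generated for the Evaluation.
    await client.evaluations.remove_evaluator(
        id="ev_567yza",
        evaluator_version_id="evv_012def",
    )


asyncio.run(main())
```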
+ + Returns + ------- + None + + Examples + -------- + import asyncio + + from humanloop import AsyncHumanloop + + client = AsyncHumanloop( + api_key="YOUR_API_KEY", + ) + + + async def main() -> None: + await client.evaluations.delete( + id="ev_567yza", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}", + method="DELETE", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return + if _response.status_code == 422: + raise UnprocessableEntityError( + typing.cast( + HttpValidationError, + construct_type( + type_=HttpValidationError, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def list_runs_for_evaluation( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> EvaluationRunsResponse: + """ + List all Runs for an Evaluation. + + Parameters + ---------- + id : str + Unique identifier for Evaluation. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRunsResponse + Successful Response + + Examples + -------- + import asyncio + + from humanloop import AsyncHumanloop + + client = AsyncHumanloop( + api_key="YOUR_API_KEY", + ) + + + async def main() -> None: + await client.evaluations.list_runs_for_evaluation( + id="id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"evaluations/{jsonable_encoder(id)}/runs", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + EvaluationRunsResponse, construct_type( - type_=PaginatedEvaluationResponse, # type: ignore + type_=EvaluationRunsResponse, # type: ignore object_=_response.json(), ), ) - _has_next = True - _get_next = lambda: self.list( - file_id=file_id, - page=page + 1, - size=size, - request_options=request_options, - ) - _items = _parsed_response.records - return AsyncPager(has_next=_has_next, items=_items, get_next=_get_next) if _response.status_code == 422: raise UnprocessableEntityError( typing.cast( @@ -860,53 +1590,60 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def create( + async def create_run( self, + id: str, *, - dataset: EvaluationsDatasetRequestParams, - evaluators: typing.Sequence[EvaluationsRequestParams], - evaluatees: typing.Optional[typing.Sequence[EvaluateeRequestParams]] = OMIT, - name: typing.Optional[str] = OMIT, - file: typing.Optional[FileRequestParams] = OMIT, + dataset: typing.Optional[EvaluationsDatasetRequestParams] = OMIT, + version: typing.Optional[VersionSpecificationParams] = OMIT, + orchestrated: typing.Optional[bool] = OMIT, + logs: typing.Optional[LogsAssociationType] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> EvaluationResponse: + ) -> EvaluationRunResponse: """ - Create an Evaluation. + Create an Evaluation Run. - Create a new Evaluation by specifying the Dataset, versions to be - evaluated (Evaluatees), and which Evaluators to provide judgments. + Create a new Evaluation Run. Optionally specify the Dataset and version to be + evaluated. 
Humanloop will automatically start generating Logs and running Evaluators where - `orchestrated=true`. If you own the runtime for the Evaluatee or Evaluator, you - can set `orchestrated=false` and then generate and submit the required logs using - your runtime. + `orchestrated=true`. If you are generating Logs yourself, you can set `orchestrated=false` + and then generate and submit the required Logs via the API. + + The `logs` parameter controls which Logs are associated with the Run. Defaults to `dynamic` + if `dataset` and `version` are provided. This means that Logs will automatically be retrieved + if they're associated with the specified Version and has `source_datapoint_id` referencing + a datapoint in the specified Dataset. + If `logs` is set to `fixed`, no existing Logs will be automatically associated with the Run. + You can then add Logs to the Run using the `POST /evaluations/{id}/runs/{run_id}/logs` endpoint, + or by adding `run_id` to your `POST /prompts/logs` requests. - To keep updated on the progress of the Evaluation, you can poll the Evaluation using - the `GET /evaluations/:id` endpoint and check its status. + To keep updated on the progress of the Run, you can poll the Run using + the `GET /evaluations/{id}/runs` endpoint and check its status. Parameters ---------- - dataset : EvaluationsDatasetRequestParams - Dataset to use in this Evaluation. + id : str + Unique identifier for Evaluation. - evaluators : typing.Sequence[EvaluationsRequestParams] - The Evaluators used to evaluate. + dataset : typing.Optional[EvaluationsDatasetRequestParams] + Dataset to use in this Run. - evaluatees : typing.Optional[typing.Sequence[EvaluateeRequestParams]] - Unique identifiers for the Prompt/Tool Versions to include in the Evaluation. Can be left unpopulated if you wish to add Evaluatees to this Evaluation by specifying `evaluation_id` in Log calls. + version : typing.Optional[VersionSpecificationParams] + Version to use in this Run. - name : typing.Optional[str] - Name of the Evaluation to help identify it. Must be unique within the associated File. + orchestrated : typing.Optional[bool] + Whether the Run is orchestrated by Humanloop. If `True`, Humanloop will generate Logs for the Run; `dataset` and `version` must be provided. If `False`, a log for the Prompt/Tool should be submitted by the user via the API. - file : typing.Optional[FileRequestParams] - The File to associate with the Evaluation. + logs : typing.Optional[LogsAssociationType] + How the Logs are associated with the Run. If `dynamic`, the latest relevant Logs will be inferred from the Dataset and Version. If `fixed`, the Logs will be explicitly associated. You can provide a list of Log IDs to associate with the Run, or add them to the Run later. Defaults to `dynamic` if `dataset` and `version` are provided; otherwise, defaults to `fixed`. request_options : typing.Optional[RequestOptions] Request-specific configuration. 
Returns ------- - EvaluationResponse + EvaluationRunResponse Successful Response Examples @@ -921,34 +1658,25 @@ async def create( async def main() -> None: - await client.evaluations.create( - dataset={"version_id": "dsv_6L78pqrdFi2xa"}, - evaluatees=[ - {"version_id": "prv_7ZlQREDScH0xkhUwtXruN", "orchestrated": False} - ], - evaluators=[{"version_id": "evv_012def", "orchestrated": False}], + await client.evaluations.create_run( + id="id", ) asyncio.run(main()) """ _response = await self._client_wrapper.httpx_client.request( - "evaluations", + f"evaluations/{jsonable_encoder(id)}/runs", method="POST", json={ "dataset": convert_and_respect_annotation_metadata( object_=dataset, annotation=EvaluationsDatasetRequestParams, direction="write" ), - "evaluatees": convert_and_respect_annotation_metadata( - object_=evaluatees, annotation=typing.Sequence[EvaluateeRequestParams], direction="write" - ), - "evaluators": convert_and_respect_annotation_metadata( - object_=evaluators, annotation=typing.Sequence[EvaluationsRequestParams], direction="write" - ), - "name": name, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=FileRequestParams, direction="write" + "version": convert_and_respect_annotation_metadata( + object_=version, annotation=VersionSpecificationParams, direction="write" ), + "orchestrated": orchestrated, + "logs": logs, }, request_options=request_options, omit=OMIT, @@ -956,9 +1684,9 @@ async def main() -> None: try: if 200 <= _response.status_code < 300: return typing.cast( - EvaluationResponse, + EvaluationRunResponse, construct_type( - type_=EvaluationResponse, # type: ignore + type_=EvaluationRunResponse, # type: ignore object_=_response.json(), ), ) @@ -977,21 +1705,26 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EvaluationResponse: + async def add_existing_run( + self, id: str, run_id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> typing.Optional[typing.Any]: """ - Get an Evaluation. + Add an existing Run to an Evaluation. Parameters ---------- id : str Unique identifier for Evaluation. + run_id : str + Unique identifier for Run. + request_options : typing.Optional[RequestOptions] Request-specific configuration. 
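The async `create` and `create_run` pair mirrors the sync lifecycle: create the Evaluation first, then attach Runs to it and poll them. A sketch assuming `EvaluationResponse` exposes an `id` field; the Evaluator version is a placeholder:

```python
import asyncio

from humanloop import AsyncHumanloop

client = AsyncHumanloop(api_key="YOUR_API_KEY")


async def main() -> None:
    # Create the Evaluation, naming the Evaluators that will judge the Logs.
    evaluation = await client.evaluations.create(
        name="my-evaluation",
        evaluators=[{"version_id": "evv_012def"}],  # placeholder Evaluator version
    )

    # Attach a Run; with no dataset/version it starts with a fixed, initially
    # empty set of Logs that can be populated later.
    run = await client.evaluations.create_run(id=evaluation.id)  # `id` field assumed

    # The Runs endpoint reports progress for every Run on the Evaluation.
    runs = await client.evaluations.list_runs_for_evaluation(id=evaluation.id)
    print(run, runs)


asyncio.run(main())
```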
Returns ------- - EvaluationResponse + typing.Optional[typing.Any] Successful Response Examples @@ -1006,24 +1739,25 @@ async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] async def main() -> None: - await client.evaluations.get( - id="ev_567yza", + await client.evaluations.add_existing_run( + id="id", + run_id="run_id", ) asyncio.run(main()) """ _response = await self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}", - method="GET", + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}", + method="POST", request_options=request_options, ) try: if 200 <= _response.status_code < 300: return typing.cast( - EvaluationResponse, + typing.Optional[typing.Any], construct_type( - type_=EvaluationResponse, # type: ignore + type_=typing.Optional[typing.Any], # type: ignore object_=_response.json(), ), ) @@ -1042,18 +1776,23 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None: + async def remove_run_from_evaluation( + self, id: str, run_id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> None: """ - Delete an Evaluation. + Remove a Run from an Evaluation. - Remove an Evaluation from Humanloop. The Logs and Versions used in the Evaluation - will not be deleted. + Remove a Run from an Evaluation. The Logs and Versions used in the Run will not be deleted. + If this Run is used in any other Evaluations, it will still be available in those Evaluations. Parameters ---------- id : str Unique identifier for Evaluation. + run_id : str + Unique identifier for Run. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -1073,15 +1812,16 @@ async def delete(self, id: str, *, request_options: typing.Optional[RequestOptio async def main() -> None: - await client.evaluations.delete( - id="ev_567yza", + await client.evaluations.remove_run_from_evaluation( + id="id", + run_id="run_id", ) asyncio.run(main()) """ _response = await self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}", + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}", method="DELETE", request_options=request_options, ) @@ -1103,49 +1843,31 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_setup( - self, - id: str, - *, - dataset: typing.Optional[EvaluationsDatasetRequestParams] = OMIT, - evaluatees: typing.Optional[typing.Sequence[EvaluateeRequestParams]] = OMIT, - evaluators: typing.Optional[typing.Sequence[EvaluationsRequestParams]] = OMIT, - name: typing.Optional[str] = OMIT, - file: typing.Optional[FileRequestParams] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> EvaluationResponse: + async def update_evaluation_run( + self, id: str, run_id: str, *, control: bool, request_options: typing.Optional[RequestOptions] = None + ) -> EvaluationRunResponse: """ - Update an Evaluation. + Update an Evaluation Run. - Update the setup of an Evaluation by specifying the Dataset, versions to be - evaluated (Evaluatees), and which Evaluators to provide judgments. + Update the Dataset and version to be evaluated for an existing Run. Parameters ---------- id : str Unique identifier for Evaluation. 
- dataset : typing.Optional[EvaluationsDatasetRequestParams] - Dataset to use in this Evaluation. - - evaluatees : typing.Optional[typing.Sequence[EvaluateeRequestParams]] - Unique identifiers for the Prompt/Tool Versions to include in the Evaluation. Can be left unpopulated if you wish to add evaluatees to this Evaluation by specifying `evaluation_id` in Log calls. - - evaluators : typing.Optional[typing.Sequence[EvaluationsRequestParams]] - The Evaluators used to evaluate. - - name : typing.Optional[str] - Name of the Evaluation to help identify it. Must be unique within the associated File. + run_id : str + Unique identifier for Run. - file : typing.Optional[FileRequestParams] - The File to associate with the Evaluation. + control : bool + If `True`, this Run will be used as the control in the Evaluation. Stats for other Runs will be compared to this Run. This will replace any existing control Run. request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - EvaluationResponse + EvaluationRunResponse Successful Response Examples @@ -1160,35 +1882,20 @@ async def update_setup( async def main() -> None: - await client.evaluations.update_setup( - id="ev_567yza", - dataset={"version_id": "dsv_6L78pqrdFi2xa"}, - evaluatees=[ - {"version_id": "prv_7ZlQREDScH0xkhUwtXruN", "orchestrated": False} - ], - evaluators=[{"version_id": "evv_012def", "orchestrated": False}], + await client.evaluations.update_evaluation_run( + id="id", + run_id="run_id", + control=True, ) asyncio.run(main()) """ _response = await self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}", + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}", method="PATCH", json={ - "dataset": convert_and_respect_annotation_metadata( - object_=dataset, annotation=EvaluationsDatasetRequestParams, direction="write" - ), - "evaluatees": convert_and_respect_annotation_metadata( - object_=evaluatees, annotation=typing.Sequence[EvaluateeRequestParams], direction="write" - ), - "evaluators": convert_and_respect_annotation_metadata( - object_=evaluators, annotation=typing.Sequence[EvaluationsRequestParams], direction="write" - ), - "name": name, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=FileRequestParams, direction="write" - ), + "control": control, }, request_options=request_options, omit=OMIT, @@ -1196,9 +1903,9 @@ async def main() -> None: try: if 200 <= _response.status_code < 300: return typing.cast( - EvaluationResponse, + EvaluationRunResponse, construct_type( - type_=EvaluationResponse, # type: ignore + type_=EvaluationRunResponse, # type: ignore object_=_response.json(), ), ) @@ -1217,28 +1924,38 @@ async def main() -> None: raise ApiError(status_code=_response.status_code, body=_response.text) raise ApiError(status_code=_response.status_code, body=_response_json) - async def update_status( - self, id: str, *, status: EvaluationStatus, request_options: typing.Optional[RequestOptions] = None - ) -> EvaluationResponse: + async def add_logs_to_run( + self, + id: str, + run_id: str, + *, + log_ids: typing.Sequence[str], + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRunResponse: """ - Update the status of an Evaluation. + Add Logs to an Evaluation Run. - Can be used to cancel a running Evaluation, or mark an Evaluation that uses - external or human evaluators as completed. + This is supported only for Runs that have a fixed set of Logs. 
+ (Runs can either have a fixed set of Logs, or can be set to dynamically retrieve the latest Logs + if a Dataset and Version are provided.) Parameters ---------- id : str Unique identifier for Evaluation. - status : EvaluationStatus + run_id : str + Unique identifier for Run. + + log_ids : typing.Sequence[str] + The IDs of the Logs to add to the Run. request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - EvaluationResponse + EvaluationRunResponse Successful Response Examples @@ -1253,19 +1970,20 @@ async def update_status( async def main() -> None: - await client.evaluations.update_status( + await client.evaluations.add_logs_to_run( id="id", - status="pending", + run_id="run_id", + log_ids=["log_ids"], ) asyncio.run(main()) """ _response = await self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}/status", - method="PATCH", + f"evaluations/{jsonable_encoder(id)}/runs/{jsonable_encoder(run_id)}/logs", + method="POST", json={ - "status": status, + "log_ids": log_ids, }, request_options=request_options, omit=OMIT, @@ -1273,9 +1991,9 @@ async def main() -> None: try: if 200 <= _response.status_code < 300: return typing.cast( - EvaluationResponse, + EvaluationRunResponse, construct_type( - type_=EvaluationResponse, # type: ignore + type_=EvaluationRunResponse, # type: ignore object_=_response.json(), ), ) @@ -1299,7 +2017,8 @@ async def get_stats(self, id: str, *, request_options: typing.Optional[RequestOp Get Evaluation Stats. Retrieve aggregate stats for the specified Evaluation. - This includes the number of generated Logs for each evaluated version and the + + This includes the number of generated Logs for each Run and the corresponding Evaluator statistics (such as the mean and percentiles). Parameters @@ -1370,13 +2089,10 @@ async def get_logs( page: typing.Optional[int] = None, size: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, - ) -> PaginatedDataEvaluationReportLogResponse: + ) -> PaginatedDataEvaluationLogResponse: """ Get the Logs associated to a specific Evaluation. - Each Datapoint in your Dataset will have a corresponding Log for each File version evaluated. - e.g. If you have 50 Datapoints and are evaluating 2 Prompts, there will be 100 Logs associated with the Evaluation. 
- Parameters ---------- id : str @@ -1393,7 +2109,7 @@ async def get_logs( Returns ------- - PaginatedDataEvaluationReportLogResponse + PaginatedDataEvaluationLogResponse Successful Response Examples @@ -1427,115 +2143,9 @@ async def main() -> None: try: if 200 <= _response.status_code < 300: return typing.cast( - PaginatedDataEvaluationReportLogResponse, - construct_type( - type_=PaginatedDataEvaluationReportLogResponse, # type: ignore - object_=_response.json(), - ), - ) - if _response.status_code == 422: - raise UnprocessableEntityError( - typing.cast( - HttpValidationError, - construct_type( - type_=HttpValidationError, # type: ignore - object_=_response.json(), - ), - ) - ) - _response_json = _response.json() - except JSONDecodeError: - raise ApiError(status_code=_response.status_code, body=_response.text) - raise ApiError(status_code=_response.status_code, body=_response_json) - - async def pin_evaluatee( - self, - id: str, - *, - version_id: typing.Optional[str] = OMIT, - path: typing.Optional[str] = OMIT, - file_id: typing.Optional[str] = OMIT, - environment: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, - orchestrated: typing.Optional[bool] = OMIT, - request_options: typing.Optional[RequestOptions] = None, - ) -> EvaluationResponse: - """ - Pin the specified Evaluatee. - - Pinned Evaluatees are always displayed in the Evaluation Overview, - and serve as the baseline for comparison with other Evaluatees. - - Parameters - ---------- - id : str - Unique identifier for Evaluation. - - version_id : typing.Optional[str] - Unique identifier for the File Version. If provided, none of the other fields should be specified. - - path : typing.Optional[str] - Path identifying a File. Provide either this or `file_id` if you want to specify a File. - - file_id : typing.Optional[str] - Unique identifier for the File. Provide either this or `path` if you want to specify a File. - - environment : typing.Optional[str] - Name of the Environment a Version is deployed to. Only provide this when specifying a File. If not provided (and a File is specified), the default Environment is used. - - batch_id : typing.Optional[str] - Unique identifier for the batch of Logs to include in the Evaluation Report. - - orchestrated : typing.Optional[bool] - Whether the Prompt/Tool is orchestrated by Humanloop. Default is `True`. If `False`, a log for the Prompt/Tool should be submitted by the user via the API. - - request_options : typing.Optional[RequestOptions] - Request-specific configuration. 
- - Returns - ------- - EvaluationResponse - Successful Response - - Examples - -------- - import asyncio - - from humanloop import AsyncHumanloop - - client = AsyncHumanloop( - api_key="YOUR_API_KEY", - ) - - - async def main() -> None: - await client.evaluations.pin_evaluatee( - id="id", - ) - - - asyncio.run(main()) - """ - _response = await self._client_wrapper.httpx_client.request( - f"evaluations/{jsonable_encoder(id)}/pin-evaluatee", - method="POST", - json={ - "version_id": version_id, - "path": path, - "file_id": file_id, - "environment": environment, - "batch_id": batch_id, - "orchestrated": orchestrated, - }, - request_options=request_options, - omit=OMIT, - ) - try: - if 200 <= _response.status_code < 300: - return typing.cast( - EvaluationResponse, + PaginatedDataEvaluationLogResponse, construct_type( - type_=EvaluationResponse, # type: ignore + type_=PaginatedDataEvaluationLogResponse, # type: ignore object_=_response.json(), ), ) diff --git a/src/humanloop/evaluators/client.py b/src/humanloop/evaluators/client.py index e05ae5cc..5e7ae73e 100644 --- a/src/humanloop/evaluators/client.py +++ b/src/humanloop/evaluators/client.py @@ -64,7 +64,6 @@ def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, create_evaluator_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -136,9 +135,6 @@ def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -197,7 +193,6 @@ def log( "parent_id": parent_id, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": create_evaluator_log_request_environment, "save": save, @@ -1093,7 +1088,6 @@ async def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, create_evaluator_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -1165,9 +1159,6 @@ async def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. 
@@ -1234,7 +1225,6 @@ async def main() -> None: "parent_id": parent_id, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": create_evaluator_log_request_environment, "save": save, diff --git a/src/humanloop/flows/client.py b/src/humanloop/flows/client.py index 6b75d942..4b4671e7 100644 --- a/src/humanloop/flows/client.py +++ b/src/humanloop/flows/client.py @@ -45,7 +45,7 @@ def log( *, version_id: typing.Optional[str] = None, environment: typing.Optional[str] = None, - evaluation_id: typing.Optional[str] = OMIT, + run_id: typing.Optional[str] = OMIT, path: typing.Optional[str] = OMIT, id: typing.Optional[str] = OMIT, start_time: typing.Optional[dt.datetime] = OMIT, @@ -62,7 +62,6 @@ def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, flow_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -85,8 +84,8 @@ def log( environment : typing.Optional[str] Name of the Environment identifying a deployed version to log to. - evaluation_id : typing.Optional[str] - Unique identifier for the Evaluation Report to associate the Log to. + run_id : typing.Optional[str] + Unique identifier for the Run to associate the Log to. path : typing.Optional[str] Path of the Flow, including the name. This locates the Flow in the Humanloop filesystem and is used as as a unique identifier. For example: `folder/name` or just `name`. @@ -136,9 +135,6 @@ def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -197,10 +193,10 @@ def log( output="The patient is likely experiencing a myocardial infarction. 
Immediate medical attention is required.", trace_status="incomplete", start_time=datetime.datetime.fromisoformat( - "2024-07-08 22:40:35+00:00", + "2024-07-08 21:40:35+00:00", ), end_time=datetime.datetime.fromisoformat( - "2024-07-08 22:40:39+00:00", + "2024-07-08 21:40:39+00:00", ), ) """ @@ -212,7 +208,7 @@ def log( "environment": environment, }, json={ - "evaluation_id": evaluation_id, + "run_id": run_id, "path": path, "id": id, "start_time": start_time, @@ -229,7 +225,6 @@ def log( "metadata": metadata, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": flow_log_request_environment, "save": save, @@ -1210,7 +1205,7 @@ async def log( *, version_id: typing.Optional[str] = None, environment: typing.Optional[str] = None, - evaluation_id: typing.Optional[str] = OMIT, + run_id: typing.Optional[str] = OMIT, path: typing.Optional[str] = OMIT, id: typing.Optional[str] = OMIT, start_time: typing.Optional[dt.datetime] = OMIT, @@ -1227,7 +1222,6 @@ async def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, flow_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -1250,8 +1244,8 @@ async def log( environment : typing.Optional[str] Name of the Environment identifying a deployed version to log to. - evaluation_id : typing.Optional[str] - Unique identifier for the Evaluation Report to associate the Log to. + run_id : typing.Optional[str] + Unique identifier for the Run to associate the Log to. path : typing.Optional[str] Path of the Flow, including the name. This locates the Flow in the Humanloop filesystem and is used as as a unique identifier. For example: `folder/name` or just `name`. @@ -1301,9 +1295,6 @@ async def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -1366,10 +1357,10 @@ async def main() -> None: output="The patient is likely experiencing a myocardial infarction. 
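For Flows, `run_id` plays the same role as it does for Prompts: it attaches the Log to an Evaluation Run in place of the removed `evaluation_id`/`batch_id` grouping. A minimal sketch with a placeholder path and Run ID:

```python
import datetime

from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Log a Flow execution and associate it with an Evaluation Run.
client.flows.log(
    path="folder/name",  # placeholder Flow path
    output="...",
    start_time=datetime.datetime.fromisoformat("2024-07-08 21:40:35+00:00"),
    end_time=datetime.datetime.fromisoformat("2024-07-08 21:40:39+00:00"),
    run_id="run_id",     # placeholder Run ID
)
```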
Immediate medical attention is required.", trace_status="incomplete", start_time=datetime.datetime.fromisoformat( - "2024-07-08 22:40:35+00:00", + "2024-07-08 21:40:35+00:00", ), end_time=datetime.datetime.fromisoformat( - "2024-07-08 22:40:39+00:00", + "2024-07-08 21:40:39+00:00", ), ) @@ -1384,7 +1375,7 @@ async def main() -> None: "environment": environment, }, json={ - "evaluation_id": evaluation_id, + "run_id": run_id, "path": path, "id": id, "start_time": start_time, @@ -1401,7 +1392,6 @@ async def main() -> None: "metadata": metadata, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": flow_log_request_environment, "save": save, diff --git a/src/humanloop/prompts/client.py b/src/humanloop/prompts/client.py index a9332565..88cfa117 100644 --- a/src/humanloop/prompts/client.py +++ b/src/humanloop/prompts/client.py @@ -60,7 +60,7 @@ def log( *, version_id: typing.Optional[str] = None, environment: typing.Optional[str] = None, - evaluation_id: typing.Optional[str] = OMIT, + run_id: typing.Optional[str] = OMIT, path: typing.Optional[str] = OMIT, id: typing.Optional[str] = OMIT, output_message: typing.Optional[ChatMessageParams] = OMIT, @@ -86,7 +86,6 @@ def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, prompt_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -111,8 +110,8 @@ def log( environment : typing.Optional[str] Name of the Environment identifying a deployed version to log to. - evaluation_id : typing.Optional[str] - Unique identifier for the Evaluation Report to associate the Log to. + run_id : typing.Optional[str] + Unique identifier for the Run to associate the Log to. path : typing.Optional[str] Path of the Prompt, including the name. This locates the Prompt in the Humanloop filesystem and is used as as a unique identifier. For example: `folder/name` or just `name`. @@ -193,9 +192,6 @@ def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. 
@@ -236,7 +232,7 @@ def log( messages=[{"role": "user", "content": "What really happened at Roswell?"}], inputs={"person": "Trump"}, created_at=datetime.datetime.fromisoformat( - "2024-07-19 00:29:35.178000+00:00", + "2024-07-18 23:29:35.178000+00:00", ), provider_latency=6.5931549072265625, output_message={ @@ -258,7 +254,7 @@ def log( "environment": environment, }, json={ - "evaluation_id": evaluation_id, + "run_id": run_id, "path": path, "id": id, "output_message": convert_and_respect_annotation_metadata( @@ -292,7 +288,6 @@ def log( "metadata": metadata, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": prompt_log_request_environment, "save": save, @@ -523,7 +518,6 @@ def call_stream( end_time: typing.Optional[dt.datetime] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, prompts_call_stream_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -596,9 +590,6 @@ def call_stream( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -672,7 +663,6 @@ def call_stream( ), source_datapoint_id="string", trace_parent_id="string", - batch_id="string", user="string", prompts_call_stream_request_environment="string", save=True, @@ -720,7 +710,6 @@ def call_stream( "end_time": end_time, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": prompts_call_stream_request_environment, "save": save, @@ -784,7 +773,6 @@ def call( end_time: typing.Optional[dt.datetime] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, prompts_call_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -857,9 +845,6 @@ def call( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. 
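The same renaming applies to Prompt Logs: `run_id` ties the Log to an Evaluation Run, and `source_datapoint_id` links it back to the Datapoint it was generated from, which is what the `dynamic` Log association described earlier keys on. A minimal sketch with placeholder IDs:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Log a Prompt generation produced by your own runtime against a Run.
client.prompts.log(
    path="folder/name",  # placeholder Prompt path
    messages=[{"role": "user", "content": "..."}],
    output_message={"role": "assistant", "content": "..."},
    run_id="run_id",                     # placeholder Run ID
    source_datapoint_id="datapoint_id",  # placeholder Datapoint ID
)
```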
@@ -956,7 +941,6 @@ def call( "end_time": end_time, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": prompts_call_request_environment, "save": save, @@ -1935,7 +1919,7 @@ async def log( *, version_id: typing.Optional[str] = None, environment: typing.Optional[str] = None, - evaluation_id: typing.Optional[str] = OMIT, + run_id: typing.Optional[str] = OMIT, path: typing.Optional[str] = OMIT, id: typing.Optional[str] = OMIT, output_message: typing.Optional[ChatMessageParams] = OMIT, @@ -1961,7 +1945,6 @@ async def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, prompt_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -1986,8 +1969,8 @@ async def log( environment : typing.Optional[str] Name of the Environment identifying a deployed version to log to. - evaluation_id : typing.Optional[str] - Unique identifier for the Evaluation Report to associate the Log to. + run_id : typing.Optional[str] + Unique identifier for the Run to associate the Log to. path : typing.Optional[str] Path of the Prompt, including the name. This locates the Prompt in the Humanloop filesystem and is used as as a unique identifier. For example: `folder/name` or just `name`. @@ -2068,9 +2051,6 @@ async def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -2117,7 +2097,7 @@ async def main() -> None: ], inputs={"person": "Trump"}, created_at=datetime.datetime.fromisoformat( - "2024-07-19 00:29:35.178000+00:00", + "2024-07-18 23:29:35.178000+00:00", ), provider_latency=6.5931549072265625, output_message={ @@ -2142,7 +2122,7 @@ async def main() -> None: "environment": environment, }, json={ - "evaluation_id": evaluation_id, + "run_id": run_id, "path": path, "id": id, "output_message": convert_and_respect_annotation_metadata( @@ -2176,7 +2156,6 @@ async def main() -> None: "metadata": metadata, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": prompt_log_request_environment, "save": save, @@ -2415,7 +2394,6 @@ async def call_stream( end_time: typing.Optional[dt.datetime] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, prompts_call_stream_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -2488,9 +2466,6 @@ async def call_stream( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. 
@@ -2568,7 +2543,6 @@ async def main() -> None: ), source_datapoint_id="string", trace_parent_id="string", - batch_id="string", user="string", prompts_call_stream_request_environment="string", save=True, @@ -2619,7 +2593,6 @@ async def main() -> None: "end_time": end_time, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": prompts_call_stream_request_environment, "save": save, @@ -2683,7 +2656,6 @@ async def call( end_time: typing.Optional[dt.datetime] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, prompts_call_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -2756,9 +2728,6 @@ async def call( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -2863,7 +2832,6 @@ async def main() -> None: "end_time": end_time, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": prompts_call_request_environment, "save": save, diff --git a/src/humanloop/requests/__init__.py b/src/humanloop/requests/__init__.py index a8f2e97e..e33e9078 100644 --- a/src/humanloop/requests/__init__.py +++ b/src/humanloop/requests/__init__.py @@ -23,12 +23,13 @@ DirectoryWithParentsAndChildrenResponseFilesItemParams, ) from .environment_response import EnvironmentResponseParams -from .evaluated_version_response import EvaluatedVersionResponseParams from .evaluatee_request import EvaluateeRequestParams from .evaluatee_response import EvaluateeResponseParams from .evaluation_evaluator_response import EvaluationEvaluatorResponseParams -from .evaluation_report_log_response import EvaluationReportLogResponseParams +from .evaluation_log_response import EvaluationLogResponseParams from .evaluation_response import EvaluationResponseParams +from .evaluation_run_response import EvaluationRunResponseParams +from .evaluation_runs_response import EvaluationRunsResponseParams from .evaluation_stats import EvaluationStatsParams from .evaluations_dataset_request import EvaluationsDatasetRequestParams from .evaluations_request import EvaluationsRequestParams @@ -74,7 +75,7 @@ from .monitoring_evaluator_version_request import MonitoringEvaluatorVersionRequestParams from .numeric_evaluator_stats_response import NumericEvaluatorStatsResponseParams from .overall_stats import OverallStatsParams -from .paginated_data_evaluation_report_log_response import PaginatedDataEvaluationReportLogResponseParams +from .paginated_data_evaluation_log_response import PaginatedDataEvaluationLogResponseParams from .paginated_data_evaluator_response import PaginatedDataEvaluatorResponseParams from .paginated_data_flow_response import PaginatedDataFlowResponseParams from .paginated_data_log_response import PaginatedDataLogResponseParams @@ -103,6 +104,9 @@ from .prompt_response_template import PromptResponseTemplateParams from .provider_api_keys import ProviderApiKeysParams from .response_format import ResponseFormatParams +from .run_stats_response import RunStatsResponseParams +from 
.run_stats_response_evaluator_stats_item import RunStatsResponseEvaluatorStatsItemParams +from .run_version_response import RunVersionResponseParams from .select_evaluator_stats_response import SelectEvaluatorStatsResponseParams from .text_chat_content import TextChatContentParams from .text_evaluator_stats_response import TextEvaluatorStatsResponseParams @@ -119,6 +123,7 @@ from .version_id_response import VersionIdResponseParams from .version_id_response_version import VersionIdResponseVersionParams from .version_reference_response import VersionReferenceResponseParams +from .version_specification import VersionSpecificationParams from .version_stats_response import VersionStatsResponseParams from .version_stats_response_evaluator_version_stats_item import VersionStatsResponseEvaluatorVersionStatsItemParams @@ -144,12 +149,13 @@ "DirectoryWithParentsAndChildrenResponseFilesItemParams", "DirectoryWithParentsAndChildrenResponseParams", "EnvironmentResponseParams", - "EvaluatedVersionResponseParams", "EvaluateeRequestParams", "EvaluateeResponseParams", "EvaluationEvaluatorResponseParams", - "EvaluationReportLogResponseParams", + "EvaluationLogResponseParams", "EvaluationResponseParams", + "EvaluationRunResponseParams", + "EvaluationRunsResponseParams", "EvaluationStatsParams", "EvaluationsDatasetRequestParams", "EvaluationsRequestParams", @@ -191,7 +197,7 @@ "MonitoringEvaluatorVersionRequestParams", "NumericEvaluatorStatsResponseParams", "OverallStatsParams", - "PaginatedDataEvaluationReportLogResponseParams", + "PaginatedDataEvaluationLogResponseParams", "PaginatedDataEvaluatorResponseParams", "PaginatedDataFlowResponseParams", "PaginatedDataLogResponseParams", @@ -216,6 +222,9 @@ "PromptResponseTemplateParams", "ProviderApiKeysParams", "ResponseFormatParams", + "RunStatsResponseEvaluatorStatsItemParams", + "RunStatsResponseParams", + "RunVersionResponseParams", "SelectEvaluatorStatsResponseParams", "TextChatContentParams", "TextEvaluatorStatsResponseParams", @@ -232,6 +241,7 @@ "VersionIdResponseParams", "VersionIdResponseVersionParams", "VersionReferenceResponseParams", + "VersionSpecificationParams", "VersionStatsResponseEvaluatorVersionStatsItemParams", "VersionStatsResponseParams", ] diff --git a/src/humanloop/requests/boolean_evaluator_stats_response.py b/src/humanloop/requests/boolean_evaluator_stats_response.py index 33d9b44f..18618f40 100644 --- a/src/humanloop/requests/boolean_evaluator_stats_response.py +++ b/src/humanloop/requests/boolean_evaluator_stats_response.py @@ -6,7 +6,7 @@ class BooleanEvaluatorStatsResponseParams(typing_extensions.TypedDict): """ Base attributes for stats for an Evaluator Version-Evaluated Version pair - in the Evaluation Report. + in the Evaluation. """ evaluator_version_id: str diff --git a/src/humanloop/requests/dataset_response.py b/src/humanloop/requests/dataset_response.py index 941cf0d0..56fcc4ed 100644 --- a/src/humanloop/requests/dataset_response.py +++ b/src/humanloop/requests/dataset_response.py @@ -56,6 +56,16 @@ class DatasetResponseParams(typing_extensions.TypedDict): The user who created the Dataset. """ + committed_by: typing_extensions.NotRequired[UserResponse] + """ + The user who committed the Dataset Version. + """ + + committed_at: typing_extensions.NotRequired[dt.datetime] + """ + The date and time the Dataset Version was committed. + """ + status: VersionStatus """ The status of the Dataset Version. 
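The new `committed_by` and `committed_at` fields are optional on the version response models; a hedged helper that reads them off an already-fetched `DatasetResponse` (how the model is fetched is left out, and the `UserResponse` attribute names are probed defensively because they are not shown here):

```python
# Hedged sketch: summarise commit provenance from the new optional fields on
# DatasetResponse. Only `committed_at` / `committed_by` are taken as given;
# the user attribute names are guesses, hence the defensive getattr calls.
from humanloop.types import DatasetResponse  # assumed exported like the other response models


def dataset_commit_info(dataset: DatasetResponse) -> str:
    if dataset.committed_at is None:
        return "uncommitted draft"
    by = dataset.committed_by  # optional even when committed_at is set
    who = getattr(by, "full_name", None) or getattr(by, "email_address", None) or "unknown user"
    return f"committed {dataset.committed_at.isoformat()} by {who}"
```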
diff --git a/src/humanloop/requests/evaluatee_request.py b/src/humanloop/requests/evaluatee_request.py index 2eba177f..26e048c5 100644 --- a/src/humanloop/requests/evaluatee_request.py +++ b/src/humanloop/requests/evaluatee_request.py @@ -38,7 +38,7 @@ class EvaluateeRequestParams(typing_extensions.TypedDict): batch_id: typing_extensions.NotRequired[str] """ - Unique identifier for the batch of Logs to include in the Evaluation Report. + Unique identifier for the batch of Logs to include in the Evaluation. """ orchestrated: typing_extensions.NotRequired[bool] diff --git a/src/humanloop/requests/evaluatee_response.py b/src/humanloop/requests/evaluatee_response.py index 6ea5d9ba..411ba5ba 100644 --- a/src/humanloop/requests/evaluatee_response.py +++ b/src/humanloop/requests/evaluatee_response.py @@ -1,8 +1,8 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions -from .evaluated_version_response import EvaluatedVersionResponseParams import typing_extensions +from .run_version_response import RunVersionResponseParams import datetime as dt @@ -11,10 +11,10 @@ class EvaluateeResponseParams(typing_extensions.TypedDict): Version of the Evaluatee being evaluated. """ - version: EvaluatedVersionResponseParams + version: typing_extensions.NotRequired[RunVersionResponseParams] batch_id: typing_extensions.NotRequired[str] """ - Unique identifier for the batch of Logs to include in the Evaluation Report. + Unique identifier for the batch of Logs to include in the Evaluation. """ orchestrated: bool diff --git a/src/humanloop/requests/evaluation_report_log_response.py b/src/humanloop/requests/evaluation_log_response.py similarity index 59% rename from src/humanloop/requests/evaluation_report_log_response.py rename to src/humanloop/requests/evaluation_log_response.py index 5aa9a042..8fe5d762 100644 --- a/src/humanloop/requests/evaluation_report_log_response.py +++ b/src/humanloop/requests/evaluation_log_response.py @@ -1,17 +1,15 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions -from .evaluated_version_response import EvaluatedVersionResponseParams from .datapoint_response import DatapointResponseParams -import typing_extensions from .log_response import LogResponseParams import typing -class EvaluationReportLogResponseParams(typing_extensions.TypedDict): - evaluated_version: EvaluatedVersionResponseParams +class EvaluationLogResponseParams(typing_extensions.TypedDict): + run_id: str """ - The version of the Prompt, Tool or Evaluator that the Log belongs to. + Unique identifier for the Run. """ datapoint: DatapointResponseParams @@ -19,7 +17,7 @@ class EvaluationReportLogResponseParams(typing_extensions.TypedDict): The Datapoint used to generate the Log """ - log: typing_extensions.NotRequired[LogResponseParams] + log: LogResponseParams """ The Log that was evaluated by the Evaluator. """ diff --git a/src/humanloop/requests/evaluation_response.py b/src/humanloop/requests/evaluation_response.py index fe09cad4..27d9da73 100644 --- a/src/humanloop/requests/evaluation_response.py +++ b/src/humanloop/requests/evaluation_response.py @@ -1,11 +1,8 @@ # This file was auto-generated by Fern from our API Definition. 
import typing_extensions -from .dataset_response import DatasetResponseParams import typing -from .evaluatee_response import EvaluateeResponseParams from .evaluation_evaluator_response import EvaluationEvaluatorResponseParams -from ..types.evaluation_status import EvaluationStatus import typing_extensions import datetime as dt from ..types.user_response import UserResponse @@ -17,14 +14,9 @@ class EvaluationResponseParams(typing_extensions.TypedDict): Unique identifier for the Evaluation. Starts with `evr`. """ - dataset: DatasetResponseParams + runs_count: int """ - The Dataset used in the Evaluation. - """ - - evaluatees: typing.Sequence[EvaluateeResponseParams] - """ - The Prompt/Tool Versions included in the Evaluation. + The total number of Runs in the Evaluation. """ evaluators: typing.Sequence[EvaluationEvaluatorResponseParams] @@ -32,16 +24,6 @@ class EvaluationResponseParams(typing_extensions.TypedDict): The Evaluator Versions used to evaluate. """ - status: EvaluationStatus - """ - The current status of the Evaluation. - - - `"pending"`: The Evaluation has been created but is not actively being worked on by Humanloop. - - `"running"`: Humanloop is checking for any missing Logs and Evaluator Logs, and will generate them where appropriate. - - `"completed"`: All Logs an Evaluator Logs have been generated. - - `"cancelled"`: The Evaluation has been cancelled by the user. Humanloop will stop generating Logs and Evaluator Logs. - """ - name: typing_extensions.NotRequired[str] """ Name of the Evaluation to help identify it. Must be unique among Evaluations associated with File. diff --git a/src/humanloop/requests/evaluation_run_response.py b/src/humanloop/requests/evaluation_run_response.py new file mode 100644 index 00000000..98ccfd75 --- /dev/null +++ b/src/humanloop/requests/evaluation_run_response.py @@ -0,0 +1,56 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing_extensions +import typing_extensions +from .dataset_response import DatasetResponseParams +from .run_version_response import RunVersionResponseParams +import datetime as dt +from ..types.user_response import UserResponse +from ..types.evaluation_status import EvaluationStatus + + +class EvaluationRunResponseParams(typing_extensions.TypedDict): + id: str + """ + Unique identifier for the Run. + """ + + dataset: typing_extensions.NotRequired[DatasetResponseParams] + """ + The Dataset used in the Run. + """ + + version: typing_extensions.NotRequired[RunVersionResponseParams] + """ + The version used in the Run. + """ + + orchestrated: bool + """ + Whether the Run is orchestrated by Humanloop. + """ + + added_at: dt.datetime + """ + When the Run was added to the Evaluation. + """ + + created_at: dt.datetime + """ + When the Run was created. + """ + + created_by: typing_extensions.NotRequired[UserResponse] + """ + The User who created the Run. + """ + + status: EvaluationStatus + """ + The status of the Run. + """ + + control: bool + """ + Stats for other Runs will be displayed in comparison to the control Run. + """ diff --git a/src/humanloop/requests/evaluation_runs_response.py b/src/humanloop/requests/evaluation_runs_response.py new file mode 100644 index 00000000..a6e86d68 --- /dev/null +++ b/src/humanloop/requests/evaluation_runs_response.py @@ -0,0 +1,12 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing_extensions +import typing +from .evaluation_run_response import EvaluationRunResponseParams + + +class EvaluationRunsResponseParams(typing_extensions.TypedDict): + runs: typing.Sequence[EvaluationRunResponseParams] + """ + The Runs in the Evaluation. + """ diff --git a/src/humanloop/requests/evaluation_stats.py b/src/humanloop/requests/evaluation_stats.py index b605ac2b..0a5a6a4a 100644 --- a/src/humanloop/requests/evaluation_stats.py +++ b/src/humanloop/requests/evaluation_stats.py @@ -1,22 +1,16 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions -from .overall_stats import OverallStatsParams import typing -from .version_stats_response import VersionStatsResponseParams +from .run_stats_response import RunStatsResponseParams import typing_extensions from ..types.evaluation_status import EvaluationStatus class EvaluationStatsParams(typing_extensions.TypedDict): - overall_stats: OverallStatsParams + run_stats: typing.Sequence[RunStatsResponseParams] """ - Stats for the Evaluation Report as a whole. - """ - - version_stats: typing.Sequence[VersionStatsResponseParams] - """ - Stats for each Evaluated Version in the Evaluation Report. + Stats for each Run in the Evaluation. """ progress: typing_extensions.NotRequired[str] diff --git a/src/humanloop/requests/evaluator_response.py b/src/humanloop/requests/evaluator_response.py index 888a55ff..609c11e4 100644 --- a/src/humanloop/requests/evaluator_response.py +++ b/src/humanloop/requests/evaluator_response.py @@ -66,6 +66,16 @@ class EvaluatorResponseParams(typing_extensions.TypedDict): The user who created the Evaluator. """ + committed_by: typing_extensions.NotRequired[UserResponse] + """ + The user who committed the Evaluator Version. + """ + + committed_at: typing_extensions.NotRequired[dt.datetime] + """ + The date and time the Evaluator Version was committed. + """ + status: VersionStatus last_used_at: dt.datetime version_logs_count: int diff --git a/src/humanloop/requests/flow_response.py b/src/humanloop/requests/flow_response.py index 27a004ec..60b7753a 100644 --- a/src/humanloop/requests/flow_response.py +++ b/src/humanloop/requests/flow_response.py @@ -68,6 +68,16 @@ class FlowResponseParams(typing_extensions.TypedDict): The user who created the Flow. """ + committed_by: typing_extensions.NotRequired[UserResponse] + """ + The user who committed the Flow Version. + """ + + committed_at: typing_extensions.NotRequired[dt.datetime] + """ + The date and time the Flow Version was committed. + """ + status: VersionStatus """ The status of the Flow Version. diff --git a/src/humanloop/requests/numeric_evaluator_stats_response.py b/src/humanloop/requests/numeric_evaluator_stats_response.py index 91eb5b4c..4edbda84 100644 --- a/src/humanloop/requests/numeric_evaluator_stats_response.py +++ b/src/humanloop/requests/numeric_evaluator_stats_response.py @@ -8,7 +8,7 @@ class NumericEvaluatorStatsResponseParams(typing_extensions.TypedDict): """ Base attributes for stats for an Evaluator Version-Evaluated Version pair - in the Evaluation Report. + in the Evaluation. """ evaluator_version_id: str diff --git a/src/humanloop/requests/overall_stats.py b/src/humanloop/requests/overall_stats.py index 5946d210..da04f19f 100644 --- a/src/humanloop/requests/overall_stats.py +++ b/src/humanloop/requests/overall_stats.py @@ -6,15 +6,15 @@ class OverallStatsParams(typing_extensions.TypedDict): num_datapoints: int """ - The total number of Datapoints in the Evaluation Report's Dataset Version. 
+ The total number of Datapoints in the Evaluation's Dataset Version. """ total_logs: int """ - The total number of Logs in the Evaluation Report. + The total number of Logs in the Evaluation. """ total_evaluator_logs: int """ - The total number of Evaluator Logs in the Evaluation Report. + The total number of Evaluator Logs in the Evaluation. """ diff --git a/src/humanloop/requests/paginated_data_evaluation_log_response.py b/src/humanloop/requests/paginated_data_evaluation_log_response.py new file mode 100644 index 00000000..e9723472 --- /dev/null +++ b/src/humanloop/requests/paginated_data_evaluation_log_response.py @@ -0,0 +1,12 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing_extensions +import typing +from .evaluation_log_response import EvaluationLogResponseParams + + +class PaginatedDataEvaluationLogResponseParams(typing_extensions.TypedDict): + records: typing.Sequence[EvaluationLogResponseParams] + page: int + size: int + total: int diff --git a/src/humanloop/requests/paginated_data_evaluation_report_log_response.py b/src/humanloop/requests/paginated_data_evaluation_report_log_response.py deleted file mode 100644 index bdc88d6a..00000000 --- a/src/humanloop/requests/paginated_data_evaluation_report_log_response.py +++ /dev/null @@ -1,12 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing_extensions -import typing -from .evaluation_report_log_response import EvaluationReportLogResponseParams - - -class PaginatedDataEvaluationReportLogResponseParams(typing_extensions.TypedDict): - records: typing.Sequence[EvaluationReportLogResponseParams] - page: int - size: int - total: int diff --git a/src/humanloop/requests/prompt_call_response.py b/src/humanloop/requests/prompt_call_response.py index 685d6b44..7a66ecbd 100644 --- a/src/humanloop/requests/prompt_call_response.py +++ b/src/humanloop/requests/prompt_call_response.py @@ -70,11 +70,6 @@ class PromptCallResponseParams(typing_extensions.TypedDict): The ID of the parent Log to nest this Log under in a Trace. """ - batch_id: typing_extensions.NotRequired[str] - """ - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - """ - user: typing_extensions.NotRequired[str] """ End-user ID related to the Log. diff --git a/src/humanloop/requests/prompt_response.py b/src/humanloop/requests/prompt_response.py index 918813ec..50039007 100644 --- a/src/humanloop/requests/prompt_response.py +++ b/src/humanloop/requests/prompt_response.py @@ -159,6 +159,16 @@ class PromptResponseParams(typing_extensions.TypedDict): The user who created the Prompt. """ + committed_by: typing_extensions.NotRequired[UserResponse] + """ + The user who committed the Prompt Version. + """ + + committed_at: typing_extensions.NotRequired[dt.datetime] + """ + The date and time the Prompt Version was committed. + """ + status: VersionStatus """ The status of the Prompt Version. diff --git a/src/humanloop/requests/run_stats_response.py b/src/humanloop/requests/run_stats_response.py new file mode 100644 index 00000000..0cb19389 --- /dev/null +++ b/src/humanloop/requests/run_stats_response.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing_extensions +import typing_extensions +import typing +from .run_stats_response_evaluator_stats_item import RunStatsResponseEvaluatorStatsItemParams + + +class RunStatsResponseParams(typing_extensions.TypedDict): + """ + Stats for a Run in the Evaluation. + """ + + run_id: str + """ + Unique identifier for the Run. + """ + + version_id: typing_extensions.NotRequired[str] + """ + Unique identifier for the evaluated Version. + """ + + batch_id: typing_extensions.NotRequired[str] + """ + Unique identifier for the batch of Logs to include in the Evaluation. + """ + + num_logs: int + """ + The total number of existing Logs in this Run. + """ + + evaluator_stats: typing.Sequence[RunStatsResponseEvaluatorStatsItemParams] + """ + Stats for each Evaluator Version applied to this Run. + """ diff --git a/src/humanloop/requests/run_stats_response_evaluator_stats_item.py b/src/humanloop/requests/run_stats_response_evaluator_stats_item.py new file mode 100644 index 00000000..a42aea0b --- /dev/null +++ b/src/humanloop/requests/run_stats_response_evaluator_stats_item.py @@ -0,0 +1,14 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .numeric_evaluator_stats_response import NumericEvaluatorStatsResponseParams +from .boolean_evaluator_stats_response import BooleanEvaluatorStatsResponseParams +from .select_evaluator_stats_response import SelectEvaluatorStatsResponseParams +from .text_evaluator_stats_response import TextEvaluatorStatsResponseParams + +RunStatsResponseEvaluatorStatsItemParams = typing.Union[ + NumericEvaluatorStatsResponseParams, + BooleanEvaluatorStatsResponseParams, + SelectEvaluatorStatsResponseParams, + TextEvaluatorStatsResponseParams, +] diff --git a/src/humanloop/requests/evaluated_version_response.py b/src/humanloop/requests/run_version_response.py similarity index 88% rename from src/humanloop/requests/evaluated_version_response.py rename to src/humanloop/requests/run_version_response.py index d35a602d..879ea25c 100644 --- a/src/humanloop/requests/evaluated_version_response.py +++ b/src/humanloop/requests/run_version_response.py @@ -6,6 +6,6 @@ from .evaluator_response import EvaluatorResponseParams from .flow_response import FlowResponseParams -EvaluatedVersionResponseParams = typing.Union[ +RunVersionResponseParams = typing.Union[ PromptResponseParams, ToolResponseParams, EvaluatorResponseParams, FlowResponseParams ] diff --git a/src/humanloop/requests/text_evaluator_stats_response.py b/src/humanloop/requests/text_evaluator_stats_response.py index d1d97f81..8f0f358d 100644 --- a/src/humanloop/requests/text_evaluator_stats_response.py +++ b/src/humanloop/requests/text_evaluator_stats_response.py @@ -6,7 +6,7 @@ class TextEvaluatorStatsResponseParams(typing_extensions.TypedDict): """ Base attributes for stats for an Evaluator Version-Evaluated Version pair - in the Evaluation Report. + in the Evaluation. """ evaluator_version_id: str diff --git a/src/humanloop/requests/tool_response.py b/src/humanloop/requests/tool_response.py index 44313db7..57b9b608 100644 --- a/src/humanloop/requests/tool_response.py +++ b/src/humanloop/requests/tool_response.py @@ -94,6 +94,16 @@ class ToolResponseParams(typing_extensions.TypedDict): The user who created the Tool. """ + committed_by: typing_extensions.NotRequired[UserResponse] + """ + The user who committed the Tool Version. + """ + + committed_at: typing_extensions.NotRequired[dt.datetime] + """ + The date and time the Tool Version was committed. 
+ """ + status: VersionStatus """ The status of the Tool Version. diff --git a/src/humanloop/requests/version_specification.py b/src/humanloop/requests/version_specification.py new file mode 100644 index 00000000..34606269 --- /dev/null +++ b/src/humanloop/requests/version_specification.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing_extensions +import typing_extensions + + +class VersionSpecificationParams(typing_extensions.TypedDict): + """ + Specification of a File version on Humanloop. + + This can be done in a couple of ways: + + - Specifying `version_id` directly. + - Specifying a File (and optionally an Environment). + - A File can be specified by either `path` or `file_id`. + - An Environment can be specified by `environment_id`. If no Environment is specified, the default Environment is used. + """ + + version_id: typing_extensions.NotRequired[str] + """ + Unique identifier for the File Version. If provided, none of the other fields should be specified. + """ + + path: typing_extensions.NotRequired[str] + """ + Path identifying a File. Provide either this or `file_id` if you want to specify a File. + """ + + file_id: typing_extensions.NotRequired[str] + """ + Unique identifier for the File. Provide either this or `path` if you want to specify a File. + """ + + environment: typing_extensions.NotRequired[str] + """ + Name of the Environment a Version is deployed to. Only provide this when specifying a File. If not provided (and a File is specified), the default Environment is used. + """ diff --git a/src/humanloop/requests/version_stats_response.py b/src/humanloop/requests/version_stats_response.py index 34f753f4..053c0ac9 100644 --- a/src/humanloop/requests/version_stats_response.py +++ b/src/humanloop/requests/version_stats_response.py @@ -7,26 +7,22 @@ class VersionStatsResponseParams(typing_extensions.TypedDict): - """ - Stats for an Evaluated Version in the Evaluation Report. - """ - version_id: str """ - Unique identifier for the Evaluated Version. + Unique identifier for the evaluated Version. """ batch_id: typing_extensions.NotRequired[str] """ - Unique identifier for the batch of Logs to include in the Evaluation Report. + Unique identifier for the batch of Logs to include in the Evaluation. """ num_logs: int """ - The total number of existing Logs for this Evaluated Version within the Evaluation Report. These are Logs that have been generated by this Evaluated Version on a Datapoint belonging to the Evaluation Report's Dataset Version. + The total number of existing Logs in this Run. """ evaluator_version_stats: typing.Sequence[VersionStatsResponseEvaluatorVersionStatsItemParams] """ - Stats for each Evaluator Version used to evaluate this Evaluated Version. + Stats for each Evaluator Version applied to this Run. """ diff --git a/src/humanloop/tools/client.py b/src/humanloop/tools/client.py index 4d23bb8e..7226e60b 100644 --- a/src/humanloop/tools/client.py +++ b/src/humanloop/tools/client.py @@ -62,7 +62,6 @@ def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, tool_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -136,9 +135,6 @@ def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. 
- batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -210,7 +206,6 @@ def log( "metadata": metadata, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": tool_log_request_environment, "save": save, @@ -1258,7 +1253,6 @@ async def log( metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, source_datapoint_id: typing.Optional[str] = OMIT, trace_parent_id: typing.Optional[str] = OMIT, - batch_id: typing.Optional[str] = OMIT, user: typing.Optional[str] = OMIT, tool_log_request_environment: typing.Optional[str] = OMIT, save: typing.Optional[bool] = OMIT, @@ -1332,9 +1326,6 @@ async def log( trace_parent_id : typing.Optional[str] The ID of the parent Log to nest this Log under in a Trace. - batch_id : typing.Optional[str] - Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist. - user : typing.Optional[str] End-user ID related to the Log. @@ -1414,7 +1405,6 @@ async def main() -> None: "metadata": metadata, "source_datapoint_id": source_datapoint_id, "trace_parent_id": trace_parent_id, - "batch_id": batch_id, "user": user, "environment": tool_log_request_environment, "save": save, diff --git a/src/humanloop/types/__init__.py b/src/humanloop/types/__init__.py index c9b3180f..8c973b52 100644 --- a/src/humanloop/types/__init__.py +++ b/src/humanloop/types/__init__.py @@ -26,12 +26,13 @@ from .directory_with_parents_and_children_response_files_item import DirectoryWithParentsAndChildrenResponseFilesItem from .environment_response import EnvironmentResponse from .environment_tag import EnvironmentTag -from .evaluated_version_response import EvaluatedVersionResponse from .evaluatee_request import EvaluateeRequest from .evaluatee_response import EvaluateeResponse from .evaluation_evaluator_response import EvaluationEvaluatorResponse -from .evaluation_report_log_response import EvaluationReportLogResponse +from .evaluation_log_response import EvaluationLogResponse from .evaluation_response import EvaluationResponse +from .evaluation_run_response import EvaluationRunResponse +from .evaluation_runs_response import EvaluationRunsResponse from .evaluation_stats import EvaluationStats from .evaluation_status import EvaluationStatus from .evaluations_dataset_request import EvaluationsDatasetRequest @@ -78,6 +79,7 @@ from .list_tools import ListTools from .llm_evaluator_request import LlmEvaluatorRequest from .log_response import LogResponse +from .logs_association_type import LogsAssociationType from .model_endpoints import ModelEndpoints from .model_providers import ModelProviders from .monitoring_evaluator_environment_request import MonitoringEvaluatorEnvironmentRequest @@ -87,7 +89,7 @@ from .numeric_evaluator_stats_response import NumericEvaluatorStatsResponse from .observability_status import ObservabilityStatus from .overall_stats import OverallStats -from .paginated_data_evaluation_report_log_response import PaginatedDataEvaluationReportLogResponse +from .paginated_data_evaluation_log_response import PaginatedDataEvaluationLogResponse from .paginated_data_evaluator_response import PaginatedDataEvaluatorResponse from .paginated_data_flow_response import 
PaginatedDataFlowResponse from .paginated_data_log_response import PaginatedDataLogResponse @@ -121,6 +123,9 @@ from .provider_api_keys import ProviderApiKeys from .response_format import ResponseFormat from .response_format_type import ResponseFormatType +from .run_stats_response import RunStatsResponse +from .run_stats_response_evaluator_stats_item import RunStatsResponseEvaluatorStatsItem +from .run_version_response import RunVersionResponse from .select_evaluator_stats_response import SelectEvaluatorStatsResponse from .sort_order import SortOrder from .text_chat_content import TextChatContent @@ -144,6 +149,7 @@ from .version_id_response import VersionIdResponse from .version_id_response_version import VersionIdResponseVersion from .version_reference_response import VersionReferenceResponse +from .version_specification import VersionSpecification from .version_stats_response import VersionStatsResponse from .version_stats_response_evaluator_version_stats_item import VersionStatsResponseEvaluatorVersionStatsItem from .version_status import VersionStatus @@ -175,12 +181,13 @@ "DirectoryWithParentsAndChildrenResponseFilesItem", "EnvironmentResponse", "EnvironmentTag", - "EvaluatedVersionResponse", "EvaluateeRequest", "EvaluateeResponse", "EvaluationEvaluatorResponse", - "EvaluationReportLogResponse", + "EvaluationLogResponse", "EvaluationResponse", + "EvaluationRunResponse", + "EvaluationRunsResponse", "EvaluationStats", "EvaluationStatus", "EvaluationsDatasetRequest", @@ -225,6 +232,7 @@ "ListTools", "LlmEvaluatorRequest", "LogResponse", + "LogsAssociationType", "ModelEndpoints", "ModelProviders", "MonitoringEvaluatorEnvironmentRequest", @@ -234,7 +242,7 @@ "NumericEvaluatorStatsResponse", "ObservabilityStatus", "OverallStats", - "PaginatedDataEvaluationReportLogResponse", + "PaginatedDataEvaluationLogResponse", "PaginatedDataEvaluatorResponse", "PaginatedDataFlowResponse", "PaginatedDataLogResponse", @@ -264,6 +272,9 @@ "ProviderApiKeys", "ResponseFormat", "ResponseFormatType", + "RunStatsResponse", + "RunStatsResponseEvaluatorStatsItem", + "RunVersionResponse", "SelectEvaluatorStatsResponse", "SortOrder", "TextChatContent", @@ -287,6 +298,7 @@ "VersionIdResponse", "VersionIdResponseVersion", "VersionReferenceResponse", + "VersionSpecification", "VersionStatsResponse", "VersionStatsResponseEvaluatorVersionStatsItem", "VersionStatus", diff --git a/src/humanloop/types/boolean_evaluator_stats_response.py b/src/humanloop/types/boolean_evaluator_stats_response.py index 9ce51712..3deca81b 100644 --- a/src/humanloop/types/boolean_evaluator_stats_response.py +++ b/src/humanloop/types/boolean_evaluator_stats_response.py @@ -9,7 +9,7 @@ class BooleanEvaluatorStatsResponse(UncheckedBaseModel): """ Base attributes for stats for an Evaluator Version-Evaluated Version pair - in the Evaluation Report. + in the Evaluation. """ evaluator_version_id: str = pydantic.Field() diff --git a/src/humanloop/types/dataset_response.py b/src/humanloop/types/dataset_response.py index 132a7abf..942a9ee1 100644 --- a/src/humanloop/types/dataset_response.py +++ b/src/humanloop/types/dataset_response.py @@ -57,6 +57,16 @@ class DatasetResponse(UncheckedBaseModel): The user who created the Dataset. """ + committed_by: typing.Optional[UserResponse] = pydantic.Field(default=None) + """ + The user who committed the Dataset Version. + """ + + committed_at: typing.Optional[dt.datetime] = pydantic.Field(default=None) + """ + The date and time the Dataset Version was committed. 
+ """ + status: VersionStatus = pydantic.Field() """ The status of the Dataset Version. diff --git a/src/humanloop/types/evaluatee_request.py b/src/humanloop/types/evaluatee_request.py index 32f5f867..d976f840 100644 --- a/src/humanloop/types/evaluatee_request.py +++ b/src/humanloop/types/evaluatee_request.py @@ -40,7 +40,7 @@ class EvaluateeRequest(UncheckedBaseModel): batch_id: typing.Optional[str] = pydantic.Field(default=None) """ - Unique identifier for the batch of Logs to include in the Evaluation Report. + Unique identifier for the batch of Logs to include in the Evaluation. """ orchestrated: typing.Optional[bool] = pydantic.Field(default=None) diff --git a/src/humanloop/types/evaluatee_response.py b/src/humanloop/types/evaluatee_response.py index 4dd78cb7..baa33f79 100644 --- a/src/humanloop/types/evaluatee_response.py +++ b/src/humanloop/types/evaluatee_response.py @@ -9,8 +9,8 @@ from .tool_response import ToolResponse from .version_deployment_response import VersionDeploymentResponse from .version_id_response import VersionIdResponse -from .evaluated_version_response import EvaluatedVersionResponse import typing +from .run_version_response import RunVersionResponse import pydantic import datetime as dt from ..core.pydantic_utilities import IS_PYDANTIC_V2 @@ -22,10 +22,10 @@ class EvaluateeResponse(UncheckedBaseModel): Version of the Evaluatee being evaluated. """ - version: EvaluatedVersionResponse + version: typing.Optional[RunVersionResponse] = None batch_id: typing.Optional[str] = pydantic.Field(default=None) """ - Unique identifier for the batch of Logs to include in the Evaluation Report. + Unique identifier for the batch of Logs to include in the Evaluation. """ orchestrated: bool = pydantic.Field() diff --git a/src/humanloop/types/evaluation_report_log_response.py b/src/humanloop/types/evaluation_log_response.py similarity index 53% rename from src/humanloop/types/evaluation_report_log_response.py rename to src/humanloop/types/evaluation_log_response.py index a92d3414..d0ad938d 100644 --- a/src/humanloop/types/evaluation_report_log_response.py +++ b/src/humanloop/types/evaluation_log_response.py @@ -2,30 +2,29 @@ from __future__ import annotations from ..core.unchecked_base_model import UncheckedBaseModel +from .evaluator_log_response import EvaluatorLogResponse from .evaluator_response import EvaluatorResponse +from .flow_log_response import FlowLogResponse from .flow_response import FlowResponse from .monitoring_evaluator_response import MonitoringEvaluatorResponse +from .prompt_log_response import PromptLogResponse from .prompt_response import PromptResponse +from .tool_log_response import ToolLogResponse from .tool_response import ToolResponse from .version_deployment_response import VersionDeploymentResponse from .version_id_response import VersionIdResponse -from .evaluator_log_response import EvaluatorLogResponse -from .flow_log_response import FlowLogResponse -from .prompt_log_response import PromptLogResponse -from .tool_log_response import ToolLogResponse -from .evaluated_version_response import EvaluatedVersionResponse import pydantic from .datapoint_response import DatapointResponse -import typing from .log_response import LogResponse +import typing from ..core.pydantic_utilities import IS_PYDANTIC_V2 from ..core.pydantic_utilities import update_forward_refs -class EvaluationReportLogResponse(UncheckedBaseModel): - evaluated_version: EvaluatedVersionResponse = pydantic.Field() +class EvaluationLogResponse(UncheckedBaseModel): + run_id: str = pydantic.Field() 
""" - The version of the Prompt, Tool or Evaluator that the Log belongs to. + Unique identifier for the Run. """ datapoint: DatapointResponse = pydantic.Field() @@ -33,7 +32,7 @@ class EvaluationReportLogResponse(UncheckedBaseModel): The Datapoint used to generate the Log """ - log: typing.Optional[LogResponse] = pydantic.Field(default=None) + log: LogResponse = pydantic.Field() """ The Log that was evaluated by the Evaluator. """ @@ -53,14 +52,14 @@ class Config: extra = pydantic.Extra.allow -update_forward_refs(EvaluatorResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(FlowResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(MonitoringEvaluatorResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(PromptResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(ToolResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(VersionDeploymentResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(VersionIdResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(EvaluatorLogResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(FlowLogResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(PromptLogResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) -update_forward_refs(ToolLogResponse, EvaluationReportLogResponse=EvaluationReportLogResponse) +update_forward_refs(EvaluatorLogResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(EvaluatorResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(FlowLogResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(FlowResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(MonitoringEvaluatorResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(PromptLogResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(PromptResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(ToolLogResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(ToolResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(VersionDeploymentResponse, EvaluationLogResponse=EvaluationLogResponse) +update_forward_refs(VersionIdResponse, EvaluationLogResponse=EvaluationLogResponse) diff --git a/src/humanloop/types/evaluation_response.py b/src/humanloop/types/evaluation_response.py index b8864204..a4c2336a 100644 --- a/src/humanloop/types/evaluation_response.py +++ b/src/humanloop/types/evaluation_response.py @@ -10,11 +10,8 @@ from .version_deployment_response import VersionDeploymentResponse from .version_id_response import VersionIdResponse import pydantic -from .dataset_response import DatasetResponse import typing -from .evaluatee_response import EvaluateeResponse from .evaluation_evaluator_response import EvaluationEvaluatorResponse -from .evaluation_status import EvaluationStatus import datetime as dt from .user_response import UserResponse from ..core.pydantic_utilities import IS_PYDANTIC_V2 @@ -27,14 +24,9 @@ class EvaluationResponse(UncheckedBaseModel): Unique identifier for the Evaluation. Starts with `evr`. """ - dataset: DatasetResponse = pydantic.Field() + runs_count: int = pydantic.Field() """ - The Dataset used in the Evaluation. 
- """ - - evaluatees: typing.List[EvaluateeResponse] = pydantic.Field() - """ - The Prompt/Tool Versions included in the Evaluation. + The total number of Runs in the Evaluation. """ evaluators: typing.List[EvaluationEvaluatorResponse] = pydantic.Field() @@ -42,16 +34,6 @@ class EvaluationResponse(UncheckedBaseModel): The Evaluator Versions used to evaluate. """ - status: EvaluationStatus = pydantic.Field() - """ - The current status of the Evaluation. - - - `"pending"`: The Evaluation has been created but is not actively being worked on by Humanloop. - - `"running"`: Humanloop is checking for any missing Logs and Evaluator Logs, and will generate them where appropriate. - - `"completed"`: All Logs an Evaluator Logs have been generated. - - `"cancelled"`: The Evaluation has been cancelled by the user. Humanloop will stop generating Logs and Evaluator Logs. - """ - name: typing.Optional[str] = pydantic.Field(default=None) """ Name of the Evaluation to help identify it. Must be unique among Evaluations associated with File. diff --git a/src/humanloop/types/evaluation_run_response.py b/src/humanloop/types/evaluation_run_response.py new file mode 100644 index 00000000..46f9308d --- /dev/null +++ b/src/humanloop/types/evaluation_run_response.py @@ -0,0 +1,85 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations +from ..core.unchecked_base_model import UncheckedBaseModel +from .evaluator_response import EvaluatorResponse +from .flow_response import FlowResponse +from .monitoring_evaluator_response import MonitoringEvaluatorResponse +from .prompt_response import PromptResponse +from .tool_response import ToolResponse +from .version_deployment_response import VersionDeploymentResponse +from .version_id_response import VersionIdResponse +import pydantic +import typing +from .dataset_response import DatasetResponse +from .run_version_response import RunVersionResponse +import datetime as dt +from .user_response import UserResponse +from .evaluation_status import EvaluationStatus +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.pydantic_utilities import update_forward_refs + + +class EvaluationRunResponse(UncheckedBaseModel): + id: str = pydantic.Field() + """ + Unique identifier for the Run. + """ + + dataset: typing.Optional[DatasetResponse] = pydantic.Field(default=None) + """ + The Dataset used in the Run. + """ + + version: typing.Optional[RunVersionResponse] = pydantic.Field(default=None) + """ + The version used in the Run. + """ + + orchestrated: bool = pydantic.Field() + """ + Whether the Run is orchestrated by Humanloop. + """ + + added_at: dt.datetime = pydantic.Field() + """ + When the Run was added to the Evaluation. + """ + + created_at: dt.datetime = pydantic.Field() + """ + When the Run was created. + """ + + created_by: typing.Optional[UserResponse] = pydantic.Field(default=None) + """ + The User who created the Run. + """ + + status: EvaluationStatus = pydantic.Field() + """ + The status of the Run. + """ + + control: bool = pydantic.Field() + """ + Stats for other Runs will be displayed in comparison to the control Run. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +update_forward_refs(EvaluatorResponse, EvaluationRunResponse=EvaluationRunResponse) +update_forward_refs(FlowResponse, EvaluationRunResponse=EvaluationRunResponse) +update_forward_refs(MonitoringEvaluatorResponse, EvaluationRunResponse=EvaluationRunResponse) +update_forward_refs(PromptResponse, EvaluationRunResponse=EvaluationRunResponse) +update_forward_refs(ToolResponse, EvaluationRunResponse=EvaluationRunResponse) +update_forward_refs(VersionDeploymentResponse, EvaluationRunResponse=EvaluationRunResponse) +update_forward_refs(VersionIdResponse, EvaluationRunResponse=EvaluationRunResponse) diff --git a/src/humanloop/types/evaluation_runs_response.py b/src/humanloop/types/evaluation_runs_response.py new file mode 100644 index 00000000..208a7529 --- /dev/null +++ b/src/humanloop/types/evaluation_runs_response.py @@ -0,0 +1,41 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations +from ..core.unchecked_base_model import UncheckedBaseModel +from .evaluator_response import EvaluatorResponse +from .flow_response import FlowResponse +from .monitoring_evaluator_response import MonitoringEvaluatorResponse +from .prompt_response import PromptResponse +from .tool_response import ToolResponse +from .version_deployment_response import VersionDeploymentResponse +from .version_id_response import VersionIdResponse +import typing +from .evaluation_run_response import EvaluationRunResponse +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.pydantic_utilities import update_forward_refs + + +class EvaluationRunsResponse(UncheckedBaseModel): + runs: typing.List[EvaluationRunResponse] = pydantic.Field() + """ + The Runs in the Evaluation. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +update_forward_refs(EvaluatorResponse, EvaluationRunsResponse=EvaluationRunsResponse) +update_forward_refs(FlowResponse, EvaluationRunsResponse=EvaluationRunsResponse) +update_forward_refs(MonitoringEvaluatorResponse, EvaluationRunsResponse=EvaluationRunsResponse) +update_forward_refs(PromptResponse, EvaluationRunsResponse=EvaluationRunsResponse) +update_forward_refs(ToolResponse, EvaluationRunsResponse=EvaluationRunsResponse) +update_forward_refs(VersionDeploymentResponse, EvaluationRunsResponse=EvaluationRunsResponse) +update_forward_refs(VersionIdResponse, EvaluationRunsResponse=EvaluationRunsResponse) diff --git a/src/humanloop/types/evaluation_stats.py b/src/humanloop/types/evaluation_stats.py index 350cf1db..9a6a07a7 100644 --- a/src/humanloop/types/evaluation_stats.py +++ b/src/humanloop/types/evaluation_stats.py @@ -1,23 +1,17 @@ # This file was auto-generated by Fern from our API Definition. 
from ..core.unchecked_base_model import UncheckedBaseModel -from .overall_stats import OverallStats -import pydantic import typing -from .version_stats_response import VersionStatsResponse +from .run_stats_response import RunStatsResponse +import pydantic from .evaluation_status import EvaluationStatus from ..core.pydantic_utilities import IS_PYDANTIC_V2 class EvaluationStats(UncheckedBaseModel): - overall_stats: OverallStats = pydantic.Field() - """ - Stats for the Evaluation Report as a whole. - """ - - version_stats: typing.List[VersionStatsResponse] = pydantic.Field() + run_stats: typing.List[RunStatsResponse] = pydantic.Field() """ - Stats for each Evaluated Version in the Evaluation Report. + Stats for each Run in the Evaluation. """ progress: typing.Optional[str] = pydantic.Field(default=None) diff --git a/src/humanloop/types/evaluator_response.py b/src/humanloop/types/evaluator_response.py index 69111519..fcaf0326 100644 --- a/src/humanloop/types/evaluator_response.py +++ b/src/humanloop/types/evaluator_response.py @@ -64,6 +64,16 @@ class EvaluatorResponse(UncheckedBaseModel): The user who created the Evaluator. """ + committed_by: typing.Optional[UserResponse] = pydantic.Field(default=None) + """ + The user who committed the Evaluator Version. + """ + + committed_at: typing.Optional[dt.datetime] = pydantic.Field(default=None) + """ + The date and time the Evaluator Version was committed. + """ + status: VersionStatus last_used_at: dt.datetime version_logs_count: int = pydantic.Field() diff --git a/src/humanloop/types/flow_response.py b/src/humanloop/types/flow_response.py index 2c478605..874782a1 100644 --- a/src/humanloop/types/flow_response.py +++ b/src/humanloop/types/flow_response.py @@ -66,6 +66,16 @@ class FlowResponse(UncheckedBaseModel): The user who created the Flow. """ + committed_by: typing.Optional[UserResponse] = pydantic.Field(default=None) + """ + The user who committed the Flow Version. + """ + + committed_at: typing.Optional[dt.datetime] = pydantic.Field(default=None) + """ + The date and time the Flow Version was committed. + """ + status: VersionStatus = pydantic.Field() """ The status of the Flow Version. diff --git a/src/humanloop/types/logs_association_type.py b/src/humanloop/types/logs_association_type.py new file mode 100644 index 00000000..c904b93c --- /dev/null +++ b/src/humanloop/types/logs_association_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +LogsAssociationType = typing.Union[typing.Literal["dynamic", "fixed"], typing.Any] diff --git a/src/humanloop/types/numeric_evaluator_stats_response.py b/src/humanloop/types/numeric_evaluator_stats_response.py index 6ca2e662..eec24ff5 100644 --- a/src/humanloop/types/numeric_evaluator_stats_response.py +++ b/src/humanloop/types/numeric_evaluator_stats_response.py @@ -9,7 +9,7 @@ class NumericEvaluatorStatsResponse(UncheckedBaseModel): """ Base attributes for stats for an Evaluator Version-Evaluated Version pair - in the Evaluation Report. + in the Evaluation. """ evaluator_version_id: str = pydantic.Field() diff --git a/src/humanloop/types/overall_stats.py b/src/humanloop/types/overall_stats.py index 8258f898..b1d6e6dc 100644 --- a/src/humanloop/types/overall_stats.py +++ b/src/humanloop/types/overall_stats.py @@ -9,17 +9,17 @@ class OverallStats(UncheckedBaseModel): num_datapoints: int = pydantic.Field() """ - The total number of Datapoints in the Evaluation Report's Dataset Version. 
+ The total number of Datapoints in the Evaluation's Dataset Version. """ total_logs: int = pydantic.Field() """ - The total number of Logs in the Evaluation Report. + The total number of Logs in the Evaluation. """ total_evaluator_logs: int = pydantic.Field() """ - The total number of Evaluator Logs in the Evaluation Report. + The total number of Evaluator Logs in the Evaluation. """ if IS_PYDANTIC_V2: diff --git a/src/humanloop/types/paginated_data_evaluation_log_response.py b/src/humanloop/types/paginated_data_evaluation_log_response.py new file mode 100644 index 00000000..c6e19791 --- /dev/null +++ b/src/humanloop/types/paginated_data_evaluation_log_response.py @@ -0,0 +1,49 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations +from ..core.unchecked_base_model import UncheckedBaseModel +from .evaluator_log_response import EvaluatorLogResponse +from .evaluator_response import EvaluatorResponse +from .flow_log_response import FlowLogResponse +from .flow_response import FlowResponse +from .monitoring_evaluator_response import MonitoringEvaluatorResponse +from .prompt_log_response import PromptLogResponse +from .prompt_response import PromptResponse +from .tool_log_response import ToolLogResponse +from .tool_response import ToolResponse +from .version_deployment_response import VersionDeploymentResponse +from .version_id_response import VersionIdResponse +import typing +from .evaluation_log_response import EvaluationLogResponse +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +import pydantic +from ..core.pydantic_utilities import update_forward_refs + + +class PaginatedDataEvaluationLogResponse(UncheckedBaseModel): + records: typing.List[EvaluationLogResponse] + page: int + size: int + total: int + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +update_forward_refs(EvaluatorLogResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(EvaluatorResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(FlowLogResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(FlowResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(MonitoringEvaluatorResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(PromptLogResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(PromptResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(ToolLogResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(ToolResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(VersionDeploymentResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) +update_forward_refs(VersionIdResponse, PaginatedDataEvaluationLogResponse=PaginatedDataEvaluationLogResponse) diff --git a/src/humanloop/types/paginated_data_evaluation_report_log_response.py b/src/humanloop/types/paginated_data_evaluation_report_log_response.py deleted file mode 100644 index 95c1725d..00000000 --- a/src/humanloop/types/paginated_data_evaluation_report_log_response.py +++ /dev/null 
diff --git a/src/humanloop/types/paginated_data_evaluation_report_log_response.py b/src/humanloop/types/paginated_data_evaluation_report_log_response.py
deleted file mode 100644
index 95c1725d..00000000
--- a/src/humanloop/types/paginated_data_evaluation_report_log_response.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# This file was auto-generated by Fern from our API Definition.
-
-from __future__ import annotations
-from ..core.unchecked_base_model import UncheckedBaseModel
-from .evaluator_log_response import EvaluatorLogResponse
-from .evaluator_response import EvaluatorResponse
-from .flow_log_response import FlowLogResponse
-from .flow_response import FlowResponse
-from .monitoring_evaluator_response import MonitoringEvaluatorResponse
-from .prompt_log_response import PromptLogResponse
-from .prompt_response import PromptResponse
-from .tool_log_response import ToolLogResponse
-from .tool_response import ToolResponse
-from .version_deployment_response import VersionDeploymentResponse
-from .version_id_response import VersionIdResponse
-import typing
-from .evaluation_report_log_response import EvaluationReportLogResponse
-from ..core.pydantic_utilities import IS_PYDANTIC_V2
-import pydantic
-from ..core.pydantic_utilities import update_forward_refs
-
-
-class PaginatedDataEvaluationReportLogResponse(UncheckedBaseModel):
-    records: typing.List[EvaluationReportLogResponse]
-    page: int
-    size: int
-    total: int
-
-    if IS_PYDANTIC_V2:
-        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
-    else:
-
-        class Config:
-            frozen = True
-            smart_union = True
-            extra = pydantic.Extra.allow
-
-
-update_forward_refs(
-    EvaluatorLogResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse
-)
-update_forward_refs(
-    EvaluatorResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse
-)
-update_forward_refs(FlowLogResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse)
-update_forward_refs(FlowResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse)
-update_forward_refs(
-    MonitoringEvaluatorResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse
-)
-update_forward_refs(
-    PromptLogResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse
-)
-update_forward_refs(PromptResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse)
-update_forward_refs(ToolLogResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse)
-update_forward_refs(ToolResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse)
-update_forward_refs(
-    VersionDeploymentResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse
-)
-update_forward_refs(
-    VersionIdResponse, PaginatedDataEvaluationReportLogResponse=PaginatedDataEvaluationReportLogResponse
-)
diff --git a/src/humanloop/types/prompt_call_response.py b/src/humanloop/types/prompt_call_response.py
index 64db5f49..492d10aa 100644
--- a/src/humanloop/types/prompt_call_response.py
+++ b/src/humanloop/types/prompt_call_response.py
@@ -79,11 +79,6 @@ class PromptCallResponse(UncheckedBaseModel):
     The ID of the parent Log to nest this Log under in a Trace.
     """
 
-    batch_id: typing.Optional[str] = pydantic.Field(default=None)
-    """
-    Unique identifier for the Batch to add this Batch to. Batches are used to group Logs together for Evaluations. A Batch will be created if one with the given ID does not exist.
-    """
-
     user: typing.Optional[str] = pydantic.Field(default=None)
     """
     End-user ID related to the Log.
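Because `batch_id` is removed from `PromptCallResponse`, code written against the previous release that still reads it should degrade gracefully. A hedged compatibility shim; the helper name is ours, not part of the SDK:

```python
import typing

from humanloop.types.prompt_call_response import PromptCallResponse


def legacy_batch_id(response: PromptCallResponse) -> typing.Optional[str]:
    # `batch_id` no longer exists on the model in this release; getattr keeps
    # older call sites working while they migrate to Run-based grouping.
    return getattr(response, "batch_id", None)
```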
diff --git a/src/humanloop/types/prompt_response.py b/src/humanloop/types/prompt_response.py
index 6f1029f5..64db52d5 100644
--- a/src/humanloop/types/prompt_response.py
+++ b/src/humanloop/types/prompt_response.py
@@ -157,6 +157,16 @@ class PromptResponse(UncheckedBaseModel):
     The user who created the Prompt.
     """
 
+    committed_by: typing.Optional[UserResponse] = pydantic.Field(default=None)
+    """
+    The user who committed the Prompt Version.
+    """
+
+    committed_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)
+    """
+    The date and time the Prompt Version was committed.
+    """
+
     status: VersionStatus = pydantic.Field()
     """
     The status of the Prompt Version.
diff --git a/src/humanloop/types/run_stats_response.py b/src/humanloop/types/run_stats_response.py
new file mode 100644
index 00000000..201c6e76
--- /dev/null
+++ b/src/humanloop/types/run_stats_response.py
@@ -0,0 +1,47 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from ..core.unchecked_base_model import UncheckedBaseModel
+import pydantic
+import typing
+from .run_stats_response_evaluator_stats_item import RunStatsResponseEvaluatorStatsItem
+from ..core.pydantic_utilities import IS_PYDANTIC_V2
+
+
+class RunStatsResponse(UncheckedBaseModel):
+    """
+    Stats for a Run in the Evaluation.
+    """
+
+    run_id: str = pydantic.Field()
+    """
+    Unique identifier for the Run.
+    """
+
+    version_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Unique identifier for the evaluated Version.
+    """
+
+    batch_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Unique identifier for the batch of Logs to include in the Evaluation.
+    """
+
+    num_logs: int = pydantic.Field()
+    """
+    The total number of existing Logs in this Run.
+    """
+
+    evaluator_stats: typing.List[RunStatsResponseEvaluatorStatsItem] = pydantic.Field()
+    """
+    Stats for each Evaluator Version applied to this Run.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
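A short sketch of reading the new `RunStatsResponse`; it touches only fields defined on the model above, and the summary helper itself is illustrative rather than part of the SDK:

```python
from humanloop.types.run_stats_response import RunStatsResponse


def summarize_run(stats: RunStatsResponse) -> str:
    # version_id and batch_id are optional on the model, so fall back to a placeholder.
    version = stats.version_id or "<unknown version>"
    return (
        f"Run {stats.run_id} evaluating {version}: "
        f"{stats.num_logs} Logs, {len(stats.evaluator_stats)} Evaluator stat(s)"
    )
```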
diff --git a/src/humanloop/types/run_stats_response_evaluator_stats_item.py b/src/humanloop/types/run_stats_response_evaluator_stats_item.py
new file mode 100644
index 00000000..c7fe6056
--- /dev/null
+++ b/src/humanloop/types/run_stats_response_evaluator_stats_item.py
@@ -0,0 +1,14 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+from .numeric_evaluator_stats_response import NumericEvaluatorStatsResponse
+from .boolean_evaluator_stats_response import BooleanEvaluatorStatsResponse
+from .select_evaluator_stats_response import SelectEvaluatorStatsResponse
+from .text_evaluator_stats_response import TextEvaluatorStatsResponse
+
+RunStatsResponseEvaluatorStatsItem = typing.Union[
+    NumericEvaluatorStatsResponse,
+    BooleanEvaluatorStatsResponse,
+    SelectEvaluatorStatsResponse,
+    TextEvaluatorStatsResponse,
+]
diff --git a/src/humanloop/types/evaluated_version_response.py b/src/humanloop/types/run_version_response.py
similarity index 71%
rename from src/humanloop/types/evaluated_version_response.py
rename to src/humanloop/types/run_version_response.py
index 3064bfb1..d94b1178 100644
--- a/src/humanloop/types/evaluated_version_response.py
+++ b/src/humanloop/types/run_version_response.py
@@ -6,4 +6,4 @@
 from .evaluator_response import EvaluatorResponse
 from .flow_response import FlowResponse
 
-EvaluatedVersionResponse = typing.Union[PromptResponse, ToolResponse, EvaluatorResponse, FlowResponse]
+RunVersionResponse = typing.Union[PromptResponse, ToolResponse, EvaluatorResponse, FlowResponse]
diff --git a/src/humanloop/types/text_evaluator_stats_response.py b/src/humanloop/types/text_evaluator_stats_response.py
index 735b4eb7..652c7aa6 100644
--- a/src/humanloop/types/text_evaluator_stats_response.py
+++ b/src/humanloop/types/text_evaluator_stats_response.py
@@ -9,7 +9,7 @@
 class TextEvaluatorStatsResponse(UncheckedBaseModel):
     """
     Base attributes for stats for an Evaluator Version-Evaluated Version pair
-    in the Evaluation Report.
+    in the Evaluation.
     """
 
     evaluator_version_id: str = pydantic.Field()
diff --git a/src/humanloop/types/tool_response.py b/src/humanloop/types/tool_response.py
index 3099da27..c1db98bb 100644
--- a/src/humanloop/types/tool_response.py
+++ b/src/humanloop/types/tool_response.py
@@ -92,6 +92,16 @@ class ToolResponse(UncheckedBaseModel):
     The user who created the Tool.
     """
 
+    committed_by: typing.Optional[UserResponse] = pydantic.Field(default=None)
+    """
+    The user who committed the Tool Version.
+    """
+
+    committed_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)
+    """
+    The date and time the Tool Version was committed.
+    """
+
     status: VersionStatus = pydantic.Field()
     """
     The status of the Tool Version.
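The rename to `RunVersionResponse` and the new commit metadata on Prompt and Tool Versions combine naturally. A hedged sketch: only `PromptResponse` and `ToolResponse` are shown here to carry `committed_at`, so the other union members simply fall back to None.

```python
import datetime as dt
import typing

from humanloop.types.run_version_response import RunVersionResponse


def version_committed_at(version: RunVersionResponse) -> typing.Optional[dt.datetime]:
    # committed_at is documented in this diff for Prompt and Tool Versions;
    # getattr lets Evaluator and Flow Versions return None without special-casing.
    return getattr(version, "committed_at", None)
```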
diff --git a/src/humanloop/types/version_specification.py b/src/humanloop/types/version_specification.py
new file mode 100644
index 00000000..bb3464ce
--- /dev/null
+++ b/src/humanloop/types/version_specification.py
@@ -0,0 +1,48 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from ..core.unchecked_base_model import UncheckedBaseModel
+import typing
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2
+
+
+class VersionSpecification(UncheckedBaseModel):
+    """
+    Specification of a File version on Humanloop.
+
+    This can be done in a couple of ways:
+
+    - Specifying `version_id` directly.
+    - Specifying a File (and optionally an Environment).
+      - A File can be specified by either `path` or `file_id`.
+      - An Environment can be specified by `environment_id`. If no Environment is specified, the default Environment is used.
+    """
+
+    version_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Unique identifier for the File Version. If provided, none of the other fields should be specified.
+    """
+
+    path: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Path identifying a File. Provide either this or `file_id` if you want to specify a File.
+    """
+
+    file_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Unique identifier for the File. Provide either this or `path` if you want to specify a File.
+    """
+
+    environment: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Name of the Environment a Version is deployed to. Only provide this when specifying a File. If not provided (and a File is specified), the default Environment is used.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/humanloop/types/version_stats_response.py b/src/humanloop/types/version_stats_response.py
index 25c0a682..6439fca4 100644
--- a/src/humanloop/types/version_stats_response.py
+++ b/src/humanloop/types/version_stats_response.py
@@ -8,28 +8,24 @@
 
 
 class VersionStatsResponse(UncheckedBaseModel):
-    """
-    Stats for an Evaluated Version in the Evaluation Report.
-    """
-
     version_id: str = pydantic.Field()
     """
-    Unique identifier for the Evaluated Version.
+    Unique identifier for the evaluated Version.
     """
 
     batch_id: typing.Optional[str] = pydantic.Field(default=None)
     """
-    Unique identifier for the batch of Logs to include in the Evaluation Report.
+    Unique identifier for the batch of Logs to include in the Evaluation.
    """
 
     num_logs: int = pydantic.Field()
     """
-    The total number of existing Logs for this Evaluated Version within the Evaluation Report. These are Logs that have been generated by this Evaluated Version on a Datapoint belonging to the Evaluation Report's Dataset Version.
+    The total number of existing Logs in this Run.
     """
 
     evaluator_version_stats: typing.List[VersionStatsResponseEvaluatorVersionStatsItem] = pydantic.Field()
     """
-    Stats for each Evaluator Version used to evaluate this Evaluated Version.
+    Stats for each Evaluator Version applied to this Run.
     """
 
     if IS_PYDANTIC_V2:
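To close, a minimal sketch of the two ways `VersionSpecification` can identify a version, built directly from the fields defined above; the path, ID, and Environment name below are placeholders, not real resources:

```python
from humanloop.types.version_specification import VersionSpecification

# Either pin an exact version by its ID (placeholder value)...
by_version = VersionSpecification(version_id="prv_placeholder")

# ...or point at a File by `path` (or `file_id`) plus an optional Environment;
# if no Environment is given, the default Environment is used.
by_file = VersionSpecification(path="My Project/My Prompt", environment="production")
```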