Skip to content

Commit 1fe0cd9

Browse files
committed
Merge branch 'main' into add_checkout
2 parents 9814077 + aa8712a commit 1fe0cd9

File tree

15 files changed

+440
-284
lines changed

15 files changed

+440
-284
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
numpy-version: "1.26"
2424

2525
steps:
26-
- uses: actions/checkout@v5
26+
- uses: actions/checkout@v6
2727

2828
- name: Set up Python ${{ matrix.python-version }}
2929
uses: actions/setup-python@v6

.github/workflows/cibuildwheels.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
artifact_name: "macos-universal2"
6161
steps:
6262
- name: Checkout repo
63-
uses: actions/checkout@v5
63+
uses: actions/checkout@v6
6464

6565
- name: Set up Python
6666
uses: actions/setup-python@v6
@@ -83,7 +83,7 @@ jobs:
8383
arch: amd64
8484

8585
- name: Build wheels
86-
uses: pypa/cibuildwheel@v3.2
86+
uses: pypa/cibuildwheel@v3.3
8787

8888
- name: Make sdist
8989
if: ${{ matrix.os == 'ubuntu-latest' }}

.github/workflows/wasm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030

3131
steps:
3232
- name: Checkout repo
33-
uses: actions/checkout@v5
33+
uses: actions/checkout@v6
3434

3535
- name: Set up Python
3636
uses: actions/setup-python@v6

bench/ndarray/roofline-analysis.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,9 @@ def setup_arrays(mem_mode: bool):
153153
shape = (15_000, 15_000)
154154
large_shape = (2,) + shape
155155
else:
156-
shape = (30_000, 30_000)
157-
large_shape = (15,) + shape
156+
# shape = (30_000, 30_000)
157+
shape = (15_000, 15_000)
158+
large_shape = (60,) + shape
158159

159160
nelem = math.prod(shape)
160161
large_nelem = math.prod(large_shape)

bench/ndarray/roofline-mem-speed-plot.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,54 @@ def extract_xy(mem_dict):
108108
va="top",
109109
)
110110

111+
# --- single workload label per workload name (avoid duplicates) ---
112+
# Build a map: workload name -> {"intensity": [...], "gflops": [...]}, collecting one value per memory speed (mem_4800/mem_6000)
113+
workload_map: dict[str, dict[str, list[float]]] = {}
114+
115+
for workload, metrics in mem_4800.items():
116+
intensity = metrics["Intensity"]
117+
gflops = metrics["GFLOPS"]
118+
if workload not in workload_map:
119+
workload_map[workload] = {"intensity": [], "gflops": []}
120+
workload_map[workload]["intensity"].append(intensity)
121+
workload_map[workload]["gflops"].append(gflops)
122+
123+
for workload, metrics in mem_6000.items():
124+
intensity = metrics["Intensity"]
125+
gflops = metrics["GFLOPS"]
126+
if workload not in workload_map:
127+
workload_map[workload] = {"intensity": [], "gflops": []}
128+
workload_map[workload]["intensity"].append(intensity)
129+
workload_map[workload]["gflops"].append(gflops)
130+
131+
# Place a single label per workload at the average intensity and slightly below
132+
# the minimum GFLOPS across both memory speeds for that workload.
133+
for workload, vals in workload_map.items():
134+
intensities = vals["intensity"]
135+
gflops_list = vals["gflops"]
136+
x_label = sum(intensities) / len(intensities)
137+
y_min = min(gflops_list)
138+
raw_ypos = y_min * 0.6
139+
140+
ymin_curr, _ = ax.get_ylim()
141+
safe_ypos = max(raw_ypos, ymin_curr * 1.5 if ymin_curr > 0 else raw_ypos)
142+
143+
# Avoid overlap between matmul1 and matmul2 by using different vertical offsets
144+
if workload == "matmul1":
145+
safe_ypos *= .8 # scale the y position down: moves the matmul1 label a bit lower
146+
elif workload == "matmul2":
147+
safe_ypos *= 1.2 # scale the y position up: moves the matmul2 label a bit higher
148+
149+
ax.annotate(
150+
workload,
151+
(x_label, safe_ypos),
152+
ha="center",
153+
va="top",
154+
fontsize=10,
155+
alpha=0.9,
156+
)
157+
# --------------------------------------------------------------
158+
111159
ax.set_xlabel("Arithmetic Intensity (FLOPs/element)")
112160
ax.set_ylabel("Performance (GFLOPS/sec)")
113161
ax.set_title("Memory speed impact on NumPy/NumExpr performance\nAMD 7800X3D (in-memory)")

0 commit comments

Comments
 (0)