Skip to content

Commit 7949c60

Browse files
committed
Faster SIMD approach
1 parent 1c478fe commit 7949c60

File tree

2 files changed

+142
-124
lines changed

2 files changed

+142
-124
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Advent of Code [![checks-badge]][checks-link] [![docs-badge]][docs-link]
22

33
Blazing fast Rust solutions for every [Advent of Code] puzzle from 2015 to 2024, taking
4-
**498 milliseconds** to solve all 500 stars. Each solution is carefully optimized for performance
4+
**497 milliseconds** to solve all 500 stars. Each solution is carefully optimized for performance
55
while ensuring the code remains concise, readable, and idiomatic.
66

77
## Features
@@ -67,7 +67,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
6767

6868
| Year | [2015](#2015) | [2016](#2016) | [2017](#2017) | [2018](#2018) | [2019](#2019) | [2020](#2020) | [2021](#2021) | [2022](#2022) | [2023](#2023) | [2024](#2024) |
6969
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
70-
| Benchmark (ms) | 15 | 109 | 82 | 35 | 14 | 220 | 8 | 6 | 5 | 4 |
70+
| Benchmark (ms) | 15 | 109 | 82 | 35 | 14 | 220 | 8 | 5 | 5 | 4 |
7171

7272
## 2024
7373

@@ -161,7 +161,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
161161
| 20 | [Grove Positioning System](https://adventofcode.com/2022/day/20) | [Source](src/year2022/day20.rs) | 2685 |
162162
| 21 | [Monkey Math](https://adventofcode.com/2022/day/21) | [Source](src/year2022/day21.rs) | 64 |
163163
| 22 | [Monkey Map](https://adventofcode.com/2022/day/22) | [Source](src/year2022/day22.rs) | 61 |
164-
| 23 | [Unstable Diffusion](https://adventofcode.com/2022/day/23) | [Source](src/year2022/day23.rs) | 1521 |
164+
| 23 | [Unstable Diffusion](https://adventofcode.com/2022/day/23) | [Source](src/year2022/day23.rs) | 1061 |
165165
| 24 | [Blizzard Basin](https://adventofcode.com/2022/day/24) | [Source](src/year2022/day24.rs) | 62 |
166166
| 25 | [Full of Hot Air](https://adventofcode.com/2022/day/25) | [Source](src/year2022/day25.rs) | 3 |
167167

src/year2022/day23.rs

Lines changed: 139 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -2,100 +2,17 @@
22
//!
33
//! We represent elves as bits in an integer then use bitwise operations to efficiently figure
44
//! out the movement for multiple elves at once.
5-
use self::Direction::*;
6-
use std::ops::{BitAnd, BitAndAssign, BitOr, Not};
5+
use Direction::*;
6+
7+
#[cfg(not(feature = "simd"))]
8+
use scalar::U256;
9+
#[cfg(feature = "simd")]
10+
use simd::U256;
711

812
/// The initial grid is 70 x 70. Elves stop moving when no other elf is adjacent so the grid
913
/// will expand at most 70 in any direction, giving 70 + 70 + 70 = 210 total.
1014
const HEIGHT: usize = 210;
1115

12-
/// Duct tape two `u128`s together.
13-
#[derive(Clone, Copy, Default)]
14-
pub struct U256 {
15-
left: u128,
16-
right: u128,
17-
}
18-
19-
impl U256 {
20-
fn bit_set(&mut self, offset: usize) {
21-
if offset < 128 {
22-
self.left |= 1 << (127 - offset);
23-
} else {
24-
self.right |= 1 << (255 - offset);
25-
}
26-
}
27-
28-
fn count_ones(&self) -> u32 {
29-
self.left.count_ones() + self.right.count_ones()
30-
}
31-
32-
fn non_zero(&self) -> bool {
33-
self.left != 0 || self.right != 0
34-
}
35-
36-
/// Used to find the bounding rectangle for part one.
37-
fn min_set(&self) -> Option<u32> {
38-
if self.left != 0 {
39-
Some(self.left.leading_zeros())
40-
} else if self.right != 0 {
41-
Some(128 + self.right.leading_zeros())
42-
} else {
43-
None
44-
}
45-
}
46-
47-
/// Used to find the bounding rectangle for part one.
48-
fn max_set(&self) -> Option<u32> {
49-
if self.right != 0 {
50-
Some(255 - self.right.trailing_zeros())
51-
} else if self.left != 0 {
52-
Some(127 - self.left.trailing_zeros())
53-
} else {
54-
None
55-
}
56-
}
57-
58-
fn left_shift(&self) -> U256 {
59-
U256 { left: (self.left << 1) | (self.right >> 127), right: (self.right << 1) }
60-
}
61-
62-
fn right_shift(&self) -> U256 {
63-
U256 { left: (self.left >> 1), right: (self.left << 127) | (self.right >> 1) }
64-
}
65-
}
66-
67-
/// Syntactic sugar to provide the regular `&`, `|` and `!` bitwise operator notation.
68-
impl BitAnd for U256 {
69-
type Output = U256;
70-
71-
fn bitand(self, rhs: U256) -> U256 {
72-
U256 { left: self.left & rhs.left, right: self.right & rhs.right }
73-
}
74-
}
75-
76-
impl BitOr for U256 {
77-
type Output = U256;
78-
79-
fn bitor(self, rhs: U256) -> U256 {
80-
U256 { left: self.left | rhs.left, right: self.right | rhs.right }
81-
}
82-
}
83-
84-
impl Not for U256 {
85-
type Output = U256;
86-
87-
fn not(self) -> U256 {
88-
U256 { left: !self.left, right: !self.right }
89-
}
90-
}
91-
92-
impl BitAndAssign for U256 {
93-
fn bitand_assign(&mut self, rhs: U256) {
94-
self.left &= rhs.left;
95-
self.right &= rhs.right;
96-
}
97-
}
98-
9916
enum Direction {
10017
North,
10118
South,
@@ -123,30 +40,39 @@ pub fn parse(input: &str) -> Input {
12340
for (y, row) in raw.iter().enumerate() {
12441
for (x, col) in row.iter().enumerate() {
12542
if *col == b'#' {
126-
grid[offset + y].bit_set(offset + x);
43+
grid[offset + y].set_bit(offset + x);
12744
}
12845
}
12946
}
13047

13148
Input { grid, north: default, south: default, west: default, east: default }
13249
}
13350

134-
pub fn part1(input: &Input) -> u32 {
51+
pub fn part1(input: &Input) -> usize {
13552
let mut input = *input;
13653
let mut order = [North, South, West, East];
13754

13855
for _ in 0..10 {
13956
step(&mut input, &mut order);
14057
}
14158

142-
// Find the bounding rectangle.
59+
// Find the total number of elves and the bounding rectangle.
14360
let grid = input.grid;
144-
let elves: u32 = grid.iter().map(U256::count_ones).sum();
145-
let min_x = grid.iter().filter_map(U256::min_set).min().unwrap();
146-
let max_x = grid.iter().filter_map(U256::max_set).max().unwrap();
147-
let min_y = grid.iter().position(U256::non_zero).unwrap() as u32;
148-
let max_y = grid.iter().rposition(U256::non_zero).unwrap() as u32;
61+
let elves = grid.iter().flat_map(U256::as_array).map(u8::count_ones).sum::<u32>() as usize;
14962

63+
// Vertical bounds.
64+
let min_y = grid.iter().position(U256::non_zero).unwrap();
65+
let max_y = grid.iter().rposition(U256::non_zero).unwrap();
66+
67+
// Horizontal bounds.
68+
let array = grid.iter().fold(U256::default(), |acc, &n| acc.or(n)).as_array();
69+
let left = array.iter().position(|&e| e != 0).unwrap();
70+
let right = array.iter().rposition(|&e| e != 0).unwrap();
71+
72+
let min_x = 8 * left + array[left].leading_zeros() as usize;
73+
let max_x = 8 * right + (7 - array[right].trailing_zeros()) as usize;
74+
75+
// Empty ground tiles.
15076
(max_x - min_x + 1) * (max_y - min_y + 1) - elves
15177
}
15278

@@ -175,51 +101,51 @@ fn step(input: &mut Input, order: &mut [Direction]) -> bool {
175101
let mut prev;
176102
// Find horizontal neighbors in each row. To make movement calculations easier
177103
// we invert so that a bit is 1 if movement is *possible*.
178-
let mut cur = !(grid[0].right_shift() | grid[0] | grid[0].left_shift());
179-
let mut next = !(grid[1].right_shift() | grid[1] | grid[1].left_shift());
104+
let mut cur = grid[0].shr().or(grid[0]).or(grid[0].shl()).not();
105+
let mut next = grid[1].shr().or(grid[1]).or(grid[1].shl()).not();
180106

181107
for i in start..end {
182108
// Calculating neighbors is relatively expensive so re-use results between rows.
183109
prev = cur;
184110
cur = next;
185-
next = !(grid[i + 1].right_shift() | grid[i + 1] | grid[i + 1].left_shift());
111+
next = grid[i + 1].shr().or(grid[i + 1]).or(grid[i + 1].shl()).not();
186112

187113
let mut up = prev;
188114
let mut down = next;
189115
// Find neighbors in vertical columns.
190-
let vertical = !(grid[i - 1] | grid[i] | grid[i + 1]);
191-
let mut left = vertical.right_shift();
192-
let mut right = vertical.left_shift();
116+
let vertical = grid[i - 1].or(grid[i]).or(grid[i + 1]).not();
117+
let mut left = vertical.shr();
118+
let mut right = vertical.shl();
193119
// Elves need at least 1 neighbor to propose moving.
194-
let mut remaining = grid[i] & !(up & down & left & right);
120+
let mut remaining = grid[i].and(up.and(down).and(left).and(right).not());
195121

196122
// Consider each direction one at a time, removing any elves who propose it.
197123
for direction in &*order {
198124
match direction {
199125
North => {
200-
up &= remaining;
201-
remaining &= !up;
126+
up = up.and(remaining);
127+
remaining = remaining.and(up.not());
202128
}
203129
South => {
204-
down &= remaining;
205-
remaining &= !down;
130+
down = down.and(remaining);
131+
remaining = remaining.and(down.not());
206132
}
207133
West => {
208-
left &= remaining;
209-
remaining &= !left;
134+
left = left.and(remaining);
135+
remaining = remaining.and(left.not());
210136
}
211137
East => {
212-
right &= remaining;
213-
remaining &= !right;
138+
right = right.and(remaining);
139+
remaining = remaining.and(right.not());
214140
}
215141
}
216142
}
217143

218144
// Copy final proposals to an array for each direction.
219145
north[i - 1] = up;
220146
south[i + 1] = down;
221-
west[i] = left.left_shift();
222-
east[i] = right.right_shift();
147+
west[i] = left.shl();
148+
east[i] = right.shr();
223149
}
224150

225151
// Elves that propose moving to the same spot cancel each other out and no-one moves.
@@ -230,23 +156,115 @@ fn step(input: &mut Input, order: &mut [Direction]) -> bool {
230156
let down = south[i];
231157
let left = west[i];
232158
let right = east[i];
233-
north[i] &= !down;
234-
south[i] &= !up;
235-
west[i] &= !right;
236-
east[i] &= !left;
159+
north[i] = north[i].and(down.not());
160+
south[i] = south[i].and(up.not());
161+
west[i] = west[i].and(right.not());
162+
east[i] = east[i].and(left.not());
237163
}
238164

239165
for i in start..end {
240166
// Stationary elves.
241167
let same =
242-
grid[i] & !(north[i - 1] | south[i + 1] | west[i].right_shift() | east[i].left_shift());
168+
grid[i].and(north[i - 1].or(south[i + 1]).or(west[i].shr()).or(east[i].shl()).not());
243169
// Moving elves.
244-
let change = north[i] | south[i] | west[i] | east[i];
245-
grid[i] = same | change;
170+
let change = north[i].or(south[i]).or(west[i]).or(east[i]);
171+
grid[i] = same.or(change);
246172
moved |= change.non_zero();
247173
}
248174

249175
// Rotate the order of movement proposals for the next turn.
250176
order.rotate_left(1);
251177
moved
252178
}
179+
180+
#[cfg(not(feature = "simd"))]
181+
mod scalar {
182+
/// Duct tape two `u128`s together.
183+
#[derive(Clone, Copy, Default)]
184+
pub(super) struct U256 {
185+
left: u128,
186+
right: u128,
187+
}
188+
189+
impl U256 {
190+
pub(super) fn set_bit(&mut self, offset: usize) {
191+
if offset < 128 {
192+
self.left |= 1 << (127 - offset);
193+
} else {
194+
self.right |= 1 << (255 - offset);
195+
}
196+
}
197+
198+
pub(super) fn as_array(&self) -> [u8; 32] {
199+
[self.left.to_be_bytes(), self.right.to_be_bytes()].concat().try_into().unwrap()
200+
}
201+
202+
pub(super) fn non_zero(&self) -> bool {
203+
self.left != 0 || self.right != 0
204+
}
205+
206+
pub(super) fn shl(self) -> U256 {
207+
U256 { left: (self.left << 1) | (self.right >> 127), right: (self.right << 1) }
208+
}
209+
210+
pub(super) fn shr(self) -> U256 {
211+
U256 { left: (self.left >> 1), right: (self.left << 127) | (self.right >> 1) }
212+
}
213+
214+
pub(super) fn and(self, rhs: U256) -> U256 {
215+
U256 { left: self.left & rhs.left, right: self.right & rhs.right }
216+
}
217+
218+
pub(super) fn or(self, rhs: U256) -> U256 {
219+
U256 { left: self.left | rhs.left, right: self.right | rhs.right }
220+
}
221+
222+
pub(super) fn not(self) -> U256 {
223+
U256 { left: !self.left, right: !self.right }
224+
}
225+
}
226+
}
227+
228+
#[cfg(feature = "simd")]
229+
mod simd {
230+
use std::simd::*;
231+
232+
#[derive(Clone, Copy, Default)]
233+
pub(super) struct U256 {
234+
v: Simd<u8, 32>,
235+
}
236+
237+
impl U256 {
238+
pub(super) fn set_bit(&mut self, offset: usize) {
239+
self.v[offset / 8] |= 1 << (7 - offset % 8);
240+
}
241+
242+
pub(super) fn as_array(&self) -> [u8; 32] {
243+
self.v.to_array()
244+
}
245+
246+
pub(super) fn non_zero(&self) -> bool {
247+
self.v != Simd::splat(0)
248+
}
249+
250+
pub(super) fn shl(self) -> U256 {
251+
U256 { v: (self.v << 1) | (self.v.shift_elements_left::<1>(0) >> 7) }
252+
}
253+
254+
pub(super) fn shr(self) -> U256 {
255+
U256 { v: (self.v >> 1) | (self.v.shift_elements_right::<1>(0) << 7) }
256+
}
257+
258+
pub(super) fn and(self, rhs: U256) -> U256 {
259+
U256 { v: self.v & rhs.v }
260+
}
261+
262+
pub(super) fn or(self, rhs: U256) -> U256 {
263+
U256 { v: self.v | rhs.v }
264+
}
265+
266+
pub(super) fn not(self) -> U256 {
267+
U256 { v: !self.v }
268+
}
269+
}
270+
}

0 commit comments

Comments
 (0)