Skip to content

Commit f96e3d1

Browse files
authored
ref: Store SourceView linecache as offsets rather than pointers (#133)
1 parent 986b691 commit f96e3d1

File tree

1 file changed

+76
-35
lines changed

1 file changed

+76
-35
lines changed

src/sourceview.rs

Lines changed: 76 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
use std::fmt;
2-
use std::slice;
32
use std::str;
4-
use std::sync::atomic::AtomicUsize;
5-
use std::sync::atomic::Ordering;
63
use std::sync::Arc;
74
use std::sync::Mutex;
85

@@ -129,16 +126,14 @@ impl<'a> Iterator for Lines<'a> {
129126
/// operations.
130127
pub struct SourceView {
131128
source: Arc<str>,
132-
processed_until: AtomicUsize,
133-
lines: Mutex<Vec<&'static str>>,
129+
line_end_offsets: Mutex<Vec<LineEndOffset>>,
134130
}
135131

136132
impl Clone for SourceView {
137133
fn clone(&self) -> SourceView {
138134
SourceView {
139135
source: self.source.clone(),
140-
processed_until: AtomicUsize::new(0),
141-
lines: Mutex::new(vec![]),
136+
line_end_offsets: Mutex::new(vec![]),
142137
}
143138
}
144139
}
@@ -162,59 +157,74 @@ impl SourceView {
162157
pub fn new(source: Arc<str>) -> SourceView {
163158
SourceView {
164159
source,
165-
processed_until: AtomicUsize::new(0),
166-
lines: Mutex::new(vec![]),
160+
line_end_offsets: Mutex::new(vec![]),
167161
}
168162
}
169163

170164
/// Creates an optimized view from a given source string
171165
pub fn from_string(source: String) -> SourceView {
172166
SourceView {
173167
source: source.into(),
174-
processed_until: AtomicUsize::new(0),
175-
lines: Mutex::new(vec![]),
168+
line_end_offsets: Mutex::new(vec![]),
176169
}
177170
}
178171

179172
/// Returns a requested minified line.
180173
pub fn get_line(&self, idx: u32) -> Option<&str> {
181174
let idx = idx as usize;
182-
{
183-
let lines = self.lines.lock().unwrap();
184-
if idx < lines.len() {
185-
return Some(lines[idx]);
186-
}
175+
176+
let get_from_line_ends = |line_ends: &[LineEndOffset]| {
177+
let end = line_ends.get(idx)?.to_end_index();
178+
let start = if idx == 0 {
179+
0
180+
} else {
181+
line_ends[idx - 1].to_start_index()
182+
};
183+
Some(&self.source[start..end])
184+
};
185+
186+
let mut line_ends = self
187+
.line_end_offsets
188+
.lock()
189+
.unwrap_or_else(|e| e.into_inner());
190+
191+
if let Some(line) = get_from_line_ends(&line_ends) {
192+
return Some(line);
187193
}
188194

189-
// fetched everything
190-
if self.processed_until.load(Ordering::Relaxed) > self.source.len() {
195+
// check whether we've processed the entire string - the end of the
196+
// last-processed line would be the same as the end of the string
197+
if line_ends
198+
.last()
199+
.is_some_and(|i| i.to_end_index() == self.source.len())
200+
{
191201
return None;
192202
}
193203

194-
let mut lines = self.lines.lock().unwrap();
204+
let mut rest_offset = line_ends.last().map_or(0, |i| i.to_start_index());
205+
let mut rest = &self.source[rest_offset..];
195206
let mut done = false;
196207

197208
while !done {
198-
let rest = &self.source.as_bytes()[self.processed_until.load(Ordering::Relaxed)..];
199-
200-
let rv = if let Some(mut idx) = rest.iter().position(|&x| x == b'\n' || x == b'\r') {
201-
let rv = &rest[..idx];
202-
if rest[idx] == b'\r' && rest.get(idx + 1) == Some(&b'\n') {
203-
idx += 1;
209+
let line_term = if let Some(idx) = rest.find(['\n', '\r']) {
210+
rest_offset += idx;
211+
rest = &rest[idx..];
212+
if rest.starts_with("\r\n") {
213+
LineTerminator::CrLf
214+
} else {
215+
LineTerminator::LfOrCr
204216
}
205-
self.processed_until.fetch_add(idx + 1, Ordering::Relaxed);
206-
rv
207217
} else {
208-
self.processed_until
209-
.fetch_add(rest.len() + 1, Ordering::Relaxed);
218+
rest_offset += rest.len();
219+
rest = &rest[rest.len()..];
210220
done = true;
211-
rest
221+
LineTerminator::Eof
212222
};
213223

214-
lines.push(unsafe {
215-
str::from_utf8_unchecked(slice::from_raw_parts(rv.as_ptr(), rv.len()))
216-
});
217-
if let Some(&line) = lines.get(idx) {
224+
line_ends.push(LineEndOffset::new(rest_offset, line_term));
225+
rest_offset += line_term as usize;
226+
rest = &rest[line_term as usize..];
227+
if let Some(line) = get_from_line_ends(&line_ends) {
218228
return Some(line);
219229
}
220230
}
@@ -311,7 +321,7 @@ impl SourceView {
311321
/// Returns the number of lines.
312322
pub fn line_count(&self) -> usize {
313323
self.get_line(!0);
314-
self.lines.lock().unwrap().len()
324+
self.line_end_offsets.lock().unwrap().len()
315325
}
316326

317327
/// Returns the source map reference in the source view.
@@ -320,6 +330,37 @@ impl SourceView {
320330
}
321331
}
322332

333+
/// A wrapper around an index that stores a [`LineTerminator`] in its 2 lowest bits.
// We use `u64` instead of `usize` in order to not lose data when bit-packing
// on 32-bit targets.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct LineEndOffset(u64);

/// The kind of terminator that ends a line.
///
/// Each discriminant equals the byte length of the terminator it stands for.
/// Code that advances past a terminator relies on this by casting the variant
/// to an integer, so the values must not be changed independently of that
/// meaning, and they must fit in the 2 bits reserved by [`LineEndOffset`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum LineTerminator {
    /// The line runs to the end of the source; no terminator bytes follow.
    Eof = 0,
    /// The line ends with a single `\n` or a single `\r`.
    LfOrCr = 1,
    /// The line ends with the two-byte sequence `\r\n`.
    CrLf = 2,
}

impl LineEndOffset {
    /// Number of low bits reserved for the packed [`LineTerminator`].
    const TERMINATOR_BITS: u32 = 2;
    /// Mask selecting the packed [`LineTerminator`] bits.
    const TERMINATOR_MASK: u64 = (1 << Self::TERMINATOR_BITS) - 1;

    /// Packs the end index of a line together with its terminator kind.
    ///
    /// `index` is the byte offset at which the line's content ends, i.e. the
    /// offset of the terminator itself (or of the end of the source).
    fn new(index: usize, line_end: LineTerminator) -> Self {
        let index = index as u64;
        // Shifting must not push significant bits out of the `u64`.
        debug_assert!(
            index.leading_zeros() >= Self::TERMINATOR_BITS,
            "line end index does not fit next to the terminator bits"
        );

        Self((index << Self::TERMINATOR_BITS) | line_end as u64)
    }

    /// Return the index of the end of this line.
    fn to_end_index(self) -> usize {
        (self.0 >> Self::TERMINATOR_BITS) as usize
    }

    /// Return the index of the start of the next line.
    ///
    /// This is the end index plus the terminator's byte length, which is
    /// exactly the value stored in the low bits.
    fn to_start_index(self) -> usize {
        self.to_end_index() + (self.0 & Self::TERMINATOR_MASK) as usize
    }
}
363+
323364
#[cfg(test)]
324365
mod tests {
325366
use super::*;

0 commit comments

Comments
 (0)