Skip to content

Commit a274ba5

Browse files
committed
feat: add in mini parser for quoteless
1 parent d0d1428 commit a274ba5

File tree

6 files changed

+177
-10
lines changed

6 files changed

+177
-10
lines changed

bin/esbuild

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
#!/usr/bin/env sh
22

3-
node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs index.js --minify --allow-overwrite --outfile=index.cjs
4-
node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse.js --bundle --minify --allow-overwrite --outfile=parse.cjs
5-
node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs format.js --bundle --minify --allow-overwrite --outfile=format.cjs
3+
node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs index.js --minify --allow-overwrite --outfile=index.cjs
4+
node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse.js --bundle --minify --allow-overwrite --outfile=parse.cjs
5+
node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse-mini.js --bundle --minify --allow-overwrite --outfile=parse-mini.cjs
6+
node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs format.js --bundle --minify --allow-overwrite --outfile=format.cjs
67

7-
node_modules/.bin/esbuild --platform=node --format=esm index.js --minify --sourcemap=external --allow-overwrite --outfile=index.mjs
8-
node_modules/.bin/esbuild --platform=node --format=esm parse.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse.mjs
9-
node_modules/.bin/esbuild --platform=node --format=esm format.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=format.mjs
8+
node_modules/.bin/esbuild --platform=node --format=esm index.js --minify --sourcemap=external --allow-overwrite --outfile=index.mjs
9+
node_modules/.bin/esbuild --platform=node --format=esm parse.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse.mjs
10+
node_modules/.bin/esbuild --platform=node --format=esm parse-mini.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse-mini.mjs
11+
node_modules/.bin/esbuild --platform=node --format=esm format.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=format.mjs
1012

index.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
// import {TextDecoder} from 'node:util'
22
// import {defaultOptions, optionDetectNewlineValue} from './options.js'
33
import csvParse from 'csv-rex/parse'
4+
import csvParseMini from 'csv-rex/parse-mini'
45
import csvFormat from 'csv-rex/format'
56

67
export const parse = csvParse
8+
export const parseMini = csvParseMini
79
export const format = csvFormat
810

911
export default {
1012
parse: csvParse,
13+
parseMini: csvParseMini,
1114
format: csvFormat
1215
}

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "csv-rex",
3-
"version": "0.3.1",
3+
"version": "0.4.0",
44
"description": "A tiny and fast CSV parser for JavaScript.",
55
"type": "module",
66
"files": [
@@ -26,6 +26,14 @@
2626
"default": "./parse.cjs"
2727
}
2828
},
29+
"./parse-mini": {
30+
"import": {
31+
"default": "./parse-mini.mjs"
32+
},
33+
"require": {
34+
"default": "./parse-mini.cjs"
35+
}
36+
},
2937
"./format": {
3038
"import": {
3139
"default": "./format.mjs"
@@ -41,7 +49,8 @@
4149
"pre-commit": "lint-staged",
4250
"lint": "prettier --write *.{js,json} && standard --fix *.js",
4351
"test": "c8 node --test",
44-
"build": "./bin/esbuild"
52+
"build": "./bin/esbuild",
53+
"bench": "npm run build && node parse.bench.js"
4554
},
4655
"repository": {
4756
"type": "git",

parse-mini.js

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// chunkSize >> largest expected row
2+
// Defaults for the quote-less mini parser. Quoting, escaping and comment
// handling are intentionally unsupported here; use the full parser for those.
const defaultOptions = {
  header: true, // false: return arrays; true: read headers from the first line and return objects; [...]: use these headers and return objects
  newlineChar: '\r\n', // '\r\n': Windows; '\n': Linux/Mac (the mini parser does not auto-detect)
  delimiterChar: ',', // may be longer than one character

  // Parse
  emptyFieldValue: '', // value substituted for empty fields
  coerceField: (field) => field, // TODO tests
  errorOnFieldsMismatch: true // false: silently drop rows whose field count differs from the header
}

const length = (value) => value.length

/**
 * Create a streaming, quote-less CSV parser.
 *
 * @param {object} [opts] Overrides for `defaultOptions`.
 * @returns {{chunkParse: Function, header: Function, previousChunk: Function}}
 *   - `chunkParse(string, controller, flush = false)` parses every complete
 *     line of `string` and emits `{ idx, data }` or `{ idx, err }` through
 *     `controller.enqueue`. Unless `flush` is true, the text after the last
 *     newline is held back as a partial line.
 *   - `header()` returns the current header option / detected header array.
 *   - `previousChunk()` returns the held-back partial line; the caller must
 *     prepend it to the next chunk (or pass it alone with `flush = true`).
 */
export const parse = (opts = {}) => {
  const options = { ...defaultOptions, ...opts }
  const { newlineChar, emptyFieldValue, coerceField, errorOnFieldsMismatch } =
    options
  // An empty delimiter can never separate fields, so fall back to the default.
  const delimiterChar = options.delimiterChar || defaultOptions.delimiterChar
  const delimiterLength = length(delimiterChar)

  let { header } = options
  // `header` may be a boolean; only an array carries a usable length.
  let headerLength = Array.isArray(header) ? length(header) : 0

  let enqueue
  let partialLine = ''
  let idx = 0 // 1-based index of the data row being emitted (header excluded)

  const enqueueError = (code, message) => {
    enqueue({ idx, err: { code, message } })
  }

  // Emit one parsed row, mapping it onto the header names when available.
  const enqueueRow = (row) => {
    let data = row
    idx += 1
    if (headerLength) {
      const rowLength = length(row)
      if (headerLength !== rowLength) {
        if (errorOnFieldsMismatch) {
          enqueueError(
            'FieldsMismatch',
            `Incorrect number of fields parsed, expected ${headerLength}.`
          )
        }
        return
      }
      data = {}
      for (let i = 0; i < rowLength; i++) {
        data[header[i]] = row[i]
      }
    }
    enqueue({ idx, data })
  }

  const transformField = (field, fieldIndex) => {
    return coerceField(field || emptyFieldValue, fieldIndex)
  }

  // Split a single line into transformed fields. An empty line yields an
  // empty row; a trailing delimiter yields a trailing empty field.
  const parseLine = (line) => {
    const row = []
    if (line === '') return row
    let cursor = 0
    for (;;) {
      const delimiterIndex = line.indexOf(delimiterChar, cursor)
      if (delimiterIndex === -1) {
        row.push(transformField(line.substring(cursor), length(row)))
        return row
      }
      row.push(
        transformField(line.substring(cursor, delimiterIndex), length(row))
      )
      // Skip the whole delimiter, which may be more than one character.
      cursor = delimiterIndex + delimiterLength
    }
  }

  const chunkParse = (string, controller, flush = false) => {
    // Call through the controller so `this`-dependent enqueue methods
    // (e.g. stream controllers) keep working.
    enqueue = (item) => controller.enqueue(item)
    const lines = string.split(newlineChar)
    if (flush) {
      // Everything left is final; a trailing newline (or empty input) leaves
      // an empty last element that is not a row.
      partialLine = ''
      if (lines[length(lines) - 1] === '') lines.pop()
    } else {
      // The last element may be cut mid-row; keep it for the next chunk.
      partialLine = lines.pop()
    }
    const linesLength = length(lines)

    let i = 0
    if (header === true && linesLength > 0) {
      header = lines[0].split(delimiterChar)
      headerLength = length(header)
      i = 1
    }

    for (; i < linesLength; i++) {
      enqueueRow(parseLine(lines[i]))
    }
  }

  return {
    chunkParse,
    header: () => header,
    previousChunk: () => partialLine
  }
}
116+
117+
/**
 * Parse a complete quote-less CSV string in one call.
 *
 * The input is fed to the streaming parser in `chunkSize`-character slices;
 * any text left after the last newline is flushed at the end.
 *
 * @param {string} input CSV text to parse.
 * @param {object} [opts] Parser options plus:
 *   - `enableReturn` (default true): collect each emitted item's `data` and return it.
 *   - `chunkSize` (default 64 MiB of characters): slice size; should be much
 *     larger than the largest expected row.
 *   - `enqueue` (default no-op): called with every emitted `{ idx, data }` /
 *     `{ idx, err }` item.
 * @returns {Array|false} Collected `data` values when `enableReturn` is truthy
 *   (error items contribute `undefined`), otherwise the falsy `enableReturn`.
 */
export default (input, opts) => {
  const options = {
    ...defaultOptions,
    enableReturn: true,
    chunkSize: 64 * 1024 * 1024,
    enqueue: () => {},
    ...opts
  }
  const { chunkSize, enableReturn, enqueue } = options
  const { chunkParse, previousChunk } = parse(options)

  const res = []
  const controller = { enqueue }

  if (enableReturn) {
    // Still forward every item to the user's enqueue while collecting data.
    controller.enqueue = (row) => {
      enqueue(row)
      res.push(row.data)
    }
  }

  for (let position = 0; position < input.length; position += chunkSize) {
    // Prepend whatever partial line the previous slice left behind.
    const chunk =
      previousChunk() + input.substring(position, position + chunkSize)
    chunkParse(chunk, controller)
  }

  // Flush the trailing partial line. When nothing is left (input ended with a
  // newline, or was empty) there is no row to emit, so skip the call instead
  // of producing a bogus empty row.
  const remainder = previousChunk()
  if (remainder !== '') {
    chunkParse(remainder, controller, true)
  }

  return enableReturn && res
}

parse.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ export const detectChar = (chunk, pattern) => {
255255
const chars = {}
256256
while ((match = pattern.exec(chunk))) {
257257
const char = match[0]
258-
console.log({ char, chars })
259258
chars[char] ??= 0
260259
chars[char] += 1
261260
if (chars[char] > 5) return char

0 commit comments

Comments
 (0)