Skip to content

Commit 7bacb83

Browse files
committed
feat: provide modified position
1 parent 973906e commit 7bacb83

File tree

9 files changed

+5011
-630
lines changed

9 files changed

+5011
-630
lines changed

package-lock.json

Lines changed: 4430 additions & 621 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,16 @@
4444
},
4545
"devDependencies": {
4646
"@types/jest": "^22.1.3",
47+
"@types/lodash.mapvalues": "^4.6.3",
4748
"jest": "^22.4.2",
4849
"prettier": "1.11.0",
4950
"ts-jest": "^22.4.0",
5051
"tslint": "^5.9.1",
5152
"tslint-config-ktsn": "^2.1.0",
5253
"tslint-config-prettier": "^1.9.0",
5354
"typescript": "^2.7.2"
55+
},
56+
"dependencies": {
57+
"lodash.mapvalues": "^4.6.0"
5458
}
5559
}

src/html-parser.js

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
/*!
2+
* HTML Parser By John Resig (ejohn.org)
3+
* Modified by Juriy "kangax" Zaytsev, Evan You and Vue.js community
4+
* Original code by Erik Arvidsson, Mozilla Public License
5+
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
6+
*/
7+
8+
import { makeMap, no } from './utils'
9+
10+
// HTML5 tags https://html.spec.whatwg.org/multipage/indices.html#elements-3
// Phrasing Content https://html.spec.whatwg.org/multipage/dom.html#phrasing-content
// Tag-name predicate: when `expectHTML` is set, a start tag for which this
// returns truthy implicitly closes a currently open <p>.
const isNonPhrasingTag = makeMap(
  'address,article,aside,base,blockquote,body,caption,col,colgroup,dd,' +
  'details,dialog,div,dl,dt,fieldset,figcaption,figure,footer,form,' +
  'h1,h2,h3,h4,h5,h6,head,header,hgroup,hr,html,legend,li,menuitem,meta,' +
  'optgroup,option,param,rp,rt,source,style,summary,tbody,td,tfoot,th,thead,' +
  'title,tr,track'
)

// Regular Expressions for parsing tags and attributes
// Capture groups of `attribute`: 1 = name, 2 = "=", 3 = double-quoted value,
// 4 = single-quoted value, 5 = unquoted value.
const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
// could use https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
// but for Vue templates we can enforce a simple charset
const ncname = '[a-zA-Z_][\\w\\-\\.]*'
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
const startTagOpen = new RegExp(`^<${qnameCapture}`)
const startTagClose = /^\s*(\/?)>/
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
const doctype = /^<!DOCTYPE [^>]+>/i
// #7298: escape - to avoid being parsed as HTML comment when inlined in page
const comment = /^<!\--/
const conditionalComment = /^<!\[/

// Feature detection: becomes true when the engine reports an unmatched
// optional capture group as '' instead of undefined.
let IS_REGEX_CAPTURING_BROKEN = false
'x'.replace(/x(.)?/g, function (m, g) {
  IS_REGEX_CAPTURING_BROKEN = g === ''
})

// Special Elements (can contain anything)
export const isPlainTextElement = makeMap('script,style,textarea', true)
// Cache of "everything up to the closing tag" regexes, keyed by lower-cased tag name.
const reCache = {}

// Entity -> character table used when decoding attribute values.
const decodingMap = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&amp;': '&',
  '&#10;': '\n',
  '&#9;': '\t'
}
const encodedAttr = /&(?:lt|gt|quot|amp);/g
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#10|#9);/g

// #5992
// A newline immediately following the opening tag of these elements is dropped.
const isIgnoreNewlineTag = makeMap('pre,textarea', true)
const shouldIgnoreFirstNewline = (tag, html) => tag && isIgnoreNewlineTag(tag) && html[0] === '\n'
57+
58+
/**
 * Decode the HTML entities of an attribute value using `decodingMap`.
 *
 * @param {string} value - raw attribute value as found in the markup
 * @param {*} shouldDecodeNewlines - when truthy, `&#10;` and `&#9;` are
 *   decoded in addition to `&lt;`, `&gt;`, `&quot;` and `&amp;`
 * @returns {string} the value with every matched entity replaced
 */
function decodeAttr (value, shouldDecodeNewlines) {
  const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
  return value.replace(re, match => decodingMap[match])
}
62+
63+
/**
 * Walk an HTML string from left to right and report what is found through
 * callbacks on `options`.
 *
 * Callbacks invoked (each only if present, except `comment`, which is called
 * unguarded whenever `shouldKeepComment` is truthy):
 *   - options.start(tagName, attrs, unary, start, end)
 *   - options.end(tagName, start, end)
 *   - options.chars(text)
 *   - options.comment(text)
 *   - options.warn(message) (only when NODE_ENV is not 'production')
 *
 * Other options read: `expectHTML`, `isUnaryTag`, `canBeLeftOpenTag`,
 * `shouldKeepComment`, `shouldDecodeNewlines`, `shouldDecodeNewlinesForHref`.
 *
 * `start` / `end` positions are values of `index`, the number of characters
 * of the original input consumed so far.
 *
 * @param {string} html - markup to parse
 * @param {Object} options - callbacks and flags described above
 */
export function parseHTML (html, options) {
  // Currently open (non-unary) elements, innermost last.
  const stack = []
  const expectHTML = options.expectHTML
  const isUnaryTag = options.isUnaryTag || no
  const canBeLeftOpenTag = options.canBeLeftOpenTag || no
  // Offset of the remaining `html` within the original input.
  let index = 0
  let last, lastTag
  while (html) {
    // Remembered so that an iteration that consumes nothing can be detected below.
    last = html
    // Make sure we're not in a plaintext content element like script/style
    if (!lastTag || !isPlainTextElement(lastTag)) {
      let textEnd = html.indexOf('<')
      if (textEnd === 0) {
        // Comment:
        if (comment.test(html)) {
          const commentEnd = html.indexOf('-->')

          if (commentEnd >= 0) {
            if (options.shouldKeepComment) {
              // 4 skips the leading "<!--"
              options.comment(html.substring(4, commentEnd))
            }
            advance(commentEnd + 3)
            continue
          }
        }

        // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
        if (conditionalComment.test(html)) {
          const conditionalEnd = html.indexOf(']>')

          if (conditionalEnd >= 0) {
            advance(conditionalEnd + 2)
            continue
          }
        }

        // Doctype:
        const doctypeMatch = html.match(doctype)
        if (doctypeMatch) {
          advance(doctypeMatch[0].length)
          continue
        }

        // End tag:
        const endTagMatch = html.match(endTag)
        if (endTagMatch) {
          const curIndex = index
          advance(endTagMatch[0].length)
          parseEndTag(endTagMatch[1], curIndex, index)
          continue
        }

        // Start tag:
        const startTagMatch = parseStartTag()
        if (startTagMatch) {
          handleStartTag(startTagMatch)
          if (shouldIgnoreFirstNewline(lastTag, html)) {
            advance(1)
          }
          continue
        }
      }

      let text, rest, next
      if (textEnd >= 0) {
        rest = html.slice(textEnd)
        // Extend the text run over every "<" that does not begin a tag or comment.
        while (
          !endTag.test(rest) &&
          !startTagOpen.test(rest) &&
          !comment.test(rest) &&
          !conditionalComment.test(rest)
        ) {
          // < in plain text, be forgiving and treat it as text
          next = rest.indexOf('<', 1)
          if (next < 0) break
          textEnd += next
          rest = html.slice(textEnd)
        }
        text = html.substring(0, textEnd)
        advance(textEnd)
      }

      // No "<" at all: everything left is text.
      if (textEnd < 0) {
        text = html
        html = ''
      }

      if (options.chars && text) {
        options.chars(text)
      }
    } else {
      // Inside script/style/textarea: take everything up to the matching
      // closing tag as raw text.
      let endTagLength = 0
      const stackedTag = lastTag.toLowerCase()
      const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i'))
      const rest = html.replace(reStackedTag, function (all, text, endTag) {
        endTagLength = endTag.length
        if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
          text = text
            .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
            .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
        }
        if (shouldIgnoreFirstNewline(stackedTag, text)) {
          text = text.slice(1)
        }
        if (options.chars) {
          options.chars(text)
        }
        return ''
      })
      // The replace removed the text and the closing tag; account for both.
      index += html.length - rest.length
      html = rest
      parseEndTag(stackedTag, index - endTagLength, index)
    }

    // Nothing was consumed in this iteration: emit the remainder as text and stop.
    if (html === last) {
      options.chars && options.chars(html)
      if (process.env.NODE_ENV !== 'production' && !stack.length && options.warn) {
        options.warn(`Mal-formatted tag at end of template: "${html}"`)
      }
      break
    }
  }

  // Clean up any remaining tags
  parseEndTag()

  /**
   * Consume `n` characters from the front of `html` and move `index` forward.
   */
  function advance (n) {
    index += n
    html = html.substring(n)
  }

  /**
   * Try to read a complete start tag at the current position.
   *
   * @returns {Object|undefined} `{ tagName, attrs, start, unarySlash, end }`
   *   where `attrs` holds the raw `attribute` regex matches; undefined when no
   *   tag opens here or when its closing ">" is not found. Input consumed
   *   before such a failure is not restored.
   */
  function parseStartTag () {
    const start = html.match(startTagOpen)
    if (start) {
      const match = {
        tagName: start[1],
        attrs: [],
        start: index
      }
      advance(start[0].length)
      let end, attr
      while (!(end = html.match(startTagClose)) && (attr = html.match(attribute))) {
        advance(attr[0].length)
        match.attrs.push(attr)
      }
      if (end) {
        match.unarySlash = end[1]
        advance(end[0].length)
        match.end = index
        return match
      }
    }
  }

  /**
   * Turn a match from `parseStartTag` into a `start` callback, pushing the
   * element on the stack unless it is unary.
   */
  function handleStartTag (match) {
    const tagName = match.tagName
    const unarySlash = match.unarySlash

    if (expectHTML) {
      // A non-phrasing element implicitly closes an open <p>.
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag(lastTag)
      }
      // A repeated left-open tag implicitly closes the previous one.
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag(tagName)
      }
    }

    const unary = isUnaryTag(tagName) || !!unarySlash

    const l = match.attrs.length
    const attrs = new Array(l)
    for (let i = 0; i < l; i++) {
      const args = match.attrs[i]
      // hackish work around FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
      if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
        if (args[3] === '') { delete args[3] }
        if (args[4] === '') { delete args[4] }
        if (args[5] === '') { delete args[5] }
      }
      // Groups 3/4/5 are the double-quoted, single-quoted and unquoted value.
      const value = args[3] || args[4] || args[5] || ''
      const shouldDecodeNewlines = tagName === 'a' && args[1] === 'href'
        ? options.shouldDecodeNewlinesForHref
        : options.shouldDecodeNewlines
      attrs[i] = {
        name: args[1],
        value: decodeAttr(value, shouldDecodeNewlines)
      }
    }

    if (!unary) {
      stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs })
      lastTag = tagName
    }

    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

  /**
   * Close the nearest open element named `tagName` together with everything
   * opened after it. Called without arguments, closes every open element.
   * `start` / `end` default to the current `index`.
   *
   * A `</br>` or `</p>` with no matching open element is reported as a
   * synthesized element (`start` for br; `start` followed by `end` for p).
   */
  function parseEndTag (tagName, start, end) {
    let pos, lowerCasedTagName
    if (start == null) start = index
    if (end == null) end = index

    if (tagName) {
      lowerCasedTagName = tagName.toLowerCase()
    }

    // Find the closest opened tag of the same type
    if (tagName) {
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerCasedTag === lowerCasedTagName) {
          break
        }
      }
    } else {
      // If no tag name is provided, clean shop
      pos = 0
    }

    if (pos >= 0) {
      // Close all the open elements, up the stack
      for (let i = stack.length - 1; i >= pos; i--) {
        if (process.env.NODE_ENV !== 'production' &&
          (i > pos || !tagName) &&
          options.warn
        ) {
          options.warn(
            `tag <${stack[i].tag}> has no matching end tag.`
          )
        }
        if (options.end) {
          options.end(stack[i].tag, start, end)
        }
      }

      // Remove the open elements from the stack
      stack.length = pos
      lastTag = pos && stack[pos - 1].tag
    } else if (lowerCasedTagName === 'br') {
      if (options.start) {
        options.start(tagName, [], true, start, end)
      }
    } else if (lowerCasedTagName === 'p') {
      if (options.start) {
        options.start(tagName, [], false, start, end)
      }
      if (options.end) {
        options.end(tagName, start, end)
      }
    }
  }
}

src/index.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import mapValues from 'lodash.mapvalues'
2+
import {
3+
parseComponent as _parseComponent,
4+
SFCBlockRaw,
5+
SFCDescriptorRaw
6+
} from './sfc-parser'
7+
8+
/**
 * A parsed block wrapped with helpers that shift block-relative positions by
 * the block's `start` value.
 *
 * NOTE(review): `start` / `end` are presumably offsets of the block content
 * within the whole source file — confirm against `./sfc-parser`.
 */
export class SFCBlock {
  type!: string
  content!: string
  attrs!: Record<string, string | true>
  start!: number
  end!: number
  lang?: string
  src?: string
  scoped?: true
  module?: string | true

  /**
   * Copies every own enumerable property of `block` onto the new instance.
   *
   * @param block - raw block produced by the underlying parser
   */
  constructor(block: SFCBlockRaw) {
    Object.keys(block).forEach(_key => {
      const key = _key as keyof SFCBlockRaw
      this[key] = block[key]
    })
  }

  /**
   * @param offset - an offset to be shifted by this block's `start`
   * @returns `this.start + offset`
   */
  modifyOffset(offset: number): number {
    return this.start + offset
  }

  /**
   * Applies `modifyOffset` to both ends of a `[start, end]` tuple.
   *
   * @returns a new tuple; the input is not mutated
   */
  modifyRange(range: [number, number]): [number, number] {
    return [this.modifyOffset(range[0]), this.modifyOffset(range[1])]
  }

  /**
   * Applies `modifyOffset` to the `start` and `end` of a position object.
   *
   * @returns a new object; the input is not mutated
   */
  modifyPosition(position: {
    start: number
    end: number
  }): { start: number; end: number } {
    return {
      start: this.modifyOffset(position.start),
      end: this.modifyOffset(position.end)
    }
  }
}
44+
45+
/**
 * Result of `parseComponent`: the blocks of a component, each wrapped in
 * `SFCBlock`. `template` and `script` are `null` when absent.
 */
export interface SFCDescriptor {
  template: SFCBlock | null
  script: SFCBlock | null
  styles: SFCBlock[]
  customBlocks: SFCBlock[]
}
51+
52+
/**
 * Parses `code` with the raw parser from `./sfc-parser` and wraps every
 * block of the result in an `SFCBlock`.
 *
 * Array-valued entries are wrapped element by element; any other entry is
 * wrapped when truthy and passed through unchanged when falsy.
 *
 * @param code - source text handed to the raw parser
 * @returns the descriptor with `SFCBlock` instances in place of raw blocks
 */
export function parseComponent(code: string): SFCDescriptor {
  return mapValues(_parseComponent(code), value => {
    if (Array.isArray(value)) {
      return value.map(v => new SFCBlock(v))
    } else {
      return value && new SFCBlock(value)
    }
  }) as SFCDescriptor
}

0 commit comments

Comments
 (0)