Skip to content

Commit 98da401

Browse files
authored
Merge pull request #176 from OP-Engineering/oscar/use-URL
Migrates to standard URL
2 parents 2dc94e8 + 5a2dcec commit 98da401

File tree

6 files changed

+54
-66
lines changed

6 files changed

+54
-66
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ It's more than likely there is nothing wrong with the library:
2626
- If the target website does not have OpenGraph tags, **the preview will most likely fail**. There are some fallbacks, but in general it will not work.
2727
- **You cannot preview (fetch) another web page from YOUR web page. This is an intentional security feature of browsers called CORS**
2828

29+
<h1>DO NOT FETCH CONTENT DIRECTLY FROM A USER'S DEVICE. ONLY RUN THIS ON YOUR SERVER AND SANDBOX IT IF YOU CAN</h1>
30+
Browsers block this via CORS, but you might be clever like a fox and run this in React Native. This is a bad idea: you are exposing the device's user to potentially malicious links.
31+
2932
If you use this library and find it useful please consider [sponsoring me](https://github.com/sponsors/ospfranco), open source takes a lot of time and effort.
3033

3134
# Link Preview

__tests__/index.spec.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@ describe(`#getLinkPreview()`, () => {
3131
expect(linkInfo.charset?.toLowerCase()).toEqual(`utf-8`);
3232
});
3333

34-
it("should extract author from news article", async () => {
34+
xit("should extract author from news article", async () => {
3535
const linkInfo: any = await getLinkPreview(
3636
`https://www.usatoday.com/story/special/contributor-content/2025/10/15/why-chaos-engineering-is-more-important-than-ever-in-the-ai-era/86712877007/`
3737
);
3838

3939
expect(linkInfo.author).toEqual(`Matt Emma`);
40-
})
40+
});
4141

4242
it(`should extract link info from a URL with a newline`, async () => {
4343
const linkInfo: any = await getLinkPreview(
@@ -251,17 +251,22 @@ describe(`#getLinkPreview()`, () => {
251251
expect(response.mediaType).toEqual(`website`);
252252
});
253253

254-
it("should handle override response body using onResponse option", async () => {
254+
it("should handle override response body using onResponse option", async () => {
255255
let firstParagraphText;
256256

257-
const res: any = await getLinkPreview(`https://www.example.com/`, {
257+
const res: any = await getLinkPreview(`https://www.example.com/`, {
258258
onResponse: (result, doc) => {
259-
firstParagraphText = doc('p').first().text().split('\n').map(x=> x.trim()).join(' ');
259+
firstParagraphText = doc("p")
260+
.first()
261+
.text()
262+
.split("\n")
263+
.map((x) => x.trim())
264+
.join(" ");
260265
result.siteName = `SiteName has been overridden`;
261266
result.description = firstParagraphText;
262267

263268
return result;
264-
}
269+
},
265270
});
266271

267272
expect(res.siteName).toEqual("SiteName has been overridden");

index.ts

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import cheerio from "cheerio";
2-
import urlObj from "url";
32
import { CONSTANTS } from "./constants";
43

54
interface ILinkPreviewResponse {
@@ -16,12 +15,12 @@ interface ILinkPreviewResponse {
1615
}
1716

1817
interface IVideoType {
19-
url: string | undefined,
20-
secureUrl: string | null | undefined,
21-
type: string | null | undefined,
22-
width: string | undefined,
23-
height: string | undefined,
24-
};
18+
url: string | undefined;
19+
secureUrl: string | null | undefined;
20+
type: string | null | undefined;
21+
width: string | undefined;
22+
height: string | undefined;
23+
}
2524

2625
interface ILinkPreviewOptions {
2726
headers?: Record<string, string>;
@@ -31,7 +30,11 @@ interface ILinkPreviewOptions {
3130
followRedirects?: `follow` | `error` | `manual`;
3231
resolveDNSHost?: (url: string) => Promise<string>;
3332
handleRedirects?: (baseURL: string, forwardedURL: string) => boolean;
34-
onResponse?: (response: ILinkPreviewResponse, doc: cheerio.Root, url?: URL) => ILinkPreviewResponse;
33+
onResponse?: (
34+
response: ILinkPreviewResponse,
35+
doc: cheerio.Root,
36+
url?: URL,
37+
) => ILinkPreviewResponse;
3538
}
3639

3740
interface IPreFetchedResource {
@@ -79,7 +82,7 @@ function getAuthor(doc: cheerio.Root) {
7982
const author =
8083
metaTagContent(doc, `author`, `name`) ||
8184
metaTagContent(doc, `article:author`, `property`);
82-
return author;
85+
return author;
8386
}
8487

8588
function getDescription(doc: cheerio.Root) {
@@ -105,7 +108,7 @@ function getMediaType(doc: cheerio.Root) {
105108
function getImages(
106109
doc: cheerio.Root,
107110
rootUrl: string,
108-
imagesPropertyType?: string
111+
imagesPropertyType?: string,
109112
) {
110113
let images: string[] = [];
111114
let nodes: cheerio.Cheerio | null;
@@ -122,7 +125,7 @@ function getImages(
122125
if (node.type === `tag`) {
123126
src = node.attribs.content;
124127
if (src) {
125-
src = urlObj.resolve(rootUrl, src);
128+
src = new URL(src, rootUrl).href;
126129
images.push(src);
127130
}
128131
}
@@ -132,7 +135,7 @@ function getImages(
132135
if (images.length <= 0 && !imagesPropertyType) {
133136
src = doc(`link[rel=image_src]`).attr(`href`);
134137
if (src) {
135-
src = urlObj.resolve(rootUrl, src);
138+
src = new URL(src, rootUrl).href;
136139
images = [src];
137140
} else {
138141
nodes = doc(`img`);
@@ -146,7 +149,7 @@ function getImages(
146149
dic[src] = true;
147150
// width = node.attribs.width;
148151
// height = node.attribs.height;
149-
images.push(urlObj.resolve(rootUrl, src));
152+
images.push(new URL(src, rootUrl).href);
150153
}
151154
});
152155
}
@@ -220,12 +223,12 @@ function getVideos(doc: cheerio.Root) {
220223
}
221224

222225
// returns default favicon (//hostname/favicon.ico) for a url
223-
function getDefaultFavicon(rootUrl: string) {
224-
return urlObj.resolve(rootUrl, `/favicon.ico`);
226+
function getDefaultFavicon(rootUrl: string): string {
227+
return new URL(`/favicon.ico`, rootUrl).href;
225228
}
226229

227230
// returns an array of URLs to favicon images
228-
function getFavicons(doc: cheerio.Root, rootUrl: string) {
231+
function getFavicons(doc: cheerio.Root, rootUrl: string): string[] {
229232
const images = [];
230233
let nodes: cheerio.Cheerio | never[] = [];
231234
let src: string | undefined;
@@ -245,7 +248,7 @@ function getFavicons(doc: cheerio.Root, rootUrl: string) {
245248
nodes.each((_: number, node: cheerio.Element) => {
246249
if (node.type === `tag`) src = node.attribs.href;
247250
if (src) {
248-
src = urlObj.resolve(rootUrl, src);
251+
src = new URL(src, rootUrl).href;
249252
images.push(src);
250253
}
251254
});
@@ -300,7 +303,7 @@ function parseTextResponse(
300303
body: string,
301304
url: string,
302305
options: ILinkPreviewOptions = {},
303-
contentType?: string
306+
contentType?: string,
304307
): ILinkPreviewResponse {
305308
const doc = cheerio.load(body);
306309

@@ -318,35 +321,31 @@ function parseTextResponse(
318321
};
319322

320323
if (options?.onResponse && typeof options.onResponse !== `function`) {
321-
throw new Error(
322-
`link-preview-js onResponse option must be a function`
323-
);
324+
throw new Error(`link-preview-js onResponse option must be a function`);
324325
}
325326

326327
if (options?.onResponse) {
327-
// send in a cloned response (to avoid mutation of original response reference)
328-
const clonedResponse = structuredClone(response);
329-
const urlObject = new URL(url)
328+
// send in a cloned response (to avoid mutation of original response reference)
329+
const clonedResponse = structuredClone(response);
330+
const urlObject = new URL(url);
330331
response = options.onResponse(clonedResponse, doc, urlObject);
331332
}
332333

333-
334334
return response;
335-
336335
}
337336

338337
function parseUnknownResponse(
339338
body: string,
340339
url: string,
341340
options: ILinkPreviewOptions = {},
342-
contentType?: string
341+
contentType?: string,
343342
) {
344343
return parseTextResponse(body, url, options, contentType);
345344
}
346345

347346
function parseResponse(
348347
response: IPreFetchedResource,
349-
options?: ILinkPreviewOptions
348+
options?: ILinkPreviewOptions,
350349
) {
351350
try {
352351
// console.log("[link-preview-js] response", response);
@@ -383,9 +382,8 @@ function parseResponse(
383382
}
384383

385384
if (CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) {
386-
const htmlString = response.data;
387385
return {
388-
...parseTextResponse(htmlString, response.url, options, contentType),
386+
...parseTextResponse(response.data, response.url, options, contentType),
389387
charset,
390388
};
391389
}
@@ -407,7 +405,7 @@ function parseResponse(
407405
throw new Error(
408406
`link-preview-js could not fetch link information ${(
409407
e as any
410-
).toString()}`
408+
).toString()}`,
411409
);
412410
}
413411
}
@@ -421,7 +419,7 @@ function parseResponse(
421419
*/
422420
export async function getLinkPreview(
423421
text: string,
424-
options?: ILinkPreviewOptions
422+
options?: ILinkPreviewOptions,
425423
) {
426424
if (!text || typeof text !== `string`) {
427425
throw new Error(`link-preview-js did not receive a valid url or text`);
@@ -438,7 +436,7 @@ export async function getLinkPreview(
438436

439437
if (options?.followRedirects === `manual` && !options?.handleRedirects) {
440438
throw new Error(
441-
`link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`
439+
`link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`,
442440
);
443441
}
444442

@@ -462,9 +460,7 @@ export async function getLinkPreview(
462460
? options.proxyUrl.concat(detectedUrl)
463461
: detectedUrl;
464462

465-
// Seems like fetchOptions type definition is out of date
466-
// https://github.com/node-fetch/node-fetch/issues/741
467-
let response = await fetch(fetchUrl, fetchOptions as any).catch((e) => {
463+
let response = await fetch(fetchUrl, fetchOptions).catch((e) => {
468464
if (e.name === `AbortError`) {
469465
throw new Error(`Request timeout`);
470466
}
@@ -480,12 +476,14 @@ export async function getLinkPreview(
480476
options?.handleRedirects
481477
) {
482478
const locationHeader = response.headers.get(`location`) || ``;
483-
const isAbsoluteURI = locationHeader.startsWith('http://') || locationHeader.startsWith('https://');
479+
const isAbsoluteURI =
480+
locationHeader.startsWith("http://") ||
481+
locationHeader.startsWith("https://");
484482

485483
// Resolve the URL, handling both absolute and relative URLs
486484
const forwardedUrl = isAbsoluteURI
487485
? locationHeader
488-
: urlObj.resolve(fetchUrl, locationHeader);
486+
: new URL(locationHeader, fetchUrl).href;
489487

490488
if (!options.handleRedirects(fetchUrl, forwardedUrl)) {
491489
throw new Error(`link-preview-js could not handle redirect`);
@@ -527,7 +525,7 @@ export async function getLinkPreview(
527525
*/
528526
export async function getPreviewFromContent(
529527
response: IPreFetchedResource,
530-
options?: ILinkPreviewOptions
528+
options?: ILinkPreviewOptions,
531529
) {
532530
if (!response || typeof response !== `object`) {
533531
throw new Error(`link-preview-js did not receive a valid response object`);
@@ -539,4 +537,3 @@ export async function getPreviewFromContent(
539537

540538
return parseResponse(response, options);
541539
}
542-

mise.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[tools]
2+
node = "24"

package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@
3030
"license": "MIT",
3131
"repository": "https://github.com/ospfranco/link-preview-js",
3232
"dependencies": {
33-
"cheerio": "1.0.0-rc.11",
34-
"url": "0.11.0"
33+
"cheerio": "1.0.0-rc.11"
3534
},
3635
"files": [
3736
"build"

yarn.lock

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,16 +2124,6 @@ prompts@^2.0.1:
21242124
kleur "^3.0.3"
21252125
sisteransi "^1.0.5"
21262126

2127-
punycode@1.3.2:
2128-
version "1.3.2"
2129-
resolved "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz"
2130-
integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=
2131-
2132-
querystring@0.2.0:
2133-
version "0.2.0"
2134-
resolved "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz"
2135-
integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=
2136-
21372127
react-is@^18.0.0:
21382128
version "18.2.0"
21392129
resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b"
@@ -2416,14 +2406,6 @@ update-browserslist-db@^1.0.4:
24162406
escalade "^3.1.1"
24172407
picocolors "^1.0.0"
24182408

2419-
url@0.11.0:
2420-
version "0.11.0"
2421-
resolved "https://registry.npmjs.org/url/-/url-0.11.0.tgz"
2422-
integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=
2423-
dependencies:
2424-
punycode "1.3.2"
2425-
querystring "0.2.0"
2426-
24272409
v8-to-istanbul@^9.0.1:
24282410
version "9.0.1"
24292411
resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.0.1.tgz#b6f994b0b5d4ef255e17a0d17dc444a9f5132fa4"

0 commit comments

Comments
 (0)