From 87adde774994b74ed7de6276ee8e7b0907671ce1 Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Thu, 23 Jan 2025 12:19:34 -0300 Subject: [PATCH 1/9] Added actions --- .../general-extraction/general-extraction.mjs | 86 +++++++++ components/scrapingant/common/constants.mjs | 164 ++++++++++++++++++ components/scrapingant/package.json | 5 +- components/scrapingant/scrapingant.app.mjs | 88 +++++++++- 4 files changed, 338 insertions(+), 5 deletions(-) create mode 100644 components/scrapingant/actions/general-extraction/general-extraction.mjs create mode 100644 components/scrapingant/common/constants.mjs diff --git a/components/scrapingant/actions/general-extraction/general-extraction.mjs b/components/scrapingant/actions/general-extraction/general-extraction.mjs new file mode 100644 index 0000000000000..03f5dd1c77839 --- /dev/null +++ b/components/scrapingant/actions/general-extraction/general-extraction.mjs @@ -0,0 +1,86 @@ +import app from "../../scrapingant.app.mjs"; + +export default { + key: "scrapingant-general-extraction", + name: "General Extraction", + description: "Send a request using the standard extraction method of ScrapingAnt.", + version: "0.0.1", + type: "action", + props: { + app, + url: { + propDefinition: [ + app, + "url", + ], + }, + browser: { + propDefinition: [ + app, + "browser", + ], + }, + returnPageSource: { + propDefinition: [ + app, + "returnPageSource", + ], + }, + cookies: { + propDefinition: [ + app, + "cookies", + ], + }, + jsSnippet: { + propDefinition: [ + app, + "jsSnippet", + ], + }, + proxyType: { + propDefinition: [ + app, + "proxyType", + ], + }, + proxyCountry: { + propDefinition: [ + app, + "proxyCountry", + ], + }, + waitForSelector: { + propDefinition: [ + app, + "waitForSelector", + ], + }, + blockResource: { + propDefinition: [ + app, + "blockResource", + ], + }, + }, + async run({ $ }) { + const response = await this.app.generalExtraction({ + $, + params: { + url: this.url, + browser: this.browser, + return_page_source: this.returnPageSource, + cookies: this.cookies, + js_snippet: this.jsSnippet, + proxy_type: this.proxyType, + proxy_country: this.proxyCountry, + wait_for_selector: this.waitForSelector, + block_resource: this.blockResource, + }, + }); + + $.export("$summary", "Successfully sent the request to ScrapingAnt"); + + return response; + }, +}; diff --git a/components/scrapingant/common/constants.mjs b/components/scrapingant/common/constants.mjs new file mode 100644 index 0000000000000..31ae259af8282 --- /dev/null +++ b/components/scrapingant/common/constants.mjs @@ -0,0 +1,164 @@ +export default { + PROXY_COUNTRIES: [ + { + label: "World", + value: "", + }, + { + label: "Brazil", + value: "BR", + }, + { + label: "Canada", + value: "CA", + }, + { + label: "China", + value: "CN", + }, + { + label: "Czech Republic", + value: "CZ", + }, + { + label: "France", + value: "FR", + }, + { + label: "Germany", + value: "DE", + }, + { + label: "Hong Kong", + value: "HK", + }, + { + label: "India", + value: "IN", + }, + { + label: "Indonesia", + value: "ID", + }, + { + label: "Italy", + value: "IT", + }, + { + label: "Israel", + value: "IL", + }, + { + label: "Japan", + value: "JP", + }, + { + label: "Netherlands", + value: "NL", + }, + { + label: "Poland", + value: "PL", + }, + { + label: "Russia", + value: "RU", + }, + { + label: "Saudi Arabia", + value: "SA", + }, + { + label: "Singapore", + value: "SG", + }, + { + label: "South Korea", + value: "KR", + }, + { + label: "Spain", + value: "ES", + }, + { + label: "United Kingdom", + value: "GB", + }, + { + label: "United Arab Emirates", + value: "AE", + }, + { + label: "USA", + value: "US", + }, + { + label: "Vietnam", + value: "VN", + }, + ], + PROXY_TYPES: [ + { + label: "Residential", + value: "residential", + }, + { + label: "Datacenter", + value: "datacenter", + }, + ], + RESOURCE_TYPES: [ + { + label: "Document", + value: "document", + }, + { + label: "Stylesheet", + value: "stylesheet", + }, + { + label: "Image", + value: "image", + }, + { + label: "Media", + value: "media", + }, + { + label: "Font", + value: "font", + }, + { + label: "Script", + value: "script", + }, + { + label: "Texttrack", + value: "texttrack", + }, + { + label: "XHR", + value: "xhr", + }, + { + label: "Fetch", + value: "fetch", + }, + { + label: "Eventsource", + value: "eventsource", + }, + { + label: "Websocket", + value: "websocket", + }, + { + label: "Manifest", + value: "manifest", + }, + { + label: "Other", + value: "other", + }, + ], +}; diff --git a/components/scrapingant/package.json b/components/scrapingant/package.json index a5db939a0a1aa..582b04146eb1c 100644 --- a/components/scrapingant/package.json +++ b/components/scrapingant/package.json @@ -11,5 +11,8 @@ "author": "Pipedream (https://pipedream.com/)", "publishConfig": { "access": "public" + }, + "dependencies": { + "@pipedream/platform": "^3.0.3" } -} \ No newline at end of file +} diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs index 5204975b31454..afd0f20f78e71 100644 --- a/components/scrapingant/scrapingant.app.mjs +++ b/components/scrapingant/scrapingant.app.mjs @@ -1,11 +1,91 @@ +import { axios } from "@pipedream/platform"; +import constants from "./common/constants.mjs"; + export default { type: "app", app: "scrapingant", - propDefinitions: {}, + propDefinitions: { + url: { + type: "string", + label: "URL", + description: "URL to scrape. This is a required parameter", + }, + browser: { + type: "boolean", + label: "Browser", + description: "Enables using a headless browser for scraping", + optional: true, + }, + returnPageSource: { + type: "boolean", + label: "Return Page Source", + description: "Enables returning data returned by the server and unaltered by the browser. Default: false. When true - JS won't be rendered. This feature works only with `browser=true`", + optional: true, + }, + cookies: { + type: "string", + label: "Cookies", + description: "Cookies to pass with a scraping request to the target site, i.e.: `cookie_name1=cookie_value1;cookie_name2=cookie_value2`", + optional: true, + }, + jsSnippet: { + type: "string", + label: "JS Snippet", + description: "Base64 encoded JS snippet to run once page being loaded in the ScrapingAnt browser. This feature works only with `browser=true`", + optional: true, + }, + proxyType: { + type: "string", + label: "Proxy Type", + description: "Specifies proxy type to make request from", + options: constants.PROXY_TYPES, + optional: true, + }, + proxyCountry: { + type: "string", + label: "Proxy Country", + description: "Specifies the proxy country to make the request from", + options: constants.PROXY_COUNTRIES, + optional: true, + }, + waitForSelector: { + type: "string", + label: "Wait for Selector", + description: "The CSS selector of the element our service will wait for before returning result. This feature works only with `browser=true`", + optional: true, + }, + blockResource: { + type: "string[]", + label: "Block Resource", + description: "Prevents cloud browser from loading specified resource types. Available resource types: `document`, `stylesheet`, `image`, `media`, `font`, `script`, `texttrack`, `xhr`, `fetch`, `eventsource`, `websocket`, `manifest`, `other`. This feature works only with `browser=true`", + optional: true, + }, + }, methods: { - // this.$auth contains connected account data - authKeys() { - console.log(Object.keys(this.$auth)); + _baseUrl() { + return "https://api.scrapingant.com/v2"; + }, + async _makeRequest(opts = {}) { + const { + $ = this, + path, + headers, + ...otherOpts + } = opts; + return axios($, { + ...otherOpts, + url: this._baseUrl() + path, + headers: { + ...headers, + "x-api-key": `${this.$auth.api_token}`, + }, + }); + }, + async generalExtraction(args = {}) { + return this._makeRequest({ + path: "/general", + ...args, + }); }, }, }; From 67f013224ad56132978c2231fc5cc04898e0009b Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Thu, 23 Jan 2025 12:21:08 -0300 Subject: [PATCH 2/9] Done requests changes --- components/scrapingant/package.json | 2 +- pnpm-lock.yaml | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/components/scrapingant/package.json b/components/scrapingant/package.json index 582b04146eb1c..01400364fee48 100644 --- a/components/scrapingant/package.json +++ b/components/scrapingant/package.json @@ -1,6 +1,6 @@ { "name": "@pipedream/scrapingant", - "version": "0.0.1", + "version": "0.1.0", "description": "Pipedream ScrapingAnt Components", "main": "scrapingant.app.mjs", "keywords": [ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a55bc744e4c8f..ffde7a005c43a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -997,8 +997,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/azure_api_for_fhir: - specifiers: {} + components/azure_api_for_fhir: {} components/azure_devops: dependencies: @@ -3416,8 +3415,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/exist: - specifiers: {} + components/exist: {} components/expedy: dependencies: @@ -4746,8 +4744,7 @@ importers: components/hasura: {} - components/have_i_been_pwned: - specifiers: {} + components/have_i_been_pwned: {} components/heartbeat: {} @@ -6612,8 +6609,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/mindbody: - specifiers: {} + components/mindbody: {} components/mindmeister: {} @@ -7668,8 +7664,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/paylocity: - specifiers: {} + components/paylocity: {} components/paymo: dependencies: @@ -9209,7 +9204,11 @@ importers: specifier: ^3.0.1 version: 3.0.3 - components/scrapingant: {} + components/scrapingant: + dependencies: + '@pipedream/platform': + specifier: ^3.0.3 + version: 3.0.3 components/scrapingbee: {} @@ -12083,8 +12082,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/yext: - specifiers: {} + components/yext: {} components/yoast_seo: {} From 369e83db0747274532c44dd562ea5bfced240300 Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Tue, 28 Jan 2025 13:34:18 -0300 Subject: [PATCH 3/9] Update general-extraction.mjs --- .../actions/general-extraction/general-extraction.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/scrapingant/actions/general-extraction/general-extraction.mjs b/components/scrapingant/actions/general-extraction/general-extraction.mjs index 03f5dd1c77839..44641cc46286e 100644 --- a/components/scrapingant/actions/general-extraction/general-extraction.mjs +++ b/components/scrapingant/actions/general-extraction/general-extraction.mjs @@ -3,7 +3,7 @@ import app from "../../scrapingant.app.mjs"; export default { key: "scrapingant-general-extraction", name: "General Extraction", - description: "Send a request using the standard extraction method of ScrapingAnt.", + description: "Send a request using the standard extraction method of ScrapingAnt. [See the documentation](https://docs.scrapingant.com/request-response-format)", version: "0.0.1", type: "action", props: { From 80bc1cb3aba420d39f7c6b1dc9510e2570e87923 Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Tue, 4 Feb 2025 10:12:59 -0300 Subject: [PATCH 4/9] Update components/scrapingant/scrapingant.app.mjs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Guilherme Falcão <48412907+GTFalcao@users.noreply.github.com> --- components/scrapingant/scrapingant.app.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs index afd0f20f78e71..09231cea488f9 100644 --- a/components/scrapingant/scrapingant.app.mjs +++ b/components/scrapingant/scrapingant.app.mjs @@ -8,7 +8,7 @@ export default { url: { type: "string", label: "URL", - description: "URL to scrape. This is a required parameter", + description: "The URL to scrape", }, browser: { type: "boolean", From a84b4b9fcf413629c6caad235285df968406e629 Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Tue, 4 Feb 2025 10:13:31 -0300 Subject: [PATCH 5/9] Update components/scrapingant/scrapingant.app.mjs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Guilherme Falcão <48412907+GTFalcao@users.noreply.github.com> --- components/scrapingant/scrapingant.app.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs index 09231cea488f9..1e40f591f8e0e 100644 --- a/components/scrapingant/scrapingant.app.mjs +++ b/components/scrapingant/scrapingant.app.mjs @@ -57,7 +57,7 @@ export default { blockResource: { type: "string[]", label: "Block Resource", - description: "Prevents cloud browser from loading specified resource types. Available resource types: `document`, `stylesheet`, `image`, `media`, `font`, `script`, `texttrack`, `xhr`, `fetch`, `eventsource`, `websocket`, `manifest`, `other`. This feature works only with `browser=true`", + description: "Prevents cloud browser from loading the specified resource types", optional: true, }, }, From 1312bba6ca1af7252a195c50fa4e935208ae9357 Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Tue, 4 Feb 2025 10:13:38 -0300 Subject: [PATCH 6/9] Update components/scrapingant/scrapingant.app.mjs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Guilherme Falcão <48412907+GTFalcao@users.noreply.github.com> --- components/scrapingant/scrapingant.app.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs index 1e40f591f8e0e..c49ece556e55f 100644 --- a/components/scrapingant/scrapingant.app.mjs +++ b/components/scrapingant/scrapingant.app.mjs @@ -51,7 +51,7 @@ export default { waitForSelector: { type: "string", label: "Wait for Selector", - description: "The CSS selector of the element our service will wait for before returning result. This feature works only with `browser=true`", + description: "The CSS selector of the element Scrapingant will wait for before returning the result", optional: true, }, blockResource: { From f9a981626b945b71cfe899f785a6d65e2f1d911c Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Tue, 4 Feb 2025 10:13:43 -0300 Subject: [PATCH 7/9] Update components/scrapingant/scrapingant.app.mjs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Guilherme Falcão <48412907+GTFalcao@users.noreply.github.com> --- components/scrapingant/scrapingant.app.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs index c49ece556e55f..3ba2290a29406 100644 --- a/components/scrapingant/scrapingant.app.mjs +++ b/components/scrapingant/scrapingant.app.mjs @@ -37,7 +37,7 @@ export default { proxyType: { type: "string", label: "Proxy Type", - description: "Specifies proxy type to make request from", + description: "Specifies the proxy type to make the request from", options: constants.PROXY_TYPES, optional: true, }, From 3308015450018a7ee8da14d8969b3019fbbb4fe3 Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Tue, 4 Feb 2025 10:16:10 -0300 Subject: [PATCH 8/9] Added actions --- .../general-extraction/general-extraction.mjs | 26 +++++++- components/scrapingant/common/constants.mjs | 66 ++++--------------- components/scrapingant/scrapingant.app.mjs | 7 +- 3 files changed, 42 insertions(+), 57 deletions(-) diff --git a/components/scrapingant/actions/general-extraction/general-extraction.mjs b/components/scrapingant/actions/general-extraction/general-extraction.mjs index 44641cc46286e..ec2b79e49dc49 100644 --- a/components/scrapingant/actions/general-extraction/general-extraction.mjs +++ b/components/scrapingant/actions/general-extraction/general-extraction.mjs @@ -19,12 +19,15 @@ export default { app, "browser", ], + reloadProps: true, }, returnPageSource: { propDefinition: [ app, "returnPageSource", ], + disabled: true, + hidden: true, }, cookies: { propDefinition: [ @@ -37,6 +40,8 @@ export default { app, "jsSnippet", ], + disabled: true, + hidden: true, }, proxyType: { propDefinition: [ @@ -61,8 +66,27 @@ export default { app, "blockResource", ], + disabled: true, + hidden: true, }, }, + async additionalProps(existingProps) { + const props = {}; + if (this.browser) { + existingProps.returnPageSource.hidden = false; + existingProps.returnPageSource.disabled = false; + } + if (this.browser) { + existingProps.jsSnippet.hidden = false; + existingProps.jsSnippet.disabled = false; + } + if (this.browser) { + existingProps.blockResource.hidden = false; + existingProps.blockResource.disabled = false; + } + return props; + }, + async run({ $ }) { const response = await this.app.generalExtraction({ $, @@ -78,9 +102,7 @@ export default { block_resource: this.blockResource, }, }); - $.export("$summary", "Successfully sent the request to ScrapingAnt"); - return response; }, }; diff --git a/components/scrapingant/common/constants.mjs b/components/scrapingant/common/constants.mjs index 31ae259af8282..d9e83df79edbe 100644 --- a/components/scrapingant/common/constants.mjs +++ b/components/scrapingant/common/constants.mjs @@ -108,57 +108,19 @@ export default { }, ], RESOURCE_TYPES: [ - { - label: "Document", - value: "document", - }, - { - label: "Stylesheet", - value: "stylesheet", - }, - { - label: "Image", - value: "image", - }, - { - label: "Media", - value: "media", - }, - { - label: "Font", - value: "font", - }, - { - label: "Script", - value: "script", - }, - { - label: "Texttrack", - value: "texttrack", - }, - { - label: "XHR", - value: "xhr", - }, - { - label: "Fetch", - value: "fetch", - }, - { - label: "Eventsource", - value: "eventsource", - }, - { - label: "Websocket", - value: "websocket", - }, - { - label: "Manifest", - value: "manifest", - }, - { - label: "Other", - value: "other", - }, + "document", + "stylesheet", + "image", + "media", + "font", + "script", + "texttrack", + "xhr", + "fetch", + "eventsource", + "websocket", + "manifest", + "other", ], + }; diff --git a/components/scrapingant/scrapingant.app.mjs b/components/scrapingant/scrapingant.app.mjs index 3ba2290a29406..7bfa28e192529 100644 --- a/components/scrapingant/scrapingant.app.mjs +++ b/components/scrapingant/scrapingant.app.mjs @@ -19,7 +19,7 @@ export default { returnPageSource: { type: "boolean", label: "Return Page Source", - description: "Enables returning data returned by the server and unaltered by the browser. Default: false. When true - JS won't be rendered. This feature works only with `browser=true`", + description: "Enables returning data returned by the server and unaltered by the browser. When true JS won't be rendered", optional: true, }, cookies: { @@ -31,7 +31,7 @@ export default { jsSnippet: { type: "string", label: "JS Snippet", - description: "Base64 encoded JS snippet to run once page being loaded in the ScrapingAnt browser. This feature works only with `browser=true`", + description: "Base64 encoded JS snippet to run once page being loaded in the ScrapingAnt browser", optional: true, }, proxyType: { @@ -57,7 +57,8 @@ export default { blockResource: { type: "string[]", label: "Block Resource", - description: "Prevents cloud browser from loading the specified resource types", + description: "Prevents cloud browser from loading specified resource types", + options: constants.RESOURCE_TYPES, optional: true, }, }, From dbda1e9cc48044f784ccdddb396a96554879980a Mon Sep 17 00:00:00 2001 From: Lucas Caresia Date: Thu, 6 Feb 2025 08:37:22 -0300 Subject: [PATCH 9/9] Done requests changes --- .../actions/general-extraction/general-extraction.mjs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/components/scrapingant/actions/general-extraction/general-extraction.mjs b/components/scrapingant/actions/general-extraction/general-extraction.mjs index ec2b79e49dc49..860c0ed9460ff 100644 --- a/components/scrapingant/actions/general-extraction/general-extraction.mjs +++ b/components/scrapingant/actions/general-extraction/general-extraction.mjs @@ -75,15 +75,12 @@ export default { if (this.browser) { existingProps.returnPageSource.hidden = false; existingProps.returnPageSource.disabled = false; - } - if (this.browser) { existingProps.jsSnippet.hidden = false; existingProps.jsSnippet.disabled = false; - } - if (this.browser) { existingProps.blockResource.hidden = false; existingProps.blockResource.disabled = false; } + return props; },