From 5a7092c34d851db9baf0a6cceba11e10e77151e9 Mon Sep 17 00:00:00 2001 From: michelle0927 Date: Mon, 30 Dec 2024 11:45:30 -0500 Subject: [PATCH 1/3] scrapegraphai init --- .../fetch-scraping-results.mjs | 34 +++ .../start-scraping-job/start-scraping-job.mjs | 53 +++++ .../stop-running-job/stop-running-job.mjs | 26 +++ components/scrapegraphai/package.json | 2 +- .../scrapegraphai/scrapegraphai.app.mjs | 217 +++++++++++++++++- .../new-data-available-instant.mjs | 109 +++++++++ .../new-error-logged-instant.mjs | 97 ++++++++ .../new-scrape-completed-instant.mjs | 213 +++++++++++++++++ 8 files changed, 748 insertions(+), 3 deletions(-) create mode 100644 components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs create mode 100644 components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs create mode 100644 components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs create mode 100644 components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs create mode 100644 components/scrapegraphai/sources/new-error-logged-instant/new-error-logged-instant.mjs create mode 100644 components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs diff --git a/components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs b/components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs new file mode 100644 index 0000000000000..f89db06971605 --- /dev/null +++ b/components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs @@ -0,0 +1,34 @@ +import scrapegraphai from "../../scrapegraphai.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapegraphai-fetch-scraping-results", + name: "Fetch Scraping Results", + description: "Retrieves the results of a completed scraping job. [See the documentation]().", + version: "0.0.{{ts}}", + type: "action", + props: { + scrapegraphai, + jobId: { + propDefinition: [ + scrapegraphai, + "jobId", + ], + }, + filterDataFields: { + propDefinition: [ + scrapegraphai, + "filterDataFields", + ], + optional: true, + }, + }, + async run({ $ }) { + const results = await this.scrapegraphai.retrieveScrapingResults({ + jobId: this.jobId, + filterDataFields: this.filterDataFields, + }); + $.export("$summary", `Successfully retrieved scraping results for job ${this.jobId}`); + return results; + }, +}; diff --git a/components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs b/components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs new file mode 100644 index 0000000000000..8022b0d4a0a7a --- /dev/null +++ b/components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs @@ -0,0 +1,53 @@ +import scrapegraphai from "../../scrapegraphai.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapegraphai-start-scraping-job", + name: "Start Scraping Job", + description: "Starts a new web scraping job. 
[See the documentation](${{docsLink}})", + version: "0.0.{{ts}}", + type: "action", + props: { + scrapegraphai: { + type: "app", + app: "scrapegraphai", + }, + url: { + propDefinition: [ + "scrapegraphai", + "url", + ], + }, + dataFields: { + propDefinition: [ + "scrapegraphai", + "dataFields", + ], + optional: true, + }, + paginationSettings: { + propDefinition: [ + "scrapegraphai", + "paginationSettings", + ], + optional: true, + }, + headers: { + propDefinition: [ + "scrapegraphai", + "headers", + ], + optional: true, + }, + }, + async run({ $ }) { + const response = await this.scrapegraphai.startScrapingJob({ + url: this.url, + dataFields: this.dataFields, + paginationSettings: this.paginationSettings, + headers: this.headers, + }); + $.export("$summary", `Started scraping job with Job ID: ${response.job_id}`); + return response; + }, +}; diff --git a/components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs b/components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs new file mode 100644 index 0000000000000..9216d683d437d --- /dev/null +++ b/components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs @@ -0,0 +1,26 @@ +import scrapegraphai from "../../scrapegraphai.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapegraphai-stop-running-job", + name: "Stop Running Job", + description: "Stops a currently running web scraping job. [See the documentation](https://docs.scrapegraphai.com/)", + version: "0.0.{{ts}}", + type: "action", + props: { + scrapegraphai, + jobId: { + propDefinition: [ + scrapegraphai, + "jobId", + ], + }, + }, + async run({ $ }) { + const response = await this.scrapegraphai.stopScrapingJob({ + jobId: this.jobId, + }); + $.export("$summary", `Stopped scraping job ${this.jobId}`); + return response; + }, +}; diff --git a/components/scrapegraphai/package.json b/components/scrapegraphai/package.json index 9a824f842637b..21b306173d37f 100644 --- a/components/scrapegraphai/package.json +++ b/components/scrapegraphai/package.json @@ -12,4 +12,4 @@ "publishConfig": { "access": "public" } -} \ No newline at end of file +} diff --git a/components/scrapegraphai/scrapegraphai.app.mjs b/components/scrapegraphai/scrapegraphai.app.mjs index 8cb52844a8eb4..87010c9802bcf 100644 --- a/components/scrapegraphai/scrapegraphai.app.mjs +++ b/components/scrapegraphai/scrapegraphai.app.mjs @@ -1,11 +1,224 @@ +import { axios } from "@pipedream/platform"; + export default { type: "app", app: "scrapegraphai", - propDefinitions: {}, + version: "0.0.{ts}", + propDefinitions: { + url: { + type: "string", + label: "URL to Scrape", + description: "The URL of the website to scrape.", + }, + jobId: { + type: "string", + label: "Job ID", + description: "The ID of the scraping job.", + }, + dataFields: { + type: "string[]", + label: "Data Fields", + description: "Optional data fields to extract from the scraped content.", + optional: true, + }, + paginationSettings: { + type: "string[]", + label: "Pagination Settings", + description: "Optional pagination settings for the scraping job.", + optional: true, + }, + headers: { + type: "string[]", + label: "Headers", + description: "Optional headers to include in the scraping request.", + optional: true, + }, + filterDataFields: { + type: "string[]", + label: "Filter Data Fields", + description: "Optional data fields to filter the results.", + optional: true, + }, + taskId: { + type: "string", + label: "Task ID", + description: "The ID of the scraping task to monitor.", + optional: 
true, + }, + scrapingJobFilter: { + type: "string", + label: "Scraping Job Filter", + description: "Filter events by specific scraping jobs.", + optional: true, + }, + dataTypeFilter: { + type: "string", + label: "Data Type Filter", + description: "Filter events by specific data types.", + optional: true, + }, + scrapingTaskNameFilter: { + type: "string", + label: "Scraping Task Name Filter", + description: "Filter events by specific scraping task names.", + optional: true, + }, + errorTypeFilter: { + type: "string", + label: "Error Type Filter", + description: "Filter error events by specific error types.", + optional: true, + }, + }, methods: { // this.$auth contains connected account data authKeys() { console.log(Object.keys(this.$auth)); }, + _baseUrl() { + return "https://api.scrapegraphai.com/v1"; + }, + async _makeRequest(opts = {}) { + const { + $ = this, method = "GET", path = "/", headers, ...otherOpts + } = opts; + return axios($, { + ...otherOpts, + method, + url: this._baseUrl() + path, + headers: { + ...headers, + Authorization: `Bearer ${this.$auth.api_key}`, + }, + }); + }, + async startScrapingJob(opts = {}) { + const { + url, + dataFields, + paginationSettings, + headers, + ...otherOpts + } = opts; + const data = { + url: this.url, + }; + if (this.dataFields) { + data.data_fields = this.dataFields.map(JSON.parse); + } + if (this.paginationSettings) { + data.pagination_settings = this.paginationSettings.map(JSON.parse); + } + if (this.headers) { + data.headers = this.headers.map(JSON.parse); + } + return this._makeRequest({ + method: "POST", + path: "/smartscraper/start", + data, + ...otherOpts, + }); + }, + async retrieveScrapingResults(opts = {}) { + const { + jobId, filterDataFields, ...otherOpts + } = opts; + const params = { + job_id: this.jobId, + }; + if (this.filterDataFields) { + params.filter_data_fields = this.filterDataFields; + } + return this._makeRequest({ + method: "GET", + path: "/smartscraper/get-results", + params, + ...otherOpts, + }); + }, + async stopScrapingJob(opts = {}) { + const { + jobId, ...otherOpts + } = opts; + return this._makeRequest({ + method: "POST", + path: "/smartscraper/stop", + data: { + job_id: this.jobId, + }, + ...otherOpts, + }); + }, + async onTaskCompleted(opts = {}) { + const { + taskId, scrapingJobFilter, ...otherOpts + } = opts; + const params = {}; + if (this.taskId) { + params.task_id = this.taskId; + } + if (this.scrapingJobFilter) { + params.scraping_job = this.scrapingJobFilter; + } + return this._makeRequest({ + method: "GET", + path: "/events/task-completed", + params, + ...otherOpts, + }); + }, + async onNewDataAvailable(opts = {}) { + const { + dataTypeFilter, scrapingTaskNameFilter, ...otherOpts + } = opts; + const params = {}; + if (this.dataTypeFilter) { + params.data_type = this.dataTypeFilter; + } + if (this.scrapingTaskNameFilter) { + params.scraping_task_name = this.scrapingTaskNameFilter; + } + return this._makeRequest({ + method: "GET", + path: "/events/new-data", + params, + ...otherOpts, + }); + }, + async onErrorOccurred(opts = {}) { + const { + errorTypeFilter, scrapingJobFilter, ...otherOpts + } = opts; + const params = {}; + if (this.errorTypeFilter) { + params.error_type = this.errorTypeFilter; + } + if (this.scrapingJobFilter) { + params.scraping_job = this.scrapingJobFilter; + } + return this._makeRequest({ + method: "GET", + path: "/events/error", + params, + ...otherOpts, + }); + }, + async paginate(fn, ...opts) { + const results = []; + const fetchPage = async (page = 1) => { + const response = 
await fn({ + page, + ...opts, + }); + if (response && response.items && response.items.length > 0) { + results.push(...response.items); + if (response.has_more) { + await fetchPage(page + 1); + } + } + }; + await fetchPage(); + return results; + }, }, -}; \ No newline at end of file +}; diff --git a/components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs b/components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs new file mode 100644 index 0000000000000..772e012c80589 --- /dev/null +++ b/components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs @@ -0,0 +1,109 @@ +import scrapegraphai from "../../scrapegraphai.app.mjs"; +import crypto from "crypto"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapegraphai-new-data-available-instant", + name: "New Data Available - Instant", + description: "Emit new event when new data becomes available from a scraping job. [See the documentation]()", + version: "0.0.{{ts}}", + type: "source", + dedupe: "unique", + props: { + scrapegraphai: { + type: "app", + app: "scrapegraphai", + }, + dataTypeFilter: { + propDefinition: [ + "scrapegraphai", + "dataTypeFilter", + ], + optional: true, + }, + scrapingTaskNameFilter: { + propDefinition: [ + "scrapegraphai", + "scrapingTaskNameFilter", + ], + optional: true, + }, + db: "$.service.db", + http: { + type: "$.interface.http", + customResponse: true, + }, + }, + hooks: { + async deploy() { + const events = await this.paginate( + this.scrapegraphai.onNewDataAvailable, + { + dataTypeFilter: this.dataTypeFilter, + scrapingTaskNameFilter: this.scrapingTaskNameFilter, + }, + ); + const recentEvents = events.slice(-50); + // Emit from oldest to newest + for (const event of recentEvents) { + const id = event.id || event.ts || Date.now(); + const ts = event.ts + ? Date.parse(event.ts) + : Date.now(); + const summary = `New data available: ${event.dataType || "Unknown Data Type"}`; + this.$emit(event, { + id, + summary, + ts, + }); + } + }, + async activate() { + // Register webhook and save the webhook ID + const webhookResponse = await this.scrapegraphai.createWebhook({ + callbackUrl: this.http.endpoint, + dataTypeFilter: this.dataTypeFilter, + scrapingTaskNameFilter: this.scrapingTaskNameFilter, + }); + const webhookId = webhookResponse.id; + await this.db.set("webhookId", webhookId); + }, + async deactivate() { + const webhookId = await this.db.get("webhookId"); + if (webhookId) { + await this.scrapegraphai.deleteWebhook({ + webhookId, + }); + await this.db.delete("webhookId"); + } + }, + }, + async run(event) { + const signature = event.headers["x-signature"]; + const rawBody = event.raw_body; + const secretKey = this.scrapegraphai.$auth.secret_key; + const computedSignature = crypto.createHmac("sha256", secretKey).update(rawBody) + .digest("hex"); + + if (computedSignature !== signature) { + this.http.respond({ + status: 401, + body: "Unauthorized", + }); + return; + } + + const data = event.body; + const id = data.id || data.ts || Date.now(); + const ts = data.timestamp + ? 
Date.parse(data.timestamp) + : Date.now(); + const summary = `New data available: ${data.dataType || "Unknown Data Type"}`; + + this.$emit(data, { + id, + summary, + ts, + }); + }, +}; diff --git a/components/scrapegraphai/sources/new-error-logged-instant/new-error-logged-instant.mjs b/components/scrapegraphai/sources/new-error-logged-instant/new-error-logged-instant.mjs new file mode 100644 index 0000000000000..9929527f2cc23 --- /dev/null +++ b/components/scrapegraphai/sources/new-error-logged-instant/new-error-logged-instant.mjs @@ -0,0 +1,97 @@ +import scrapegraphai from "../../scrapegraphai.app.mjs"; +import crypto from "crypto"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapegraphai-new-error-logged-instant", + name: "New Error Logged (Instant)", + description: "Emit a new event when an error occurs during a scraping task. Users can filter by error type or scraping job for targeted monitoring. [See the documentation]()", + version: "0.0.{{ts}}", + type: "source", + dedupe: "unique", + props: { + scrapegraphai, + db: "$.service.db", + http: { + type: "$.interface.http", + customResponse: true, + }, + errorTypeFilter: { + propDefinition: [ + scrapegraphai, + "errorTypeFilter", + ], + optional: true, + }, + scrapingJobFilter: { + propDefinition: [ + scrapegraphai, + "scrapingJobFilter", + ], + optional: true, + }, + }, + methods: { + _getWebhookId() { + return this.db.get("webhookId"); + }, + _setWebhookId(id) { + this.db.set("webhookId", id); + }, + }, + hooks: { + async deploy() { + const recentErrors = await this.scrapegraphai.paginate(this.scrapegraphai.onErrorOccurred, { + errorTypeFilter: this.errorTypeFilter, + scrapingJobFilter: this.scrapingJobFilter, + }); + + const lastFiftyErrors = recentErrors.slice(-50); + for (const error of lastFiftyErrors) { + this.$emit(error, { + id: error.id || `${Date.now()}-${error.ts}`, + summary: `Error of type ${error.errorType} occurred in job ${error.scrapingJob}`, + ts: Date.parse(error.timestamp) || Date.now(), + }); + } + }, + async activate() { + const webhook = await this.scrapegraphai.onErrorOccurred({ + errorTypeFilter: this.errorTypeFilter, + scrapingJobFilter: this.scrapingJobFilter, + }); + this._setWebhookId(webhook.id); + }, + async deactivate() { + const webhookId = this._getWebhookId(); + if (webhookId) { + await this.scrapegraphai._makeRequest({ + method: "DELETE", + path: `/webhooks/${webhookId}`, + }); + this._setWebhookId(null); + } + }, + }, + async run(event) { + const signature = event.headers["x-scrapegraphai-signature"]; + const secret = this.scrapegraphai.$auth.api_key; + const hash = crypto.createHmac("sha256", secret).update(event.body) + .digest("hex"); + + if (hash !== signature) { + this.http.respond({ + status: 401, + body: "Unauthorized", + }); + return; + } + + const errorEvent = JSON.parse(event.body); + this.$emit(errorEvent, { + id: errorEvent.id || `${Date.now()}-${errorEvent.timestamp}`, + summary: `Error of type ${errorEvent.errorType} occurred in job ${errorEvent.scrapingJob}`, + ts: Date.parse(errorEvent.timestamp) || Date.now(), + }); + }, +}; diff --git a/components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs b/components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs new file mode 100644 index 0000000000000..64b71dc6c8d8e --- /dev/null +++ b/components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs @@ -0,0 +1,213 @@ +import scrapegraphai from 
"../../scrapegraphai.app.mjs"; +import { axios } from "@pipedream/platform"; +import crypto from "crypto"; + +export default { + key: "scrapegraphai-new-scrape-completed-instant", + name: "New Scrape Completed - Instant", + description: "Emit a new event when a web scraping task is completed. [See the documentation](", + version: "0.0.{{ts}}", + type: "source", + dedupe: "unique", + props: { + scrapegraphai: { + type: "app", + app: "scrapegraphai", + }, + http: { + type: "$.interface.http", + customResponse: true, + }, + db: "$.service.db", + taskId: { + propDefinition: [ + scrapegraphai, + "taskId", + ], + optional: true, + }, + scrapingJobFilter: { + propDefinition: [ + scrapegraphai, + "scrapingJobFilter", + ], + optional: true, + }, + dataTypeFilter: { + propDefinition: [ + scrapegraphai, + "dataTypeFilter", + ], + optional: true, + }, + scrapingTaskNameFilter: { + propDefinition: [ + scrapegraphai, + "scrapingTaskNameFilter", + ], + optional: true, + }, + errorTypeFilter: { + propDefinition: [ + scrapegraphai, + "errorTypeFilter", + ], + optional: true, + }, + }, + methods: { + async _getWebhookId() { + return this.db.get("webhookId"); + }, + async _setWebhookId(id) { + await this.db.set("webhookId", id); + }, + }, + hooks: { + async deploy() { + const events = await this.scrapegraphai.paginate(this.scrapegraphai.onTaskCompleted, { + taskId: this.taskId, + scrapingJobFilter: this.scrapingJobFilter, + dataTypeFilter: this.dataTypeFilter, + scrapingTaskNameFilter: this.scrapingTaskNameFilter, + errorTypeFilter: this.errorTypeFilter, + }); + const eventsToEmit = events.slice(-50).reverse(); + for (const event of eventsToEmit) { + const id = event.id || `${Date.now()}`; + const summary = event.summary || `Task ${event.taskId} completed`; + const ts = event.completedAt + ? Date.parse(event.completedAt) + : Date.now(); + this.$emit(event, { + id, + summary, + ts, + }); + } + }, + async activate() { + const response = await this.scrapegraphai._makeRequest({ + method: "POST", + path: "/webhooks", + data: { + callback_url: this.http.endpoint, + event: "task_completed", + ...(this.taskId + ? { + task_id: this.taskId, + } + : {}), + ...(this.scrapingJobFilter + ? { + scraping_job_filter: this.scrapingJobFilter, + } + : {}), + ...(this.dataTypeFilter + ? { + data_type_filter: this.dataTypeFilter, + } + : {}), + ...(this.scrapingTaskNameFilter + ? { + scraping_task_name_filter: this.scrapingTaskNameFilter, + } + : {}), + ...(this.errorTypeFilter + ? 
{ + error_type_filter: this.errorTypeFilter, + } + : {}), + }, + }); + await this._setWebhookId(response.id); + }, + async deactivate() { + const webhookId = await this._getWebhookId(); + if (webhookId) { + await this.scrapegraphai._makeRequest({ + method: "DELETE", + path: `/webhooks/${webhookId}`, + }); + await this.db.delete("webhookId"); + } + }, + }, + async run(event) { + const signature = event.headers["x-scrapegraphai-signature"]; + const payload = JSON.stringify(event.body); + const secret = this.scrapegraphai.$auth.webhook_secret; + + const computedSignature = crypto + .createHmac("sha256", secret) + .update(payload) + .digest("hex"); + + if (computedSignature !== signature) { + await this.http.respond({ + status: 401, + body: "Unauthorized", + }); + return; + } + + const data = event.body; + + if (this.taskId && data.taskId !== this.taskId) { + await this.http.respond({ + status: 200, + body: "Event does not match the specified Task ID.", + }); + return; + } + + if (this.scrapingJobFilter && data.scrapingJob !== this.scrapingJobFilter) { + await this.http.respond({ + status: 200, + body: "Event does not match the specified Scraping Job Filter.", + }); + return; + } + + if (this.dataTypeFilter && data.dataType !== this.dataTypeFilter) { + await this.http.respond({ + status: 200, + body: "Event does not match the specified Data Type Filter.", + }); + return; + } + + if (this.scrapingTaskNameFilter && data.scrapingTaskName !== this.scrapingTaskNameFilter) { + await this.http.respond({ + status: 200, + body: "Event does not match the specified Scraping Task Name Filter.", + }); + return; + } + + if (this.errorTypeFilter && data.errorType !== this.errorTypeFilter) { + await this.http.respond({ + status: 200, + body: "Event does not match the specified Error Type Filter.", + }); + return; + } + + const id = data.id || `${Date.now()}`; + const summary = data.summary || `Task ${data.taskId} completed`; + const ts = data.completedAt + ? 
Date.parse(data.completedAt) + : Date.now(); + + this.$emit(data, { + id, + summary, + ts, + }); + + await this.http.respond({ + status: 200, + body: "Event received and processed.", + }); + }, +}; From 8704d05c070d1e41207fe2bfcbb7f4f4ef1e688f Mon Sep 17 00:00:00 2001 From: michelle0927 Date: Mon, 30 Dec 2024 12:55:06 -0500 Subject: [PATCH 2/3] new components --- .../fetch-scraping-results.mjs | 34 --- .../start-local-scraper.mjs | 56 +++++ .../start-markdownify/start-markdownify.mjs | 51 ++++ .../start-scraping-job/start-scraping-job.mjs | 53 ----- .../start-smart-scraper.mjs | 57 +++++ .../stop-running-job/stop-running-job.mjs | 26 --- components/scrapegraphai/package.json | 5 +- .../scrapegraphai/scrapegraphai.app.mjs | 217 ++++-------------- .../new-data-available-instant.mjs | 109 --------- .../new-error-logged-instant.mjs | 97 -------- .../new-scrape-completed-instant.mjs | 213 ----------------- 11 files changed, 208 insertions(+), 710 deletions(-) delete mode 100644 components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs create mode 100644 components/scrapegraphai/actions/start-local-scraper/start-local-scraper.mjs create mode 100644 components/scrapegraphai/actions/start-markdownify/start-markdownify.mjs delete mode 100644 components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs create mode 100644 components/scrapegraphai/actions/start-smart-scraper/start-smart-scraper.mjs delete mode 100644 components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs delete mode 100644 components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs delete mode 100644 components/scrapegraphai/sources/new-error-logged-instant/new-error-logged-instant.mjs delete mode 100644 components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs diff --git a/components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs b/components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs deleted file mode 100644 index f89db06971605..0000000000000 --- a/components/scrapegraphai/actions/fetch-scraping-results/fetch-scraping-results.mjs +++ /dev/null @@ -1,34 +0,0 @@ -import scrapegraphai from "../../scrapegraphai.app.mjs"; -import { axios } from "@pipedream/platform"; - -export default { - key: "scrapegraphai-fetch-scraping-results", - name: "Fetch Scraping Results", - description: "Retrieves the results of a completed scraping job. 
[See the documentation]().",
-  version: "0.0.{{ts}}",
-  type: "action",
-  props: {
-    scrapegraphai,
-    jobId: {
-      propDefinition: [
-        scrapegraphai,
-        "jobId",
-      ],
-    },
-    filterDataFields: {
-      propDefinition: [
-        scrapegraphai,
-        "filterDataFields",
-      ],
-      optional: true,
-    },
-  },
-  async run({ $ }) {
-    const results = await this.scrapegraphai.retrieveScrapingResults({
-      jobId: this.jobId,
-      filterDataFields: this.filterDataFields,
-    });
-    $.export("$summary", `Successfully retrieved scraping results for job ${this.jobId}`);
-    return results;
-  },
-};
diff --git a/components/scrapegraphai/actions/start-local-scraper/start-local-scraper.mjs b/components/scrapegraphai/actions/start-local-scraper/start-local-scraper.mjs
new file mode 100644
index 0000000000000..97c66f16ddf25
--- /dev/null
+++ b/components/scrapegraphai/actions/start-local-scraper/start-local-scraper.mjs
@@ -0,0 +1,56 @@
+import scrapegraphai from "../../scrapegraphai.app.mjs";
+
+export default {
+  key: "scrapegraphai-start-local-scraper",
+  name: "Start Local Scraper",
+  description: "Extract content from HTML content using AI by providing a natural language prompt and the HTML content. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/localscraper/start)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapegraphai,
+    html: {
+      type: "string",
+      label: "HTML",
+      description: "The HTML to scrape",
+    },
+    prompt: {
+      propDefinition: [
+        scrapegraphai,
+        "prompt",
+      ],
+    },
+    waitForCompletion: {
+      propDefinition: [
+        scrapegraphai,
+        "waitForCompletion",
+      ],
+    },
+  },
+  async run({ $ }) {
+    let response = await this.scrapegraphai.startLocalScraper({
+      $,
+      data: {
+        website_html: this.html,
+        user_prompt: this.prompt,
+      },
+    });
+
+    if (this.waitForCompletion) {
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      while (response.status !== "completed" && response.status !== "failed") {
+        response = await this.scrapegraphai.getLocalScraperStatus({
+          $,
+          requestId: response.request_id,
+        });
+        await timer(3000);
+      }
+    }
+
+    if (response.status !== "failed") {
+      $.export("$summary", `Successfully ${this.waitForCompletion
+        ? "completed"
+        : "started" } scraping HTML.`);
+    }
+    return response;
+  },
+};
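
For readers who want to exercise the Local Scraper endpoint outside of Pipedream, the request this action makes reduces to a single POST. A minimal sketch, assuming Node 18+ (global fetch) and an API key in a SGAI_API_KEY environment variable (the variable name, sample HTML, and prompt are illustrative assumptions); the base URL and "sgai-apikey" header come from _makeRequest in the rewritten scrapegraphai.app.mjs later in this patch:

    // local-scraper-sketch.mjs — illustrative sketch, not part of the component.
    const res = await fetch("https://api.scrapegraphai.com/v1/localscraper", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "sgai-apikey": process.env.SGAI_API_KEY, // assumed env var name
      },
      body: JSON.stringify({
        website_html: "<html><body><h1>ACME Inc.</h1><p>We build rockets.</p></body></html>",
        user_prompt: "Extract info about the company",
      }),
    });
    const job = await res.json();
    // Per the status helpers in this patch, the response carries a request_id
    // that can then be polled at GET /v1/localscraper/{request_id}.
    console.log(job.request_id, job.status);
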
diff --git a/components/scrapegraphai/actions/start-markdownify/start-markdownify.mjs b/components/scrapegraphai/actions/start-markdownify/start-markdownify.mjs
new file mode 100644
index 0000000000000..6b0702ebf5c2f
--- /dev/null
+++ b/components/scrapegraphai/actions/start-markdownify/start-markdownify.mjs
@@ -0,0 +1,51 @@
+import scrapegraphai from "../../scrapegraphai.app.mjs";
+
+export default {
+  key: "scrapegraphai-start-markdownify",
+  name: "Start Markdownify",
+  description: "Convert any webpage into clean, readable Markdown format. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/markdownify/start)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapegraphai,
+    url: {
+      propDefinition: [
+        scrapegraphai,
+        "url",
+      ],
+      description: "The URL of the website to convert into markdown",
+    },
+    waitForCompletion: {
+      propDefinition: [
+        scrapegraphai,
+        "waitForCompletion",
+      ],
+    },
+  },
+  async run({ $ }) {
+    let response = await this.scrapegraphai.startMarkdownify({
+      $,
+      data: {
+        website_url: this.url,
+      },
+    });
+
+    if (this.waitForCompletion) {
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      while (response.status !== "completed" && response.status !== "failed") {
+        response = await this.scrapegraphai.getMarkdownifyStatus({
+          $,
+          requestId: response.request_id,
+        });
+        await timer(3000);
+      }
+    }
+
+    if (response.status !== "failed") {
+      $.export("$summary", `Successfully ${this.waitForCompletion
+        ? "completed"
+        : "started" } converting ${this.url} to markdown.`);
+    }
+    return response;
+  },
+};
diff --git a/components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs b/components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs
deleted file mode 100644
index 8022b0d4a0a7a..0000000000000
--- a/components/scrapegraphai/actions/start-scraping-job/start-scraping-job.mjs
+++ /dev/null
@@ -1,53 +0,0 @@
-import scrapegraphai from "../../scrapegraphai.app.mjs";
-import { axios } from "@pipedream/platform";
-
-export default {
-  key: "scrapegraphai-start-scraping-job",
-  name: "Start Scraping Job",
-  description: "Starts a new web scraping job. [See the documentation](${{docsLink}})",
-  version: "0.0.{{ts}}",
-  type: "action",
-  props: {
-    scrapegraphai: {
-      type: "app",
-      app: "scrapegraphai",
-    },
-    url: {
-      propDefinition: [
-        "scrapegraphai",
-        "url",
-      ],
-    },
-    dataFields: {
-      propDefinition: [
-        "scrapegraphai",
-        "dataFields",
-      ],
-      optional: true,
-    },
-    paginationSettings: {
-      propDefinition: [
-        "scrapegraphai",
-        "paginationSettings",
-      ],
-      optional: true,
-    },
-    headers: {
-      propDefinition: [
-        "scrapegraphai",
-        "headers",
-      ],
-      optional: true,
-    },
-  },
-  async run({ $ }) {
-    const response = await this.scrapegraphai.startScrapingJob({
-      url: this.url,
-      dataFields: this.dataFields,
-      paginationSettings: this.paginationSettings,
-      headers: this.headers,
-    });
-    $.export("$summary", `Started scraping job with Job ID: ${response.job_id}`);
-    return response;
-  },
-};
diff --git a/components/scrapegraphai/actions/start-smart-scraper/start-smart-scraper.mjs b/components/scrapegraphai/actions/start-smart-scraper/start-smart-scraper.mjs
new file mode 100644
index 0000000000000..65777516228c8
--- /dev/null
+++ b/components/scrapegraphai/actions/start-smart-scraper/start-smart-scraper.mjs
@@ -0,0 +1,57 @@
+import scrapegraphai from "../../scrapegraphai.app.mjs";
+
+export default {
+  key: "scrapegraphai-start-smart-scraper",
+  name: "Start Smart Scraper",
+  description: "Extract content from a webpage using AI by providing a natural language prompt and a URL. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/smartscraper/start).",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapegraphai,
+    url: {
+      propDefinition: [
+        scrapegraphai,
+        "url",
+      ],
+    },
+    prompt: {
+      propDefinition: [
+        scrapegraphai,
+        "prompt",
+      ],
+    },
+    waitForCompletion: {
+      propDefinition: [
+        scrapegraphai,
+        "waitForCompletion",
+      ],
+    },
+  },
+  async run({ $ }) {
+    let response = await this.scrapegraphai.startSmartScraper({
+      $,
+      data: {
+        website_url: this.url,
+        user_prompt: this.prompt,
+      },
+    });
+
+    if (this.waitForCompletion) {
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      while (response.status !== "completed" && response.status !== "failed") {
+        response = await this.scrapegraphai.getSmartScraperStatus({
+          $,
+          requestId: response.request_id,
+        });
+        await timer(3000);
+      }
+    }
+
+    if (response.status !== "failed") {
+      $.export("$summary", `Successfully ${this.waitForCompletion
+        ? "completed"
+        : "started" } scraping ${this.url}.`);
+    }
+    return response;
+  },
+};
diff --git a/components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs b/components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs
deleted file mode 100644
index 9216d683d437d..0000000000000
--- a/components/scrapegraphai/actions/stop-running-job/stop-running-job.mjs
+++ /dev/null
@@ -1,26 +0,0 @@
-import scrapegraphai from "../../scrapegraphai.app.mjs";
-import { axios } from "@pipedream/platform";
-
-export default {
-  key: "scrapegraphai-stop-running-job",
-  name: "Stop Running Job",
-  description: "Stops a currently running web scraping job. [See the documentation](https://docs.scrapegraphai.com/)",
-  version: "0.0.{{ts}}",
-  type: "action",
-  props: {
-    scrapegraphai,
-    jobId: {
-      propDefinition: [
-        scrapegraphai,
-        "jobId",
-      ],
-    },
-  },
-  async run({ $ }) {
-    const response = await this.scrapegraphai.stopScrapingJob({
-      jobId: this.jobId,
-    });
-    $.export("$summary", `Stopped scraping job ${this.jobId}`);
-    return response;
-  },
-};
diff --git a/components/scrapegraphai/package.json b/components/scrapegraphai/package.json
index 21b306173d37f..ee342f5acde98 100644
--- a/components/scrapegraphai/package.json
+++ b/components/scrapegraphai/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/scrapegraphai",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream ScrapeGraphAI Components",
   "main": "scrapegraphai.app.mjs",
   "keywords": [
@@ -11,5 +11,8 @@
   "author": "Pipedream (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.0.3"
   }
 }
diff --git a/components/scrapegraphai/scrapegraphai.app.mjs b/components/scrapegraphai/scrapegraphai.app.mjs
index 87010c9802bcf..a35f9ca91450d 100644
--- a/components/scrapegraphai/scrapegraphai.app.mjs
+++ b/components/scrapegraphai/scrapegraphai.app.mjs
@@ -3,222 +3,85 @@ import { axios } from "@pipedream/platform";
 export default {
   type: "app",
   app: "scrapegraphai",
-  version: "0.0.{ts}",
   propDefinitions: {
     url: {
       type: "string",
       label: "URL to Scrape",
       description: "The URL of the website to scrape.",
     },
-    jobId: {
+    prompt: {
       type: "string",
-      label: "Job ID",
-      description: "The ID of the scraping job.",
+      label: "Prompt",
+      description: "A prompt describing what you want to extract. Example: `Extract info about the company`",
     },
-    dataFields: {
-      type: "string[]",
-      label: "Data Fields",
-      description: "Optional data fields to extract from the scraped content.",
-      optional: true,
-    },
-    paginationSettings: {
-      type: "string[]",
-      label: "Pagination Settings",
-      description: "Optional pagination settings for the scraping job.",
-      optional: true,
-    },
-    headers: {
-      type: "string[]",
-      label: "Headers",
-      description: "Optional headers to include in the scraping request.",
-      optional: true,
-    },
-    filterDataFields: {
-      type: "string[]",
-      label: "Filter Data Fields",
-      description: "Optional data fields to filter the results.",
-      optional: true,
-    },
-    taskId: {
-      type: "string",
-      label: "Task ID",
-      description: "The ID of the scraping task to monitor.",
-      optional: true,
-    },
-    scrapingJobFilter: {
-      type: "string",
-      label: "Scraping Job Filter",
-      description: "Filter events by specific scraping jobs.",
-      optional: true,
-    },
-    dataTypeFilter: {
-      type: "string",
-      label: "Data Type Filter",
-      description: "Filter events by specific data types.",
-      optional: true,
-    },
-    scrapingTaskNameFilter: {
-      type: "string",
-      label: "Scraping Task Name Filter",
-      description: "Filter events by specific scraping task names.",
-      optional: true,
-    },
-    errorTypeFilter: {
-      type: "string",
-      label: "Error Type Filter",
-      description: "Filter error events by specific error types.",
+    waitForCompletion: {
+      type: "boolean",
+      label: "Wait For Completion",
+      description: "Set to `true` to poll the API in 3-second intervals until the request is completed",
       optional: true,
     },
   },
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
-    },
     _baseUrl() {
       return "https://api.scrapegraphai.com/v1";
     },
-    async _makeRequest(opts = {}) {
-      const {
-        $ = this, method = "GET", path = "/", headers, ...otherOpts
-      } = opts;
+    _makeRequest({
+      $ = this,
+      path,
+      ...opts
+    }) {
       return axios($, {
-        ...otherOpts,
-        method,
-        url: this._baseUrl() + path,
+        url: `${this._baseUrl()}${path}`,
         headers: {
-          ...headers,
-          Authorization: `Bearer ${this.$auth.api_key}`,
+          "sgai-apikey": `${this.$auth.api_key}`,
         },
+        ...opts,
       });
     },
-    async startScrapingJob(opts = {}) {
-      const {
-        url,
-        dataFields,
-        paginationSettings,
-        headers,
-        ...otherOpts
-      } = opts;
-      const data = {
-        url: this.url,
-      };
-      if (this.dataFields) {
-        data.data_fields = this.dataFields.map(JSON.parse);
-      }
-      if (this.paginationSettings) {
-        data.pagination_settings = this.paginationSettings.map(JSON.parse);
-      }
-      if (this.headers) {
-        data.headers = this.headers.map(JSON.parse);
-      }
+    startSmartScraper(opts = {}) {
       return this._makeRequest({
         method: "POST",
-        path: "/smartscraper/start",
-        data,
-        ...otherOpts,
+        path: "/smartscraper",
+        ...opts,
       });
     },
-    async retrieveScrapingResults(opts = {}) {
-      const {
-        jobId, filterDataFields, ...otherOpts
-      } = opts;
-      const params = {
-        job_id: this.jobId,
-      };
-      if (this.filterDataFields) {
-        params.filter_data_fields = this.filterDataFields;
-      }
+    getSmartScraperStatus({
+      requestId, ...opts
+    }) {
       return this._makeRequest({
-        method: "GET",
-        path: "/smartscraper/get-results",
-        params,
-        ...otherOpts,
+        path: `/smartscraper/${requestId}`,
+        ...opts,
      });
     },
-    async stopScrapingJob(opts = {}) {
-      const {
-        jobId, ...otherOpts
-      } = opts;
+    startLocalScraper(opts = {}) {
      return this._makeRequest({
         method: "POST",
-        path: "/smartscraper/stop",
-        data: {
-          job_id: this.jobId,
-        },
-        ...otherOpts,
+        path: "/localscraper",
+        ...opts,
       });
     },
-    async onTaskCompleted(opts = {}) {
-      const {
-        taskId, scrapingJobFilter, ...otherOpts
-      } = opts;
-      const params = {};
-      if (this.taskId) {
-        params.task_id = this.taskId;
-      }
-      if (this.scrapingJobFilter) {
-        params.scraping_job = this.scrapingJobFilter;
-      }
+    getLocalScraperStatus({
+      requestId, ...opts
+    }) {
       return this._makeRequest({
-        method: "GET",
-        path: "/events/task-completed",
-        params,
-        ...otherOpts,
+        path: `/localscraper/${requestId}`,
+        ...opts,
       });
     },
-    async onNewDataAvailable(opts = {}) {
-      const {
-        dataTypeFilter, scrapingTaskNameFilter, ...otherOpts
-      } = opts;
-      const params = {};
-      if (this.dataTypeFilter) {
-        params.data_type = this.dataTypeFilter;
-      }
-      if (this.scrapingTaskNameFilter) {
-        params.scraping_task_name = this.scrapingTaskNameFilter;
-      }
+    startMarkdownify(opts = {}) {
       return this._makeRequest({
-        method: "GET",
-        path: "/events/new-data",
-        params,
-        ...otherOpts,
+        method: "POST",
+        path: "/markdownify",
+        ...opts,
       });
     },
-    async onErrorOccurred(opts = {}) {
-      const {
-        errorTypeFilter, scrapingJobFilter, ...otherOpts
-      } = opts;
-      const params = {};
-      if (this.errorTypeFilter) {
-        params.error_type = this.errorTypeFilter;
-      }
-      if (this.scrapingJobFilter) {
-        params.scraping_job = this.scrapingJobFilter;
-      }
+    getMarkdownifyStatus({
+      requestId, ...opts
+    }) {
       return this._makeRequest({
-        method: "GET",
-        path: "/events/error",
-        params,
-        ...otherOpts,
+        path: `/markdownify/${requestId}`,
+        ...opts,
       });
     },
-    async paginate(fn, ...opts) {
-      const results = [];
-      const fetchPage = async (page = 1) => {
-        const response = await fn({
-          page,
-          ...opts,
-        });
-        if (response && response.items && response.items.length > 0) {
-          results.push(...response.items);
-          if (response.has_more) {
-            await fetchPage(page + 1);
-          }
-        }
-      };
-      await fetchPage();
-      return results;
-    },
   },
 };
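
The three start-* actions in this commit all wrap these status helpers in the same wait-for-completion pattern: re-fetch the request every three seconds until `status` leaves the pending states. A standalone sketch of that loop, assuming only the `request_id`/`status` response fields this patch already relies on (`getStatus` stands in for any of the `get*Status` methods above):

    // poll-sketch.mjs — illustrative helper, not part of the component.
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    async function waitForCompletion(getStatus, requestId, intervalMs = 3000) {
      // Fetch once up front, then poll on the interval until the job settles.
      let response = await getStatus({ requestId });
      while (response.status !== "completed" && response.status !== "failed") {
        await sleep(intervalMs);
        response = await getStatus({ requestId });
      }
      return response;
    }
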
diff --git a/components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs b/components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs
deleted file mode 100644
index 772e012c80589..0000000000000
--- a/components/scrapegraphai/sources/new-data-available-instant/new-data-available-instant.mjs
+++ /dev/null
@@ -1,109 +0,0 @@
-import scrapegraphai from "../../scrapegraphai.app.mjs";
-import crypto from "crypto";
-import { axios } from "@pipedream/platform";
-
-export default {
-  key: "scrapegraphai-new-data-available-instant",
-  name: "New Data Available - Instant",
-  description: "Emit new event when new data becomes available from a scraping job. [See the documentation]()",
-  version: "0.0.{{ts}}",
-  type: "source",
-  dedupe: "unique",
-  props: {
-    scrapegraphai: {
-      type: "app",
-      app: "scrapegraphai",
-    },
-    dataTypeFilter: {
-      propDefinition: [
-        "scrapegraphai",
-        "dataTypeFilter",
-      ],
-      optional: true,
-    },
-    scrapingTaskNameFilter: {
-      propDefinition: [
-        "scrapegraphai",
-        "scrapingTaskNameFilter",
-      ],
-      optional: true,
-    },
-    db: "$.service.db",
-    http: {
-      type: "$.interface.http",
-      customResponse: true,
-    },
-  },
-  hooks: {
-    async deploy() {
-      const events = await this.paginate(
-        this.scrapegraphai.onNewDataAvailable,
-        {
-          dataTypeFilter: this.dataTypeFilter,
-          scrapingTaskNameFilter: this.scrapingTaskNameFilter,
-        },
-      );
-      const recentEvents = events.slice(-50);
-      // Emit from oldest to newest
-      for (const event of recentEvents) {
-        const id = event.id || event.ts || Date.now();
-        const ts = event.ts
-          ? Date.parse(event.ts)
-          : Date.now();
-        const summary = `New data available: ${event.dataType || "Unknown Data Type"}`;
-        this.$emit(event, {
-          id,
-          summary,
-          ts,
-        });
-      }
-    },
-    async activate() {
-      // Register webhook and save the webhook ID
-      const webhookResponse = await this.scrapegraphai.createWebhook({
-        callbackUrl: this.http.endpoint,
-        dataTypeFilter: this.dataTypeFilter,
-        scrapingTaskNameFilter: this.scrapingTaskNameFilter,
-      });
-      const webhookId = webhookResponse.id;
-      await this.db.set("webhookId", webhookId);
-    },
-    async deactivate() {
-      const webhookId = await this.db.get("webhookId");
-      if (webhookId) {
-        await this.scrapegraphai.deleteWebhook({
-          webhookId,
-        });
-        await this.db.delete("webhookId");
-      }
-    },
-  },
-  async run(event) {
-    const signature = event.headers["x-signature"];
-    const rawBody = event.raw_body;
-    const secretKey = this.scrapegraphai.$auth.secret_key;
-    const computedSignature = crypto.createHmac("sha256", secretKey).update(rawBody)
-      .digest("hex");
-
-    if (computedSignature !== signature) {
-      this.http.respond({
-        status: 401,
-        body: "Unauthorized",
-      });
-      return;
-    }
-
-    const data = event.body;
-    const id = data.id || data.ts || Date.now();
-    const ts = data.timestamp
-      ? Date.parse(data.timestamp)
-      : Date.now();
-    const summary = `New data available: ${data.dataType || "Unknown Data Type"}`;
-
-    this.$emit(data, {
-      id,
-      summary,
-      ts,
-    });
-  },
-};
[See the documentation]()", - version: "0.0.{{ts}}", - type: "source", - dedupe: "unique", - props: { - scrapegraphai, - db: "$.service.db", - http: { - type: "$.interface.http", - customResponse: true, - }, - errorTypeFilter: { - propDefinition: [ - scrapegraphai, - "errorTypeFilter", - ], - optional: true, - }, - scrapingJobFilter: { - propDefinition: [ - scrapegraphai, - "scrapingJobFilter", - ], - optional: true, - }, - }, - methods: { - _getWebhookId() { - return this.db.get("webhookId"); - }, - _setWebhookId(id) { - this.db.set("webhookId", id); - }, - }, - hooks: { - async deploy() { - const recentErrors = await this.scrapegraphai.paginate(this.scrapegraphai.onErrorOccurred, { - errorTypeFilter: this.errorTypeFilter, - scrapingJobFilter: this.scrapingJobFilter, - }); - - const lastFiftyErrors = recentErrors.slice(-50); - for (const error of lastFiftyErrors) { - this.$emit(error, { - id: error.id || `${Date.now()}-${error.ts}`, - summary: `Error of type ${error.errorType} occurred in job ${error.scrapingJob}`, - ts: Date.parse(error.timestamp) || Date.now(), - }); - } - }, - async activate() { - const webhook = await this.scrapegraphai.onErrorOccurred({ - errorTypeFilter: this.errorTypeFilter, - scrapingJobFilter: this.scrapingJobFilter, - }); - this._setWebhookId(webhook.id); - }, - async deactivate() { - const webhookId = this._getWebhookId(); - if (webhookId) { - await this.scrapegraphai._makeRequest({ - method: "DELETE", - path: `/webhooks/${webhookId}`, - }); - this._setWebhookId(null); - } - }, - }, - async run(event) { - const signature = event.headers["x-scrapegraphai-signature"]; - const secret = this.scrapegraphai.$auth.api_key; - const hash = crypto.createHmac("sha256", secret).update(event.body) - .digest("hex"); - - if (hash !== signature) { - this.http.respond({ - status: 401, - body: "Unauthorized", - }); - return; - } - - const errorEvent = JSON.parse(event.body); - this.$emit(errorEvent, { - id: errorEvent.id || `${Date.now()}-${errorEvent.timestamp}`, - summary: `Error of type ${errorEvent.errorType} occurred in job ${errorEvent.scrapingJob}`, - ts: Date.parse(errorEvent.timestamp) || Date.now(), - }); - }, -}; diff --git a/components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs b/components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs deleted file mode 100644 index 64b71dc6c8d8e..0000000000000 --- a/components/scrapegraphai/sources/new-scrape-completed-instant/new-scrape-completed-instant.mjs +++ /dev/null @@ -1,213 +0,0 @@ -import scrapegraphai from "../../scrapegraphai.app.mjs"; -import { axios } from "@pipedream/platform"; -import crypto from "crypto"; - -export default { - key: "scrapegraphai-new-scrape-completed-instant", - name: "New Scrape Completed - Instant", - description: "Emit a new event when a web scraping task is completed. 
[See the documentation](", - version: "0.0.{{ts}}", - type: "source", - dedupe: "unique", - props: { - scrapegraphai: { - type: "app", - app: "scrapegraphai", - }, - http: { - type: "$.interface.http", - customResponse: true, - }, - db: "$.service.db", - taskId: { - propDefinition: [ - scrapegraphai, - "taskId", - ], - optional: true, - }, - scrapingJobFilter: { - propDefinition: [ - scrapegraphai, - "scrapingJobFilter", - ], - optional: true, - }, - dataTypeFilter: { - propDefinition: [ - scrapegraphai, - "dataTypeFilter", - ], - optional: true, - }, - scrapingTaskNameFilter: { - propDefinition: [ - scrapegraphai, - "scrapingTaskNameFilter", - ], - optional: true, - }, - errorTypeFilter: { - propDefinition: [ - scrapegraphai, - "errorTypeFilter", - ], - optional: true, - }, - }, - methods: { - async _getWebhookId() { - return this.db.get("webhookId"); - }, - async _setWebhookId(id) { - await this.db.set("webhookId", id); - }, - }, - hooks: { - async deploy() { - const events = await this.scrapegraphai.paginate(this.scrapegraphai.onTaskCompleted, { - taskId: this.taskId, - scrapingJobFilter: this.scrapingJobFilter, - dataTypeFilter: this.dataTypeFilter, - scrapingTaskNameFilter: this.scrapingTaskNameFilter, - errorTypeFilter: this.errorTypeFilter, - }); - const eventsToEmit = events.slice(-50).reverse(); - for (const event of eventsToEmit) { - const id = event.id || `${Date.now()}`; - const summary = event.summary || `Task ${event.taskId} completed`; - const ts = event.completedAt - ? Date.parse(event.completedAt) - : Date.now(); - this.$emit(event, { - id, - summary, - ts, - }); - } - }, - async activate() { - const response = await this.scrapegraphai._makeRequest({ - method: "POST", - path: "/webhooks", - data: { - callback_url: this.http.endpoint, - event: "task_completed", - ...(this.taskId - ? { - task_id: this.taskId, - } - : {}), - ...(this.scrapingJobFilter - ? { - scraping_job_filter: this.scrapingJobFilter, - } - : {}), - ...(this.dataTypeFilter - ? { - data_type_filter: this.dataTypeFilter, - } - : {}), - ...(this.scrapingTaskNameFilter - ? { - scraping_task_name_filter: this.scrapingTaskNameFilter, - } - : {}), - ...(this.errorTypeFilter - ? 
{ - error_type_filter: this.errorTypeFilter, - } - : {}), - }, - }); - await this._setWebhookId(response.id); - }, - async deactivate() { - const webhookId = await this._getWebhookId(); - if (webhookId) { - await this.scrapegraphai._makeRequest({ - method: "DELETE", - path: `/webhooks/${webhookId}`, - }); - await this.db.delete("webhookId"); - } - }, - }, - async run(event) { - const signature = event.headers["x-scrapegraphai-signature"]; - const payload = JSON.stringify(event.body); - const secret = this.scrapegraphai.$auth.webhook_secret; - - const computedSignature = crypto - .createHmac("sha256", secret) - .update(payload) - .digest("hex"); - - if (computedSignature !== signature) { - await this.http.respond({ - status: 401, - body: "Unauthorized", - }); - return; - } - - const data = event.body; - - if (this.taskId && data.taskId !== this.taskId) { - await this.http.respond({ - status: 200, - body: "Event does not match the specified Task ID.", - }); - return; - } - - if (this.scrapingJobFilter && data.scrapingJob !== this.scrapingJobFilter) { - await this.http.respond({ - status: 200, - body: "Event does not match the specified Scraping Job Filter.", - }); - return; - } - - if (this.dataTypeFilter && data.dataType !== this.dataTypeFilter) { - await this.http.respond({ - status: 200, - body: "Event does not match the specified Data Type Filter.", - }); - return; - } - - if (this.scrapingTaskNameFilter && data.scrapingTaskName !== this.scrapingTaskNameFilter) { - await this.http.respond({ - status: 200, - body: "Event does not match the specified Scraping Task Name Filter.", - }); - return; - } - - if (this.errorTypeFilter && data.errorType !== this.errorTypeFilter) { - await this.http.respond({ - status: 200, - body: "Event does not match the specified Error Type Filter.", - }); - return; - } - - const id = data.id || `${Date.now()}`; - const summary = data.summary || `Task ${data.taskId} completed`; - const ts = data.completedAt - ? 
Date.parse(data.completedAt) - : Date.now(); - - this.$emit(data, { - id, - summary, - ts, - }); - - await this.http.respond({ - status: 200, - body: "Event received and processed.", - }); - }, -}; From 92a422cd61cc0760abf0fcd23e7aaf804bd59623 Mon Sep 17 00:00:00 2001 From: michelle0927 Date: Mon, 30 Dec 2024 12:56:33 -0500 Subject: [PATCH 3/3] pnpm-lock.yaml --- pnpm-lock.yaml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9ed2caa098d59..e7db5ed232002 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1624,8 +1624,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/change_photos: - specifiers: {} + components/change_photos: {} components/changenow: {} @@ -5394,8 +5393,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/kafka: - specifiers: {} + components/kafka: {} components/kajabi: {} @@ -8670,8 +8668,7 @@ importers: specifier: ^1.5.1 version: 1.6.6 - components/richpanel: - specifiers: {} + components/richpanel: {} components/ringcentral: dependencies: @@ -8952,7 +8949,10 @@ importers: version: 1.6.6 components/scrapegraphai: - specifiers: {} + dependencies: + '@pipedream/platform': + specifier: ^3.0.3 + version: 3.0.3 components/scrapein_: {} @@ -10241,8 +10241,7 @@ importers: components/taggun: {} - components/taleez: - specifiers: {} + components/taleez: {} components/talend: {} @@ -24579,22 +24578,22 @@ packages: superagent@3.8.1: resolution: {integrity: sha512-VMBFLYgFuRdfeNQSMLbxGSLfmXL/xc+OO+BZp41Za/NRDBet/BNbkRJrYzCUu0u4GU0i/ml2dtT8b9qgkw9z6Q==} engines: {node: '>= 4.0'} - deprecated: Please upgrade to v9.0.0+ as we have fixed a public vulnerability with formidable dependency. Note that v9.0.0+ requires Node.js v14.18.0+. See https://github.com/ladjs/superagent/pull/1800 for insight. This project is supported and maintained by the team at Forward Email @ https://forwardemail.net + deprecated: Please upgrade to v7.0.2+ of superagent. We have fixed numerous issues with streams, form-data, attach(), filesystem errors not bubbling up (ENOENT on attach()), and all tests are now passing. See the releases tab for more information at . superagent@4.1.0: resolution: {integrity: sha512-FT3QLMasz0YyCd4uIi5HNe+3t/onxMyEho7C3PSqmti3Twgy2rXT4fmkTz6wRL6bTF4uzPcfkUCa8u4JWHw8Ag==} engines: {node: '>= 6.0'} - deprecated: Please upgrade to v9.0.0+ as we have fixed a public vulnerability with formidable dependency. Note that v9.0.0+ requires Node.js v14.18.0+. See https://github.com/ladjs/superagent/pull/1800 for insight. This project is supported and maintained by the team at Forward Email @ https://forwardemail.net + deprecated: Please upgrade to v7.0.2+ of superagent. We have fixed numerous issues with streams, form-data, attach(), filesystem errors not bubbling up (ENOENT on attach()), and all tests are now passing. See the releases tab for more information at . superagent@5.3.1: resolution: {integrity: sha512-wjJ/MoTid2/RuGCOFtlacyGNxN9QLMgcpYLDQlWFIhhdJ93kNscFonGvrpAHSCVjRVj++DGCglocF7Aej1KHvQ==} engines: {node: '>= 7.0.0'} - deprecated: Please upgrade to v9.0.0+ as we have fixed a public vulnerability with formidable dependency. Note that v9.0.0+ requires Node.js v14.18.0+. See https://github.com/ladjs/superagent/pull/1800 for insight. This project is supported and maintained by the team at Forward Email @ https://forwardemail.net + deprecated: Please upgrade to v7.0.2+ of superagent. 
We have fixed numerous issues with streams, form-data, attach(), filesystem errors not bubbling up (ENOENT on attach()), and all tests are now passing. See the releases tab for more information at . superagent@7.1.6: resolution: {integrity: sha512-gZkVCQR1gy/oUXr+kxJMLDjla434KmSOKbx5iGD30Ql+AkJQ/YlPKECJy2nhqOsHLjGHzoDTXNSjhnvWhzKk7g==} engines: {node: '>=6.4.0 <13 || >=14'} - deprecated: Please upgrade to v9.0.0+ as we have fixed a public vulnerability with formidable dependency. Note that v9.0.0+ requires Node.js v14.18.0+. See https://github.com/ladjs/superagent/pull/1800 for insight. This project is supported and maintained by the team at Forward Email @ https://forwardemail.net + deprecated: Please downgrade to v7.1.5 if you need IE/ActiveXObject support OR upgrade to v8.0.0 as we no longer support IE and published an incorrect patch version (see https://github.com/visionmedia/superagent/issues/1731) supports-color@2.0.0: resolution: {integrity: sha512-KKNVtd6pCYgPIKU4cp2733HWYCpplQhddZLBUryaAHou723x+FRzQ5Df824Fj+IyyuiQTRoub4SnIFfIcrp70g==}