Skip to content

Commit c92f9e5

Browse files
committed
[Components] ocrspace #15148
Actions - Process Image - Process PDF
1 parent 184b9d9 commit c92f9e5

File tree

10 files changed

+313
-605
lines changed

10 files changed

+313
-605
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import FormData from "form-data";
2+
import { getUrlOrFile } from "../../common/utils.mjs";
3+
import ocrspace from "../../ocrspace.app.mjs";
4+
5+
/**
 * Shared base for the OCR.space "process" actions.
 *
 * Declares the props common to every processing action and a `run` handler
 * that builds a multipart form from those props and submits it through
 * `ocrspace.processImage`.
 *
 * This object does not declare a `file` prop, a `filetype` prop, or a
 * `getSummary()` method, yet `run` reads all three from `this`; an action
 * that spreads this base is expected to provide them.
 */
export default {
  props: {
    ocrspace,
    language: {
      propDefinition: [
        ocrspace,
        "language",
      ],
    },
    isOverlayRequired: {
      propDefinition: [
        ocrspace,
        "isOverlayRequired",
      ],
    },
    detectOrientation: {
      propDefinition: [
        ocrspace,
        "detectOrientation",
      ],
    },
    scale: {
      propDefinition: [
        ocrspace,
        "scale",
      ],
    },
    isTable: {
      propDefinition: [
        ocrspace,
        "isTable",
      ],
    },
    ocrEngine: {
      propDefinition: [
        ocrspace,
        "ocrEngine",
      ],
    },
  },
  /**
   * Builds the form payload from the configured props and submits it.
   *
   * @param {object} context - Run context; `$` is forwarded to the app
   *   request and used to export the step summary.
   * @returns {Promise<*>} Whatever `ocrspace.processImage` resolves with.
   */
  async run({ $ }) {
    const data = new FormData();
    const {
      url, file,
    } = getUrlOrFile(this.file);

    // getUrlOrFile returns exactly one of `url` / `file`.
    if (url) data.append("url", url);
    // NOTE(review): for local files `file` is a base64 data-URI string, not a
    // binary upload. Confirm the API accepts that under the `file` field
    // rather than requiring `base64Image`.
    if (file) data.append("file", file);
    // The prop declared above is `language`. This previously read
    // `this.imageLanguage`, which no prop defines, so the selected language
    // was silently dropped from the request.
    if (this.language) data.append("language", this.language);
    // Optional flags are only sent when truthy; values are stringified
    // because they are appended as form fields.
    if (this.isOverlayRequired) data.append("isOverlayRequired", `${this.isOverlayRequired}`);
    if (this.filetype) data.append("filetype", this.filetype);
    if (this.detectOrientation) data.append("detectOrientation", `${this.detectOrientation}`);
    if (this.scale) data.append("scale", `${this.scale}`);
    if (this.isTable) data.append("isTable", `${this.isTable}`);
    if (this.ocrEngine) data.append("OCREngine", this.ocrEngine);

    const response = await this.ocrspace.processImage({
      $,
      data,
      // Carries the multipart boundary generated by form-data.
      headers: data.getHeaders(),
    });

    $.export("$summary", this.getSummary());
    return response;
  },
};

components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs

Lines changed: 0 additions & 26 deletions
This file was deleted.
Lines changed: 14 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,30 @@
1-
import ocrspace from "../../ocrspace.app.mjs";
2-
import { axios } from "@pipedream/platform";
1+
import common from "../common/process-base.mjs";
32

43
export default {
4+
...common,
55
key: "ocrspace-process-image",
66
name: "Process Image",
77
description: "Submits an image file for OCR processing using OCR.space. [See the documentation](https://ocr.space/ocrapi)",
8-
version: "0.0.{{ts}}",
8+
version: "0.0.1",
99
type: "action",
1010
props: {
11-
ocrspace: {
12-
type: "app",
13-
app: "ocrspace",
14-
},
15-
imageUrl: {
16-
propDefinition: [
17-
"ocrspace",
18-
"imageUrl",
19-
],
20-
},
21-
imageFile: {
11+
...common.props,
12+
file: {
2213
propDefinition: [
23-
"ocrspace",
24-
"imageFile",
14+
common.props.ocrspace,
15+
"file",
2516
],
2617
},
27-
imageLanguage: {
18+
filetype: {
2819
propDefinition: [
29-
"ocrspace",
30-
"imageLanguage",
20+
common.props.ocrspace,
21+
"filetype",
3122
],
32-
optional: true,
3323
},
3424
},
35-
async run({ $ }) {
36-
if (!this.imageUrl && !this.imageFile) {
37-
throw new Error("Either Image File URL or Image File Upload must be provided.");
38-
}
39-
40-
const response = await this.ocrspace.submitImage({
41-
imageUrl: this.imageUrl,
42-
imageFile: this.imageFile,
43-
imageLanguage: this.imageLanguage,
44-
});
45-
46-
const summary = response.JobId
47-
? `Image submitted for OCR processing. Job ID: ${response.JobId}`
48-
: "Image submitted for OCR processing.";
49-
50-
$.export("$summary", summary);
51-
return response;
25+
methods: {
26+
getSummary() {
27+
return "Image submitted for OCR processing.";
28+
},
5229
},
5330
};
Lines changed: 14 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,26 @@
1-
import ocrspace from "../../ocrspace.app.mjs";
2-
import { axios } from "@pipedream/platform";
1+
import common from "../common/process-base.mjs";
32

43
export default {
4+
...common,
55
key: "ocrspace-process-pdf",
66
name: "Process PDF for OCR",
7-
description: "Submit a PDF for OCR processing. [See the documentation]()",
8-
version: "0.0.{{ts}}",
7+
description: "Submit a PDF for OCR processing. [See the documentation](https://ocr.space/ocrapi)",
8+
version: "0.0.1",
99
type: "action",
1010
props: {
11-
ocrspace: {
12-
type: "app",
13-
app: "ocrspace",
14-
},
15-
pdfUrl: {
16-
propDefinition: [
17-
ocrspace,
18-
"pdfUrl",
19-
],
20-
},
21-
pdfFile: {
22-
propDefinition: [
23-
ocrspace,
24-
"pdfFile",
25-
],
26-
},
27-
pdfLanguage: {
28-
propDefinition: [
29-
ocrspace,
30-
"pdfLanguage",
31-
],
32-
optional: true,
33-
},
34-
pdfPages: {
11+
...common.props,
12+
file: {
3513
propDefinition: [
36-
ocrspace,
37-
"pdfPages",
14+
common.props.ocrspace,
15+
"file",
3816
],
39-
optional: true,
17+
label: "PDF File",
18+
description: "The URL of the PDF file or the path to the file saved to the `/tmp` directory (e.g. `/tmp/example.pdf`) to process. [See the documentation](https://pipedream.com/docs/workflows/steps/code/nodejs/working-with-files/#the-tmp-directory).",
4019
},
4120
},
42-
async run({ $ }) {
43-
const response = await this.ocrspace.submitPdf({
44-
pdfUrl: this.pdfUrl,
45-
pdfFile: this.pdfFile,
46-
pdfLanguage: this.pdfLanguage,
47-
pdfPages: this.pdfPages,
48-
});
49-
50-
const jobId = response.JobId || response.jobId || "N/A";
51-
$.export("$summary", `Submitted PDF for OCR processing. Job ID: ${jobId}`);
52-
return response;
21+
methods: {
22+
getSummary() {
23+
return "Submitted PDF for OCR processing.";
24+
},
5325
},
5426
};
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/**
 * Selectable OCR languages as `{ label, value }` option objects.
 * Each pair below is `[label, value]`; the listed order is the exported order.
 */
export const LANGUAGE_OPTIONS = [
  ["Arabic", "ara"],
  ["Bulgarian", "bul"],
  ["Chinese (Simplified)", "chs"],
  ["Chinese (Traditional)", "cht"],
  ["Croatian", "hrv"],
  ["Czech", "cze"],
  ["Danish", "dan"],
  ["Dutch", "dut"],
  ["English", "eng"],
  ["Finnish", "fin"],
  ["French", "fre"],
  ["German", "ger"],
  ["Greek", "gre"],
  ["Hungarian", "hun"],
  ["Korean", "kor"],
  ["Italian", "ita"],
  ["Japanese", "jpn"],
  ["Polish", "pol"],
  ["Portuguese", "por"],
  ["Russian", "rus"],
  ["Slovenian", "slv"],
  ["Spanish", "spa"],
  ["Swedish", "swe"],
  ["Turkish", "tur"],
].map(([label, value]) => ({
  label,
  value,
}));
99+
100+
/** File-type values offered as options (plain strings, in display order). */
export const IMAGE_FILETYPE_OPTIONS = ["GIF", "PNG", "JPG", "TIF", "BMP"];
107+
108+
/**
 * OCR engine choices as `{ label, value }` option objects.
 * Values are strings ("1", "2"); labels are derived from the engine id.
 */
export const OCR_ENGINE_OPTIONS = ["1", "2"].map((engineId) => ({
  label: `OCR Engine ${engineId}`,
  value: engineId,
}));
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import fs from "fs";
2+
3+
// Compiled once at module load. The pattern carries no `g`/`y` flag, so
// `.test()` keeps no state between calls and the instance is safe to share.
const URL_PATTERN = new RegExp(
  "^(https?:\\/\\/)?" + // optional http/https protocol
  // Domain name. Each label is an alphanumeric character optionally followed
  // by alphanumerics/hyphens ending in an alphanumeric. The inner group is
  // `?` rather than `*`: both accept the same labels, but `*` nested a
  // quantifier inside a quantifier and backtracked exponentially on long
  // inputs that contain no dot.
  "((([a-z\\d]([a-z\\d-]*[a-z\\d])?)\\.)+[a-z]{2,}|" +
  "((\\d{1,3}\\.){3}\\d{1,3}))" + // OR an IPv4 address
  "(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*" + // optional port and path
  "(\\?[;&a-z\\d%_.~+=-]*)?" + // optional query string
  "(\\#[-a-z\\d_]*)?$", // optional fragment
  "i",
);

/**
 * Tests whether a string looks like an HTTP(S) URL.
 *
 * The protocol is optional in the pattern, so a bare host-like string such as
 * `example.com` is accepted as well. A path starting with `/` is rejected.
 *
 * @param {string} urlString - Candidate value.
 * @returns {boolean} `true` when the whole string matches the URL pattern.
 */
export const isValidUrl = (urlString) => URL_PATTERN.test(urlString);
12+
13+
/**
 * Returns a file reference that points into `/tmp`.
 *
 * A value already containing `/tmp` anywhere in it is returned untouched;
 * anything else is prefixed with `/tmp/`.
 *
 * @param {string} filename - File name or path.
 * @returns {string} The unchanged value, or the value prefixed with `/tmp/`.
 */
export const checkTmp = (filename) => (
  filename.includes("/tmp")
    ? filename
    : `/tmp/${filename}`
);
19+
20+
// MIME type declared in the data URI, keyed by lower-cased file extension.
// A Map is used so extensions such as "constructor" cannot resolve to
// inherited object properties.
const MIME_TYPE_BY_EXTENSION = new Map([
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["png", "image/png"],
  ["gif", "image/gif"],
  ["tif", "image/tiff"],
  ["tiff", "image/tiff"],
  ["bmp", "image/bmp"],
  ["pdf", "application/pdf"],
]);

/**
 * Resolves a user-supplied value into either a remote URL or inline file data.
 *
 * When the value passes `isValidUrl` it is returned as `{ url }`. Otherwise it
 * is treated as a file path (normalised with `checkTmp`), read synchronously,
 * and returned as `{ file }` holding a base64 data URI.
 *
 * The data URI's MIME type is taken from the file extension. Previously it
 * was always `image/jpeg`, which mislabelled PNG, GIF, TIFF, BMP and PDF
 * content. Unknown or missing extensions still fall back to `image/jpeg`.
 *
 * @param {string} url - A URL, or a file name/path under `/tmp`.
 * @returns {{url: string}|{file: string}} Exactly one of the two keys.
 * @throws {Error} Whatever `fs.readFileSync` throws if the file cannot be read.
 */
export const getUrlOrFile = (url) => {
  if (isValidUrl(url)) {
    return {
      url,
    };
  }

  const filePath = checkTmp(url);
  // With no "." in the path, lastIndexOf is -1 and the whole path becomes the
  // "extension", which is not in the map and therefore uses the fallback.
  const extension = filePath.slice(filePath.lastIndexOf(".") + 1).toLowerCase();
  const mimeType = MIME_TYPE_BY_EXTENSION.get(extension) ?? "image/jpeg";

  // readFileSync without an encoding already returns a Buffer, so it can be
  // base64-encoded directly.
  const base64Content = fs.readFileSync(filePath).toString("base64");

  return {
    file: `data:${mimeType};base64,${base64Content}`,
  };
};

0 commit comments

Comments
 (0)