Skip to content

Commit d0d4b7c

Browse files
✨ add support for remote resource fetching (#132)
1 parent eea3e0e commit d0d4b7c

File tree

11 files changed

+235
-197
lines changed

11 files changed

+235
-197
lines changed

bin/mindee.rb

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,6 @@
9797
sync: false,
9898
async: true,
9999
},
100-
"fr-carte-vitale" => {
101-
description: "FR Carte Vitale",
102-
doc_class: Mindee::Product::FR::CarteVitale::CarteVitaleV1,
103-
sync: true,
104-
async: false,
105-
},
106100
"fr-id-card" => {
107101
description: "FR ID Card",
108102
doc_class: Mindee::Product::FR::IdCard::IdCardV2,

docs/code_samples/carte_vitale_v1.txt

Lines changed: 0 additions & 19 deletions
This file was deleted.

lib/mindee/input/sources/url_input_source.rb

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# frozen_string_literal: true
22

3+
require 'net/http'
4+
require 'uri'
5+
require 'fileutils'
6+
37
module Mindee
48
module Input
59
module Source
@@ -13,6 +17,110 @@ def initialize(url)
1317

1418
@url = url
1519
end
20+
21+
# Downloads the file from the URL and saves it to the specified path.
#
# The content is fetched first; the target name is then resolved through
# `fill_filename` and joined to `path` (a single trailing '/' is dropped).
#
# @param path [String] Directory to save the file into.
# @param filename [String, nil] Optional name to give to the file.
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [String] The full path of the saved file.
def save_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
  response_body = fetch_file_content(username: username, password: password, token: token,
                                     max_redirects: max_redirects)

  filename = fill_filename(filename)

  full_path = File.join(path.chomp('/'), filename)
  # The payload is raw bytes (e.g. a PDF): write in binary mode so that no
  # newline or encoding conversion is applied on any platform.
  File.binwrite(full_path, response_body)

  full_path
end
41+
42+
# Downloads the file from the url, and returns a BytesInputSource wrapper object for it.
#
# The file name is resolved (through `fill_filename`) before the download starts.
#
# @param filename [String, nil] Optional name to give to the file.
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [BytesInputSource] Input source built from the downloaded content and the resolved file name.
def as_local_input_source(filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
  filename = fill_filename(filename)
  response_body = fetch_file_content(username: username, password: password, token: token,
                                     max_redirects: max_redirects)

  # The body is already a String of bytes; no intermediate IO object is needed.
  BytesInputSource.new(response_body, filename)
end
58+
59+
# Fetches the file content from the URL.
#
# A Bearer `Authorization` header is set when `token` is given; basic
# authentication is applied when both `username` and `password` are given
# (it is applied last, so it takes precedence if all three are supplied).
#
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [String] The downloaded file content.
# @raise [RuntimeError] If the final response status is outside the 200..299 range.
def fetch_file_content(username: nil, password: nil, token: nil, max_redirects: 3)
  uri = URI.parse(@url)
  request = Net::HTTP::Get.new(uri)

  request['Authorization'] = "Bearer #{token}" if token
  request.basic_auth(username, password) if username && password

  response = make_request(uri, request, max_redirects)
  status = response.code.to_i
  # A 3xx status here means the redirect budget was exhausted.
  raise "Failed to download file: HTTP status code #{response.code}" if status > 299
  raise "Failed to download file: Invalid response code #{response.code}." if status < 200

  response.body
end
82+
83+
private
84+
85+
# Extracts the last path segment of a URI, to be used as a file name.
#
# @param uri [URI::Generic] Parsed URI; only its path is read (the query string is ignored).
# @return [String] The last path segment, or an empty string when the path is empty or is the root.
def extract_filename_from_url(uri)
  filename = File.basename(uri.path.to_s)
  # File.basename('/') returns '/', which is a separator and not a usable file name.
  filename == '/' ? '' : filename
end
89+
90+
# Resolves the file name to use for the downloaded content.
#
# Falls back to the last segment of the URL when no name is given, and to a
# generated temporary name when the candidate has no extension (an empty
# candidate has no extension either, so it is covered by the same check).
#
# @param filename [String, nil] Name requested by the caller, if any.
# @return [String] The resolved file name.
def fill_filename(filename)
  filename ||= extract_filename_from_url(URI.parse(@url))
  return filename unless File.extname(filename).empty?

  # There is no extension to carry over at this point, so the generator's default is used.
  generate_file_name
end
97+
98+
# Sends the request and follows HTTP redirections.
#
# Each hop opens its own connection (always with TLS) and closes it before
# the next hop starts. Redirect hops are plain GET requests built from the
# `Location` header; headers of the initial request are not copied over.
#
# @param uri [URI::Generic] Target of the request.
# @param request [Net::HTTPRequest] Request to send to `uri`.
# @param max_redirects [Integer] Remaining number of redirections that may be followed.
# @return [Net::HTTPResponse] The final response; this is still a redirection
#   response when `max_redirects` is exhausted.
# @raise [RuntimeError] If a followed redirection carries no `Location` header.
def make_request(uri, request, max_redirects)
  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
    http.request(request)
  end
  return response unless response.is_a?(Net::HTTPRedirection) && max_redirects.positive?

  location = response['location']
  raise 'No location in redirection header.' if location.nil?

  # `Location` may be a relative reference: resolve it against the URL that was just requested.
  new_uri = URI.join(uri.to_s, location)
  make_request(new_uri, Net::HTTP::Get.new(new_uri), max_redirects - 1)
end
113+
114+
# Returns the lower-cased extension of a file name.
#
# @param filename [String, nil] File name to inspect.
# @return [String, nil] The extension including its leading dot, or nil when there is none.
def get_file_extension(filename)
  ext = File.extname(filename.to_s)
  ext.empty? ? nil : ext.downcase
end
118+
119+
# Generates a temporary file name made of a fixed prefix, the current local
# time (to the second) and eight random base-36 characters.
#
# @param extension [String, nil] Extension to append verbatim; defaults to '.tmp'.
# @return [String] The generated file name.
def generate_file_name(extension: nil)
  extension ||= '.tmp'
  random_string = Array.new(8) { rand(36).to_s(36) }.join
  timestamp = Time.now.strftime('%Y-%m-%d_%H-%M-%S')

  "mindee_temp_#{timestamp}_#{random_string}#{extension}"
end
16124
end
17125
end
18126
end

lib/mindee/product.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
require_relative 'product/fr/bank_account_details/bank_account_details_v2'
1414
require_relative 'product/fr/bank_statement/bank_statement_v1'
1515
require_relative 'product/fr/carte_grise/carte_grise_v1'
16-
require_relative 'product/fr/carte_vitale/carte_vitale_v1'
1716
require_relative 'product/fr/id_card/id_card_v1'
1817
require_relative 'product/fr/id_card/id_card_v2'
1918
require_relative 'product/fr/energy_bill/energy_bill_v1'

lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb

Lines changed: 0 additions & 41 deletions
This file was deleted.

lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb

Lines changed: 0 additions & 52 deletions
This file was deleted.

lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb

Lines changed: 0 additions & 34 deletions
This file was deleted.

spec/document/fr/carte_vitale_v1_spec.rb

Lines changed: 0 additions & 32 deletions
This file was deleted.

spec/input/sources_spec.rb

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,6 @@
5050
end
5151
end
5252

53-
# Checks URL validation at construction time of UrlInputSource.
context 'A remote url file' do
  it 'should not send an invalid URL' do
    # A plain-HTTP URL is expected to be rejected by the constructor.
    expect do
      Mindee::Input::Source::UrlInputSource.new('http://invalid-url')
    end.to raise_error 'URL must be HTTPS'
  end

  it 'should send a valid URL' do
    input = Mindee::Input::Source::UrlInputSource.new('https://platform.mindee.com')
    expect(input.url).to eq('https://platform.mindee.com')
  end
end
64-
6553
context 'A broken fixable PDF' do
6654
mindee_client = Mindee::Client.new(api_key: 'invalid-api-key')
6755
it 'Should not raise a mime error' do
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# frozen_string_literal: true
2+
3+
require 'rspec'
4+
require 'mindee'
5+
6+
# Downloads a remote PDF, wraps it as a local input source and sends it for
# parsing. The API key is read from the MINDEE_API_KEY environment variable.
describe Mindee::Input::Source::UrlInputSource do
  it 'retrieves response from a remote file' do
    api_key = ENV.fetch('MINDEE_API_KEY', nil)
    client = Mindee::Client.new(api_key: api_key)
    remote_input = Mindee::Input::Source::UrlInputSource.new('https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/invoice_5p.pdf?raw=true')

    # The file name must come from the URL path, not from the query string.
    local_input = remote_input.as_local_input_source
    expect(local_input.filename).to eq('invoice_5p.pdf')

    result = client.parse(local_input, Mindee::Product::Invoice::InvoiceV4)
    expect(result.document.n_pages).to eq(5)
  end
end

0 commit comments

Comments
 (0)