Skip to content

Commit 0e5190f

Browse files
sebastianMindee authored and ianardee committed
✨ Add OCR common field (#37)
1 parent 459a9f8 commit 0e5190f

File tree

7 files changed

+278
-3
lines changed

7 files changed

+278
-3
lines changed

lib/mindee/geometry.rb

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,23 @@
33
module Mindee
44
# Various helper functions for geometry.
55
module Geometry
6+
# Pair of lower and upper bounds for a series of values.
class MinMax
  # Lower bound.
  # @return [Float]
  attr_reader :min

  # Upper bound.
  # @return [Float]
  attr_reader :max

  # @param min [Float] lower bound
  # @param max [Float] upper bound
  def initialize(min, max)
    @min, @max = min, max
  end
end
22+
623
# A relative set of coordinates (X, Y) on the document.
724
class Point
825
# @return [Float]
@@ -71,7 +88,20 @@ def [](key)
7188
end
7289
end
7390

91+
# An ordered collection of vertex coordinates (Points).
class Polygon < Array
  # Compute the central point (centroid) of the polygon.
  def centroid
    Geometry.get_centroid(self)
  end

  # Check whether a Point lies within the polygon's extent on the Y-axis
  # (both bounds are inclusive).
  # @param point [Mindee::Geometry::Point]
  # @return [Boolean]
  def point_in_y?(point)
    bounds = Geometry.get_min_max_y(self)
    y = point.y
    bounds.min <= y && y <= bounds.max
  end
end
76106

77107
# Transform a prediction into a Quadrilateral.
@@ -114,5 +144,31 @@ def self.get_bounding_box(vertices)
114144
Point.new(x_min, y_max)
115145
)
116146
end
147+
148+
# Get the central point (centroid) given a sequence of points.
#
# The centroid is the arithmetic mean of the X and of the Y coordinates.
# @param points [Array<Mindee::Geometry::Point>]
# @return [Mindee::Geometry::Point]
# @raise [ArgumentError] if `points` is empty
def self.get_centroid(points)
  raise ArgumentError, 'cannot compute the centroid of an empty set of points' if points.empty?

  vertices_count = points.size
  # `fdiv` always performs a floating-point division, so Integer coordinates
  # are not truncated the way `Integer#/` would truncate them.
  Point.new(
    points.sum(&:x).fdiv(vertices_count),
    points.sum(&:y).fdiv(vertices_count)
  )
end
157+
158+
# Compute the lowest and highest Y coordinate found in a sequence of points.
# @param points [Array<Mindee::Geometry::Point>]
# @return [Mindee::Geometry::MinMax]
def self.get_min_max_y(points)
  lowest, highest = points.map(&:y).minmax
  MinMax.new(lowest, highest)
end
165+
166+
# Compute the lowest and highest X coordinate found in a sequence of points.
# @param points [Array<Mindee::Geometry::Point>]
# @return [Mindee::Geometry::MinMax]
def self.get_min_max_x(points)
  lowest, highest = points.map(&:x).minmax
  MinMax.new(lowest, highest)
end
117173
end
118174
end

lib/mindee/parsing/api_response.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,14 @@ class ApiResponse
9090
# Parse the raw API response into request, document and job objects.
#
# The document is only built when the response has a 'document' entry, any
# 'job' entry is completed, and the API request status is a success.
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
# @param http_response [Hash]
def initialize(prediction_class, http_response)
  @api_request = Mindee::ApiRequest.new(http_response['api_request']) if http_response.key?('api_request')
  # `&.` because @api_request is nil when the response has no 'api_request'
  # entry; such a response is treated as unsuccessful instead of raising
  # NoMethodError.
  if http_response.key?('document') &&
     (!http_response.key?('job') ||
       http_response['job']['status'] == 'completed') &&
     @api_request&.status == RequestStatus::SUCCESS
    @document = Mindee::Document.new(prediction_class, http_response['document'])
  end
  @job = Mindee::Job.new(http_response['job']) if http_response.key?('job')
end
99102
end
100103
end

lib/mindee/parsing/document.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,25 @@ class Document
1111
attr_reader :name
1212
# @return [String] Mindee ID of the document
1313
attr_reader :id
14+
# @return [Mindee::Ocr::Ocr, nil]
15+
attr_reader :ocr
16+
17+
# Build the OCR result from the document section of the response, when present.
# @param http_response [Hash]
# @return [Mindee::Ocr::Ocr, nil] nil when there is no 'ocr' entry, or no
#   'mvision-v1' entry inside it
def self.load_ocr(http_response)
  ocr_prediction = http_response.fetch('ocr', nil)
  return nil if ocr_prediction.nil? || ocr_prediction.fetch('mvision-v1', nil).nil?

  # The class has to be instantiated explicitly: `Ocr(...)` is parsed as a
  # call to a method named `Ocr`, which raises NoMethodError.
  Mindee::Ocr::Ocr.new(ocr_prediction)
end
1425

1526
# Build a document from its section of the API response: identifier, name,
# inference and, when available, OCR results.
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
# @param http_response [Hash]
def initialize(prediction_class, http_response)
  @id, @name = http_response.values_at('id', 'name')
  inference_data = http_response['inference']
  @inference = Mindee::Inference.new(prediction_class, inference_data)
  @ocr = self.class.load_ocr(http_response)
end
2234

2335
def to_s

lib/mindee/parsing/prediction/common_fields.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@
1010
require_relative 'common_fields/position'
1111
require_relative 'common_fields/tax'
1212
require_relative 'common_fields/text'
13+
require_relative 'common_fields/ocr'

lib/mindee/parsing/prediction/common_fields/base.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
module Mindee
66
# Base Field object, upon which fields and feature fields are built
77
class AbstractField
8-
# @return [Array<Array<Float>>]
8+
# @return [Mindee::Geometry::Quadrilateral]
99
attr_reader :bounding_box
1010
# @return [Mindee::Geometry::Polygon]
1111
attr_reader :polygon
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
# frozen_string_literal: true
2+
3+
module Mindee
4+
module Ocr
5+
# One word read by the OCR.
class OcrWord
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # The word's text.
  # @return [String]
  attr_reader :text

  # Box enclosing the polygon; stays nil when the polygon is nil or empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box

  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon

  # @param prediction [Hash] reads the 'text', 'confidence' and 'polygon' entries
  def initialize(prediction)
    @text = prediction['text']
    @confidence = prediction['confidence']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String] the text, or an empty string when there is none
  def to_s
    text.to_s
  end
end
29+
30+
# A list of words which are on the same line.
class OcrLine < Array
  # Build a line either from raw word predictions or from existing words.
  # `prediction` takes precedence when both arguments are given; the line is
  # empty when both are nil.
  # @param prediction [Array<Hash>, nil] word predictions, each one is turned into an OcrWord
  # @param from_array [Array<OcrWord>, nil] words to copy into the line
  def initialize(prediction = nil, from_array = nil)
    if !prediction.nil?
      super(prediction.map { |word_prediction| OcrWord.new(word_prediction) })
    elsif !from_array.nil?
      super(from_array)
    end
  end

  # Sort the words on the line from left to right.
  # @return [OcrLine] a new line, the receiver is not modified
  def sort_on_x
    # sort_by computes each word's left-most X once instead of on every comparison.
    OcrLine.new(nil, sort_by { |word| Geometry.get_min_max_x(word.polygon).min })
  end

  # @return [String] the text of each word, separated by single spaces
  def to_s
    map(&:to_s).join(' ')
  end
end
55+
56+
# OCR extraction for a single page.
class OcrPage
  # All the words on the page, in semi-random order.
  # @return [Array<OcrWord>]
  attr_reader :all_words
  # The words grouped into lines. Empty until the lines have been computed,
  # which #all_lines does on demand.
  # @return [Array<OcrLine>]
  attr_reader :lines

  # @param prediction [Hash] its 'all_words' entry lists the word predictions
  def initialize(prediction)
    @lines = []
    @all_words = prediction['all_words'].map { |word_prediction| OcrWord.new(word_prediction) }
  end

  # All the words on the page, ordered in lines.
  # The result is computed on first use and then reused.
  # @return [Array<OcrLine>]
  def all_lines
    @lines = to_lines if @lines.empty?
    @lines
  end

  # @return [String] one line of text per non-blank OCR line
  def to_s
    all_lines.map(&:to_s).reject { |text| text.strip.empty? }.join("\n").strip
  end

  private

  # Order all the words on the page into lines.
  #
  # Words are taken from top to bottom. The first word not yet assigned
  # starts a new line and collects every remaining word that is on the same
  # line as it; each line is then sorted from left to right.
  # @return [Array<OcrLine>]
  def to_lines
    # make sure words are sorted from top to bottom
    remaining = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
    lines = []
    until remaining.empty?
      current = remaining.first
      # `current` always belongs to its own line, so every pass consumes at
      # least one word and the loop terminates.
      same_line, remaining = remaining.partition do |word|
        word.equal?(current) || words_on_same_line?(current, word)
      end
      lines.push(OcrLine.new(nil, same_line).sort_on_x)
    end
    lines
  end

  # Determine if two words are on the same line: true when the centroid of
  # either word falls within the Y-axis extent of the other.
  # @param current_word [Mindee::Ocr::OcrWord]
  # @param next_word [Mindee::Ocr::OcrWord]
  # @return [Boolean]
  def words_on_same_line?(current_word, next_word)
    current_in_next = current_word.polygon.point_in_y?(next_word.polygon.centroid)
    next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
    current_in_next || next_in_current
  end
end
144+
145+
# Mindee Vision V1.
class MVisionV1
  # List of pages.
  # @return [Array<OcrPage>]
  attr_reader :pages

  # @param prediction [Hash] its 'pages' entry lists the page predictions
  def initialize(prediction)
    @pages = prediction['pages'].map { |page_prediction| OcrPage.new(page_prediction) }
  end

  # @return [String] the text of every page, each page starting on a new line,
  #   with leading and trailing whitespace removed
  def to_s
    @pages.map(&:to_s).join("\n").strip
  end
end
167+
168+
# OCR results for the entire document.
class Ocr
  # Results produced by Mindee Vision v1.
  # @return [Mindee::Ocr::MVisionV1]
  attr_reader :mvision_v1

  # @param prediction [Hash] its 'mvision-v1' entry holds the Mindee Vision v1 prediction
  def initialize(prediction)
    vision_prediction = prediction['mvision-v1']
    @mvision_v1 = MVisionV1.new(vision_prediction)
  end

  # @return [String] the text of the Mindee Vision v1 results
  def to_s
    mvision_v1.to_s
  end
end
182+
end
183+
end

spec/ocr_spec.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# frozen_string_literal: true
2+
3+
require 'json'
4+
require 'mindee'
5+
6+
require_relative './data'
7+
8+
DIR_OCR = File.join(DATA_DIR, 'ocr')
9+
10+
# The text rebuilt from a complete API response must match the reference
# text file, both for the whole document and for its first page.
describe Mindee::Ocr::Ocr do
  context 'An OCR extraction' do
    api_response = load_json(DIR_OCR, 'complete_with_ocr.json')

    it 'should extract ocr data from a document' do
      reference_text = read_file(DIR_OCR, 'ocr.txt')
      extraction = Mindee::Ocr::Ocr.new(api_response['document']['ocr'])

      expect(extraction.to_s).to eq(reference_text)
      expect(extraction.mvision_v1.pages[0].to_s).to eq(reference_text)
    end
  end
end

0 commit comments

Comments
 (0)