Skip to content

Commit 3b429c3

Browse files
committed
enforce v5
1 parent 7d42b28 commit 3b429c3

File tree

5 files changed

+17
-63
lines changed

5 files changed

+17
-63
lines changed

.github/workflows/clang19.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
pull_request:
55
branches: [main, master]
66

7-
name: "clang 19"
7+
name: "clang19-rhub"
88

99
jobs:
1010
check:

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ License: Apache License (>= 2)
2525
URL: https://pacha.dev/cpp11tesseract/
2626
BugReports: https://github.com/pachadotdev/cpp11tesseract/issues
2727
SystemRequirements:
28-
Tesseract OCR (
28+
Tesseract OCR (>= 5.0.0) (
2929
deb: libtesseract-dev libleptonica-dev tesseract-ocr-eng,
3030
rpm: tesseract-devel leptonica-devel tesseract-langpack-eng,
3131
brew: tesseract leptonica

configure

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# filepath: /home/pacha/github/cpp11tesseract/configure
12
#!/bin/sh
23

34
# Anticonf (tm) script by Pacha (2024)
@@ -8,8 +9,8 @@
89

910
# Library settings
1011
PKG_CONFIG_NAME="tesseract"
11-
PKG_DEB_NAME="libtesseract-dev libleptonica-dev"
12-
PKG_RPM_NAME="tesseract-devel leptonica-devel"
12+
PKG_DEB_NAME="libtesseract-dev libleptonica-dev tesseract-ocr-eng"
13+
PKG_RPM_NAME="tesseract-devel leptonica-devel tesseract-langpack-eng"
1314
PKG_BREW_NAME="tesseract leptonica"
1415
PKG_CFLAGS="-I/usr/include/leptonica"
1516
PKG_LIBS="-ltesseract"
@@ -50,12 +51,16 @@ echo "PKG_LIBS: $PKG_LIBS"
5051

5152
# Check if the compiler is clang
5253
if [ "$CC" = "clang" ] || [ "$CXX" = "clang++" ]; then
53-
CXX=clang++ -stdlib=libstdc++
54-
CXXFLAGS += -stdlib=libstdc++
55-
LDFLAGS += -stdlib=libstdc++
54+
PKG_LIBS="$PKG_LIBS -stdlib=libc++"
5655
fi
5756

58-
# Tesseract 4 enforces C++11 in the headers but R does not have CXX11CPP
57+
# Stop if Tesseract < 5.0.0
58+
if ! tesseract --version | grep -q "tesseract 5"; then
59+
echo "Tesseract >= 5.0.0 is required"
60+
exit 1
61+
fi
62+
63+
# Tesseract 5 enforces C++11 in the headers but R does not have CXX11CPP
5964
CPPFLAGS=`${R_HOME}/bin/R CMD config CPPFLAGS`
6065
CXX11STD=`${R_HOME}/bin/R CMD config CXX11STD`
6166
if [ $? -eq 0 ]; then

src/cpp11tesseract.cpp

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,17 @@
11
#include "cpp11tesseract_types.h"
22

3-
#if TESSERACT_MAJOR_VERSION < 5
4-
#include <tesseract/genericvector.h>
5-
#else
63
#define STRING std::string
74
#define GenericVector std::vector
8-
#endif
95

106
[[cpp11::register]] int tesseract_major_version() {
117
return TESSERACT_MAJOR_VERSION;
128
}
139

1410
using namespace cpp11;
1511

16-
/* libtesseract 4.0 insisted that the engine is initiated in 'C' locale.
17-
* We do this as exemplified in the example code in the libc manual:
18-
* https://www.gnu.org/software/libc/manual/html_node/Setting-the-Locale.html
19-
* Full discussion: https://github.com/tesseract-ocr/tesseract/issues/1670
20-
*/
21-
#if TESSERACT_MAJOR_VERSION == 4 && TESSERACT_MINOR_VERSION == 0
22-
#define TESSERACT40
23-
#endif
24-
2512
static tesseract::TessBaseAPI *make_analyze_api() {
26-
#ifdef TESSERACT40
27-
char *old_ctype = strdup(setlocale(LC_ALL, NULL));
28-
setlocale(LC_ALL, "C");
29-
#endif
3013
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
3114
api->InitForAnalysePage();
32-
#ifdef TESSERACT40
33-
setlocale(LC_ALL, old_ctype);
34-
free(old_ctype);
35-
#endif
3615
return api;
3716
}
3817

@@ -70,11 +49,6 @@ static tesseract::TessBaseAPI *make_analyze_api() {
7049
configs.push_back(&config_strings.back()[0]);
7150
}
7251

73-
#ifdef TESSERACT40
74-
char *old_ctype = strdup(setlocale(LC_ALL, NULL));
75-
setlocale(LC_ALL, "C");
76-
#endif
77-
7852
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
7953

8054
GenericVector<STRING> params, values;
@@ -85,11 +59,6 @@ static tesseract::TessBaseAPI *make_analyze_api() {
8559
int err = api->Init(path, lang, tesseract::OEM_DEFAULT, configs.data(),
8660
configs.size(), &params, &values, false);
8761

88-
#ifdef TESSERACT40
89-
setlocale(LC_ALL, old_ctype);
90-
free(old_ctype);
91-
#endif
92-
9362
if (err) {
9463
delete api;
9564
throw std::runtime_error(
@@ -124,13 +93,7 @@ tesseract::TessBaseAPI *get_engine(TessPtr engine) {
12493
writable::logicals out(params.size());
12594
STRING str;
12695
for (int i = 0; i < params.size(); i++) {
127-
#if TESSERACT_MAJOR_VERSION >= 5
12896
out[i] = api->GetVariableAsString(std::string(params.at(i)).c_str(), &str);
129-
#else
130-
const char *value =
131-
api->GetStringVariable(std::string(params.at(i)).c_str());
132-
out[i] = (value != nullptr);
133-
#endif
13497
}
13598
api->End();
13699
delete api;
@@ -142,15 +105,9 @@ tesseract::TessBaseAPI *get_engine(TessPtr engine) {
142105
GenericVector<STRING> langs;
143106
api->GetAvailableLanguagesAsVector(&langs);
144107
writable::strings available;
145-
#if TESSERACT_MAJOR_VERSION >= 5
146108
for (const auto &lang : langs) {
147109
available.push_back(lang);
148110
}
149-
#else
150-
for (int i = 0; i < langs.size(); i++) {
151-
available.push_back(langs.get(i).c_str());
152-
}
153-
#endif
154111
langs.clear();
155112
api->GetLoadedLanguagesAsVector(&langs);
156113
writable::strings loaded;
@@ -175,15 +132,8 @@ tesseract::TessBaseAPI *get_engine(TessPtr engine) {
175132
std::vector<std::string> values;
176133
for (int i = 0; i < params.size(); ++i) {
177134
STRING str;
178-
#if TESSERACT_MAJOR_VERSION >= 5
179135
if (api->GetVariableAsString(std::string(params.at(i)).c_str(), &str)) {
180136
values.push_back(str);
181-
#else
182-
const char *value =
183-
api->GetStringVariable(std::string(params.at(i)).c_str());
184-
if (value) {
185-
values.push_back(value);
186-
#endif
187137
} else {
188138
values.push_back("");
189139
}

src/cpp11tesseract_types.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
#include <tesseract/baseapi.h> // tesseract
2-
#include <allheaders.h> // leptonica
1+
#include <allheaders.h> // leptonica
2+
#include <tesseract/baseapi.h> // tesseract
33

4-
#include <memory>
4+
#include <cpp11.hpp>
55
#include <list>
6+
#include <memory>
67
#include <string>
78
#include <vector>
89

9-
#include <cpp11.hpp>
10-
1110
inline void tess_finalizer(tesseract::TessBaseAPI* engine) {
1211
engine->End();
1312
delete engine;

0 commit comments

Comments
 (0)