Skip to content

Commit 0fba587

Browse files
author
Stefan Tudose
committed
make quote escape character configurable
1 parent 25dc33e commit 0fba587

File tree

8 files changed

+149
-9
lines changed

8 files changed

+149
-9
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
- allow client to configure the quote escape character
13+
1214
### Changed
1315

1416
### Deprecated

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ The behavior of the decoder can be configured by passing one of following option
9292
- Comma: the character that separates values. Default value is comma.
9393
- IgnoreHeaders: if set to true, the first line will be ignored. This is useful when the CSV file contains a header line.
9494
- IgnoreUnmatchingFields: if set to true, the number of fields and scan targets are allowed to be different. By default, if they don't match exactly it will cause an error.
95+
- EscapeChar: the character used to escape the quote character in quoted fields. The default is the quote itself as used by the `encoding/csv` reader.
9596

9697
```golang
9798
decoder, err := csvdecoder.NewWithConfig(file, csvdecoder.Config{Comma: ';', IgnoreHeaders: true})

decoder.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ type Config struct {
1919
Comma rune // the character that separates values. Default value is comma.
2020
IgnoreHeaders bool // if set to true, the first line will be ignored
2121
IgnoreUnmatchingFields bool // if set to true, the number of fields and scan targets are allowed to be different
22+
EscapeChar rune // the character used to escape the quote character in quoted fields. The default is the quote itself.
2223
}
2324

2425
// New returns a new CSV decoder that reads from r.
@@ -29,10 +30,20 @@ func NewWithConfig(r io.Reader, config Config) (*Decoder, error) {
2930

3031
// New returns a new CSV decoder that reads from r
3132
func New(r io.Reader) (*Decoder, error) {
32-
return newDecoder(r, Config{})
33+
return newDecoder(r, Config{
34+
EscapeChar: defaultEscapeChar,
35+
})
3336
}
3437

3538
func newDecoder(reader io.Reader, config Config) (*Decoder, error) {
39+
if config.EscapeChar != defaultEscapeChar {
40+
var err error
41+
reader, err = NewReaderWithCustomEscape(reader, config.EscapeChar)
42+
if err != nil {
43+
return nil, err
44+
}
45+
}
46+
3647
p := &Decoder{
3748
reader: csv.NewReader(reader),
3849
config: config,

decoder_interface_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func TestDecoderStruct(t *testing.T) {
4141
t.Run(tc.name, func(t *testing.T) {
4242
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
4343
if err != nil {
44-
t.Fatalf("could not create d: %w", err)
44+
t.Fatalf("could not create d: %s", err)
4545
}
4646

4747
for d.Next() {
@@ -77,7 +77,7 @@ func TestDecoderPointer(t *testing.T) {
7777
t.Run(tc.name, func(t *testing.T) {
7878
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
7979
if err != nil {
80-
t.Fatalf("could not create d: %w", err)
80+
t.Fatalf("could not create d: %s", err)
8181
}
8282

8383
for d.Next() {
@@ -114,7 +114,7 @@ func TestDecoderDoublePointer(t *testing.T) {
114114
t.Run(tc.name, func(t *testing.T) {
115115
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
116116
if err != nil {
117-
t.Fatalf("could not create d: %w", err)
117+
t.Fatalf("could not create d: %s", err)
118118
}
119119

120120
for d.Next() {
@@ -150,7 +150,7 @@ func TestDecoderInterface(t *testing.T) {
150150
t.Run(tc.name, func(t *testing.T) {
151151
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false})
152152
if err != nil {
153-
t.Fatalf("could not create d: %w", err)
153+
t.Fatalf("could not create d: %s", err)
154154
}
155155

156156
for d.Next() {

decoder_slice_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func TestIntSlice(t *testing.T) {
4646
t.Run(tc.name, func(t *testing.T) {
4747
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
4848
if err != nil {
49-
t.Fatalf("could not create d: %w", err)
49+
t.Fatalf("could not create d: %s", err)
5050
}
5151

5252
for d.Next() {
@@ -106,7 +106,7 @@ func TestMultiLevelIntSlice(t *testing.T) {
106106
t.Run(tc.name, func(t *testing.T) {
107107
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
108108
if err != nil {
109-
t.Fatalf("could not create d: %w", err)
109+
t.Fatalf("could not create d: %s", err)
110110
}
111111

112112
for d.Next() {
@@ -191,7 +191,7 @@ func TestStructSlice(t *testing.T) {
191191
t.Run(tc.name, func(t *testing.T) {
192192
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
193193
if err != nil {
194-
t.Fatalf("could not create d: %w", err)
194+
t.Fatalf("could not create d: %s", err)
195195
}
196196

197197
for d.Next() {

decoder_test.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ func TestIgnoreUnmatchingFields(t *testing.T) {
3636
scanTargets: []interface{}{&strVal, &intVal},
3737
expectedError: nil,
3838
},
39+
{
40+
name: "should work for a string containing a quote",
41+
config: Config{
42+
IgnoreUnmatchingFields: true,
43+
},
44+
data: "rec,2\"\n",
45+
scanTargets: []interface{}{&strVal, &strVal},
46+
expectedError: nil,
47+
},
3948
{
4049
name: "should work when numbers match with default config",
4150
config: Config{},
@@ -98,7 +107,7 @@ func TestIgnoreUnmatchingFields(t *testing.T) {
98107
t.Run(tc.name, func(t *testing.T) {
99108
d, err := NewWithConfig(strings.NewReader(tc.data), tc.config)
100109
if err != nil {
101-
t.Fatalf("could not create d: %w", err)
110+
t.Fatalf("could not create d: %s", err)
102111
}
103112

104113
for d.Next() {

escape_reader.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package csvdecoder
2+
3+
import (
4+
"io"
5+
"io/ioutil"
6+
"strings"
7+
"unicode"
8+
)
9+
10+
// readerCustomEscape is an io.Reader serving CSV data in which every quote
// escaped with a custom character has already been rewritten into the
// doubled-quote form that encoding/csv understands.
type readerCustomEscape struct {
	reader io.Reader
}

const (
	// defaultEscapeChar is the character used by the encoding/csv package to escape a quote
	defaultEscapeChar = '"'
	// quote is the character that delimits quoted fields.
	quote = '"'
)

// NewReaderWithCustomEscape creates a reader that uses a custom character as escape character
// instead of the quote used by the encoding/csv Reader.
//
// The whole of r is read into memory up front; any read error is returned
// as-is. The data is then rewritten in a single left-to-right pass:
//
//   - escapeChar followed by escapeChar is an escaped escape character; it is
//     copied through unchanged and can never take part in escaping a quote
//   - escapeChar followed by a quote becomes the encoding/csv escape
//     sequence (two quotes)
//   - everything else is copied through unchanged
//
// No placeholder character is used, so the input may contain any character,
// including U+FFFD.
func NewReaderWithCustomEscape(r io.Reader, escapeChar rune) (*readerCustomEscape, error) {
	b, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}

	// Converting an out-of-range rune to a string yields U+FFFD; make that
	// substitution explicit so the matching below works on a real code point.
	if escapeChar < 0 || escapeChar > unicode.MaxRune {
		escapeChar = unicode.ReplacementChar
	}

	var (
		src           = string(b)
		escape        = string(escapeChar)
		escapedEscape = string([]rune{escapeChar, escapeChar})
		escapedQuote  = string([]rune{escapeChar, quote})
		csvQuote      = string([]rune{defaultEscapeChar, quote})
	)

	var out strings.Builder
	out.Grow(len(src))

	for {
		i := strings.Index(src, escape)
		if i < 0 {
			// no escape character left, the remainder is copied verbatim
			out.WriteString(src)
			break
		}

		out.WriteString(src[:i])
		src = src[i:]

		switch {
		case strings.HasPrefix(src, escapedEscape):
			// checked first so that the second escape character of the pair
			// cannot be mistaken for the start of an escaped quote
			out.WriteString(escapedEscape)
			src = src[len(escapedEscape):]
		case strings.HasPrefix(src, escapedQuote):
			out.WriteString(csvQuote)
			src = src[len(escapedQuote):]
		default:
			// a lone escape character that escapes nothing
			out.WriteString(escape)
			src = src[len(escape):]
		}
	}

	return &readerCustomEscape{
		reader: strings.NewReader(out.String()),
	}, nil
}

// Read implements io.Reader by delegating to the reader holding the
// rewritten data.
func (r readerCustomEscape) Read(p []byte) (n int, err error) {
	return r.reader.Read(p)
}

escape_reader_test.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package csvdecoder
2+
3+
import (
4+
"io/ioutil"
5+
"strings"
6+
"testing"
7+
)
8+
9+
func TestEscapeReader(t *testing.T) {
10+
for _, tc := range []struct {
11+
name string
12+
input string
13+
escapeChar rune
14+
expectedResult string
15+
}{
16+
{
17+
name: "should work without anything to escape",
18+
input: "my example string",
19+
escapeChar: '_',
20+
expectedResult: "my example string",
21+
},
22+
{
23+
name: "should replace escaping quotes",
24+
input: `my _"example_" string`,
25+
escapeChar: '_',
26+
expectedResult: `my ""example"" string`,
27+
},
28+
{
29+
name: "should not replace escaping chars without quotes",
30+
input: "my _example_ string",
31+
escapeChar: '_',
32+
expectedResult: "my _example_ string",
33+
},
34+
{
35+
name: "should ignore escaped escaped chars",
36+
input: `my example string__"`,
37+
escapeChar: '_',
38+
expectedResult: `my example string__"`,
39+
},
40+
} {
41+
tc := tc
42+
t.Run(tc.name, func(t *testing.T) {
43+
r, err := NewReaderWithCustomEscape(strings.NewReader(tc.input), tc.escapeChar)
44+
if err != nil {
45+
t.Fatal(err)
46+
}
47+
48+
result, err := ioutil.ReadAll(r)
49+
if err != nil {
50+
t.Fatal(err)
51+
}
52+
53+
if string(result) != tc.expectedResult {
54+
t.Errorf("expected value '%s' got '%s'", tc.expectedResult, result)
55+
}
56+
})
57+
}
58+
}

0 commit comments

Comments
 (0)