Skip to content

Commit 6bccbc2

Browse files
author
eberhardtj
committed
Replace Encoder with SimpleEncoder
The currently provided encoder implementation does not handle multi-value columns, nor does it offer the option to include a header or record ids in the output.
1 parent 0763af1 commit 6bccbc2

File tree

2 files changed

+247
-142
lines changed

2 files changed

+247
-142
lines changed

src/main/java/org/metafacture/csv/CsvLiteralEncoder.java

Lines changed: 0 additions & 142 deletions
This file was deleted.
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
/*
2+
* Copyright 2018 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.metafacture.csv;
17+
18+
import com.opencsv.CSVWriter;
19+
import com.opencsv.CSVWriterBuilder;
20+
import com.opencsv.ICSVWriter;
21+
import org.metafacture.framework.FluxCommand;
22+
import org.metafacture.framework.MetafactureException;
23+
import org.metafacture.framework.ObjectReceiver;
24+
import org.metafacture.framework.StreamReceiver;
25+
import org.metafacture.framework.annotations.Description;
26+
import org.metafacture.framework.annotations.In;
27+
import org.metafacture.framework.annotations.Out;
28+
import org.metafacture.framework.helpers.DefaultStreamPipe;
29+
30+
import java.io.IOException;
31+
import java.io.StringWriter;
32+
import java.util.ArrayList;
33+
import java.util.List;
34+
import java.util.stream.Collectors;
35+
36+
/**
37+
* A csv encoder that converts a record into a csv line (Default separator: comma).
38+
*
39+
* <P>
40+
* Each record represents a row. Each literal value represents a column value.
41+
* If a sequence of literals occur share the same name, a nested csv record is used as column value.
42+
* </P>
43+
*/
44+
@Description("Encodes each value in a record as a csv row.")
45+
@In(StreamReceiver.class)
46+
@Out(String.class)
47+
@FluxCommand("encode-csv")
48+
public class SimpleCsvEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
49+
50+
private CSVWriter csvWriter;
51+
private StringWriter writer;
52+
53+
/** List of items that will be written to a row */
54+
private List<String> rowItems;
55+
/** Last encountered literal name */
56+
private String lastLiteralName;
57+
/** List of literal values that has the same name */
58+
private List<String> literalValues;
59+
/** Flag for the first record encounter */
60+
private boolean isFirstRecord;
61+
/** Flag for the first literal encounter in a record */
62+
private boolean isFirstLiteral;
63+
64+
private List<String> header;
65+
private char separator;
66+
private boolean includeHeader;
67+
private boolean includeRecordId;
68+
69+
public SimpleCsvEncoder() {
70+
this(CSVWriter.DEFAULT_SEPARATOR);
71+
}
72+
73+
public SimpleCsvEncoder(char separator) {
74+
this.separator = separator;
75+
this.includeRecordId = false;
76+
this.includeHeader = false;
77+
this.header = new ArrayList<>();
78+
79+
this.isFirstRecord = true;
80+
this.isFirstLiteral = true;
81+
82+
this.rowItems = new ArrayList<>();
83+
this.lastLiteralName = null;
84+
this.literalValues = new ArrayList<>();
85+
}
86+
87+
/**
88+
* Start each line with the record id.
89+
*/
90+
public void setIncludeRecordId(boolean includeRecordId) {
91+
this.includeRecordId = includeRecordId;
92+
}
93+
94+
/**
95+
* Add a column description header.
96+
*/
97+
public void setIncludeHeader(boolean includeHeader) {
98+
this.includeHeader = includeHeader;
99+
}
100+
101+
public void setSeparator(String separator) {
102+
if (separator.length() > 1) {
103+
throw new MetafactureException("Separator needs to be a single character.");
104+
}
105+
this.separator = separator.charAt(0);
106+
}
107+
108+
public void setSeparator(char separator) {
109+
this.separator = separator;
110+
}
111+
112+
private void initialize() {
113+
writer = new StringWriter();
114+
String emptyLineEnd = "";
115+
csvWriter = new CSVWriter(writer,
116+
separator,
117+
CSVWriter.DEFAULT_QUOTE_CHARACTER,
118+
CSVWriter.DEFAULT_ESCAPE_CHARACTER,
119+
emptyLineEnd);
120+
}
121+
122+
private String[] arrayOf(List<String> list) {
123+
int length = list.size();
124+
return list.toArray(new String[length]);
125+
}
126+
127+
private String innerRowOf(List<String> items) {
128+
StringWriter writer = new StringWriter();
129+
ICSVWriter csvWriter = new CSVWriterBuilder(writer)
130+
.withSeparator(separator)
131+
.withQuoteChar(CSVWriter.DEFAULT_QUOTE_CHARACTER)
132+
.withLineEnd("")
133+
.build();
134+
135+
String row[] = arrayOf(items);
136+
csvWriter.writeNext(row);
137+
String line = writer.toString().trim();
138+
return line;
139+
}
140+
141+
private void resetCaches() {
142+
isFirstLiteral = true;
143+
literalValues = new ArrayList<>();
144+
rowItems = new ArrayList<>();
145+
}
146+
147+
private void writeRow(List<String> rowItems) {
148+
String[] row = arrayOf(rowItems);
149+
csvWriter.writeNext(row);
150+
String line = writer.toString();
151+
getReceiver().process(line);
152+
153+
writer.getBuffer().setLength(0);
154+
}
155+
156+
@Override
157+
public void startRecord(final String identifier) {
158+
if (isFirstRecord) {
159+
initialize();
160+
if (includeRecordId) {
161+
header.add("record id");
162+
}
163+
}
164+
165+
rowItems = new ArrayList<>();
166+
167+
if (includeRecordId) {
168+
rowItems.add(identifier);
169+
}
170+
}
171+
172+
@Override
173+
public void endRecord() {
174+
if (isFirstRecord) {
175+
if (includeHeader) {
176+
List<String> uniqueHeader = header.stream().distinct().collect(Collectors.toList());
177+
writeRow(uniqueHeader);
178+
header.clear();
179+
}
180+
isFirstRecord = false;
181+
}
182+
183+
String rowItem = literalValues.size() == 1 ? literalValues.get(0) : innerRowOf(literalValues);
184+
rowItems.add(rowItem);
185+
186+
writeRow(rowItems);
187+
188+
resetCaches();
189+
}
190+
191+
@Override
192+
public void literal(final String name, final String value) {
193+
if (isFirstRecord) {
194+
header.add(name);
195+
}
196+
197+
if (isFirstLiteral) {
198+
lastLiteralName = name;
199+
isFirstLiteral = false;
200+
}
201+
202+
if (name.equals(lastLiteralName)) {
203+
literalValues.add(value);
204+
} else {
205+
String rowItem = literalValues.size() == 1 ? literalValues.get(0) : innerRowOf(literalValues);
206+
rowItems.add(rowItem);
207+
208+
literalValues = new ArrayList<>();
209+
literalValues.add(value);
210+
}
211+
212+
lastLiteralName = name;
213+
}
214+
215+
@Override
216+
public void onCloseStream() {
217+
try {
218+
csvWriter.close();
219+
} catch (IOException e) {
220+
throw new MetafactureException(e);
221+
}
222+
}
223+
224+
@Override
225+
public void onResetStream() {
226+
this.includeRecordId = false;
227+
this.includeHeader = false;
228+
this.header = new ArrayList<>();
229+
230+
this.isFirstRecord = true;
231+
this.isFirstLiteral = true;
232+
233+
this.rowItems = new ArrayList<>();
234+
this.lastLiteralName = null;
235+
this.literalValues = new ArrayList<>();
236+
}
237+
238+
@Override
239+
public void startEntity(final String name) {
240+
// Ignore
241+
}
242+
243+
@Override
244+
public void endEntity() {
245+
// Ignore
246+
}
247+
}

0 commit comments

Comments
 (0)