use of com.univocity.parsers.csv.CsvFormat in project droid by digital-preservation.
the class CsvItemWriter method open.
/**
 * Binds this item writer to the given output and emits the header row.
 *
 * <p>The CSV writer is configured to quote every field and to terminate
 * lines with a single line feed. If no headers have been set yet, the
 * default {@code HEADERS} are used.
 *
 * @param writer the destination the CSV output is written to
 */
@Override
public void open(final Writer writer) {
    // Keep Unix-style line separators, matching the previous behaviour.
    final CsvFormat unixFormat = new CsvFormat();
    unixFormat.setLineSeparator("\n");

    final CsvWriterSettings settings = new CsvWriterSettings();
    settings.setQuoteAllFields(true);
    settings.setFormat(unixFormat);

    csvWriter = new CsvWriter(writer, settings);

    // Fall back to the default header set when none was supplied.
    if (headers == null) {
        headers = HEADERS;
    }
    csvWriter.writeHeaders(headers);
}
use of com.univocity.parsers.csv.CsvFormat in project hillview by vmware.
the class CsvFileTest method csvWriterTest.
/**
 * Regression test: writes a row containing an empty string and a null
 * with the Univocity CSV writer, configured so that empty values are
 * rendered as a quoted empty string and nulls as nothing at all.
 *
 * <p>The file writer is closed and the scratch file is removed even when
 * writing fails, so a failing run does not leak a file handle or leave
 * {@code tmp.csv} behind.
 *
 * @throws IOException if the scratch file cannot be opened or closed
 */
@Test
public void csvWriterTest() throws IOException {
    // The Csv writer (Univocity) we were using had a bug,
    // reproduced with this test.
    String[] data = new String[] { "", null };
    CsvWriterSettings settings = new CsvWriterSettings();
    CsvFormat format = new CsvFormat();
    settings.setFormat(format);
    settings.setEmptyValue("\"\"");
    settings.setNullValue("");
    String fileName = "tmp.csv";
    File file = new File(fileName);
    try (Writer fw = new FileWriter(fileName)) {
        CsvWriter writer = new CsvWriter(fw, settings);
        writer.writeRow(data);
        // Flushes the row and closes the underlying writer; the
        // try-with-resources close that follows is then a no-op.
        writer.close();
    } finally {
        // Always clean up the scratch file, whether or not the write succeeded.
        if (file.exists()) {
            @SuppressWarnings("unused") boolean ignored = file.delete();
        }
    }
}
use of com.univocity.parsers.csv.CsvFormat in project hillview by vmware.
the class CsvFileLoader method prepareLoading.
/**
 * Opens the CSV input and makes sure a schema is available before any
 * data rows are consumed.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Take the schema supplied by {@code this.schema} (may be null) and
 *       open the file reader.</li>
 *   <li>Configure and start the parser using the configured separator.</li>
 *   <li>If the configuration declares a header row, read it. When no schema
 *       was supplied, build one of String columns from the header cells;
 *       otherwise just advance the current row counter.</li>
 *   <li>If there is still no schema, read the first line, remember it in
 *       {@code this.firstLine}, and create one String column per cell.</li>
 * </ol>
 *
 * @throws RuntimeException if a header row is expected but missing, or if
 *                          a schema must be inferred from an empty file
 */
@Override
public void prepareLoading() {
    this.actualSchema = this.schema.getSchema();
    this.file = this.getFileReader();

    CsvFormat csvFormat = new CsvFormat();
    csvFormat.setDelimiter(this.configuration.separator);

    CsvParserSettings parserSettings = new CsvParserSettings();
    parserSettings.setFormat(csvFormat);
    parserSettings.setIgnoreTrailingWhitespaces(true);
    parserSettings.setEmptyValue("");
    parserSettings.setNullValue(null);
    parserSettings.setReadInputOnSeparateThread(false);
    // With a known schema the column count is exact; otherwise allow a generous cap.
    parserSettings.setMaxColumns(
            this.actualSchema != null ? this.actualSchema.getColumnCount() : 50000);
    parserSettings.setMaxCharsPerColumn(100000);

    this.reader = new CsvParser(parserSettings);
    this.reader.beginParsing(file);

    if (this.configuration.hasHeaderRow) {
        @Nullable String[] headerRow = null;
        try {
            headerRow = this.reader.parseNext();
        } catch (Exception ex) {
            this.error(ex.getMessage());
        }
        if (headerRow == null) {
            throw new RuntimeException("Missing header row " + this.filename);
        }

        if (this.actualSchema != null) {
            // Schema was supplied: the header row is only counted.
            this.currentRow++;
        } else {
            HillviewLogger.instance.info("Creating schema");
            this.actualSchema = new Schema();
            for (int position = 0; position < headerRow.length; position++) {
                String name = headerRow[position];
                if ((name == null) || name.isEmpty()) {
                    // Blank header cell: synthesize a name from its position.
                    name = this.actualSchema.newColumnName("Column_" + position);
                }
                name = this.actualSchema.newColumnName(name);
                ColumnDescription description = new ColumnDescription(name, ContentsKind.String);
                this.actualSchema.append(description);
            }
        }
    }

    if (this.actualSchema == null) {
        // No schema and no header row: infer the column count from the first line.
        this.actualSchema = new Schema();
        this.firstLine = reader.parseNext();
        if (this.firstLine == null) {
            throw new RuntimeException("Cannot create schema from empty CSV file");
        }
        final int width = this.firstLine.length;
        for (int i = 0; i < width; i++) {
            ColumnDescription description = new ColumnDescription("Column " + i, ContentsKind.String);
            this.actualSchema.append(description);
        }
    }
}
use of com.univocity.parsers.csv.CsvFormat in project dsbulk by datastax.
the class CSVConnector method init.
/**
 * Initializes the connector and builds either the parser settings (read
 * mode) or the writer settings (write mode) from the configured CSV
 * options.
 *
 * <p>A single {@link CsvFormat} carrying delimiter, quote, escape and
 * comment characters is attached to whichever settings object is created;
 * its line separator is filled in afterwards depending on the newline
 * option.
 *
 * @throws URISyntaxException declared by this method; propagated from {@code super.init()}
 *                            if it throws one
 * @throws IOException        declared by this method; propagated from {@code super.init()}
 *                            if it throws one
 */
@Override
public void init() throws URISyntaxException, IOException {
    super.init();

    CsvFormat csvFormat = new CsvFormat();
    csvFormat.setDelimiter(delimiter);
    csvFormat.setQuote(quote);
    csvFormat.setQuoteEscape(escape);
    csvFormat.setComment(comment);

    final boolean detectNewline = AUTO_NEWLINE.equalsIgnoreCase(newline);
    // The "auto" null value maps to a real null in both modes.
    final String effectiveNullValue = AUTO.equalsIgnoreCase(nullValue) ? null : nullValue;

    if (!read) {
        writerSettings = new CsvWriterSettings();
        writerSettings.setFormat(csvFormat);
        writerSettings.setNullValue(effectiveNullValue);
        // DAT-605: use empty quoted fields by default to distinguish empty strings from nulls
        final String effectiveEmptyValue =
                AUTO.equalsIgnoreCase(emptyValue) ? "" + quote + quote : emptyValue;
        writerSettings.setEmptyValue(effectiveEmptyValue);
        writerSettings.setQuoteEscapingEnabled(true);
        writerSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespaces);
        writerSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhitespaces);
        writerSettings.setMaxColumns(maxColumns);
        writerSettings.setNormalizeLineEndingsWithinQuotes(normalizeLineEndingsInQuotes);
        // When writing, "auto" means the platform line separator.
        csvFormat.setLineSeparator(detectNewline ? System.lineSeparator() : newline);
    } else {
        parserSettings = new CsvParserSettings();
        parserSettings.setFormat(csvFormat);
        parserSettings.setNullValue(effectiveNullValue);
        final String effectiveEmptyValue = AUTO.equalsIgnoreCase(emptyValue) ? "" : emptyValue;
        parserSettings.setEmptyValue(effectiveEmptyValue);
        // do not use this feature as the parser throws an error if the file
        // has fewer lines than skipRecords;
        // we'll use the skip() operator instead.
        // parserSettings.setNumberOfRowsToSkip(skipRecords);
        parserSettings.setHeaderExtractionEnabled(header);
        parserSettings.setMaxCharsPerColumn(maxCharsPerColumn);
        parserSettings.setMaxColumns(maxColumns);
        parserSettings.setNormalizeLineEndingsWithinQuotes(normalizeLineEndingsInQuotes);
        parserSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespaces);
        parserSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhitespaces);
        parserSettings.setIgnoreLeadingWhitespacesInQuotes(ignoreLeadingWhitespacesInQuotes);
        parserSettings.setIgnoreTrailingWhitespacesInQuotes(ignoreTrailingWhitespacesInQuotes);
        // When reading, "auto" delegates separator detection to the parser.
        if (detectNewline) {
            parserSettings.setLineSeparatorDetectionEnabled(true);
        } else {
            csvFormat.setLineSeparator(newline);
        }
    }
}
use of com.univocity.parsers.csv.CsvFormat in project drill by apache.
the class TextRecordWriter method init.
/**
 * Initializes the text record writer from the supplied options.
 *
 * <p>Reads the output location, file prefix and extension, obtains the
 * file system, resets the file number index, and builds the CSV writer
 * settings (column limits, header writing, forced quoting, line
 * separator, field delimiter, quote and escape characters).
 *
 * @param writerOptions option map; this method reads the keys
 *                      {@code location}, {@code prefix}, {@code extension},
 *                      {@code addHeader}, {@code forceQuotes},
 *                      {@code lineSeparator}, {@code fieldDelimiter},
 *                      {@code quote} and {@code escape}
 * @throws IOException declared by this method; propagated from
 *                     {@code FileSystem.get} if it throws one
 */
@Override
public void init(Map<String, String> writerOptions) throws IOException {
    this.location = writerOptions.get("location");
    this.prefix = writerOptions.get("prefix");
    this.fs = FileSystem.get(fsConf);

    // A missing extension yields no suffix; otherwise prepend the dot.
    final String suffix = writerOptions.get("extension");
    if (suffix == null) {
        this.extension = "";
    } else {
        this.extension = "." + suffix;
    }
    this.fileNumberIndex = 0;

    final CsvWriterSettings csvSettings = new CsvWriterSettings();
    csvSettings.setMaxColumns(TextFormatPlugin.MAXIMUM_NUMBER_COLUMNS);
    csvSettings.setMaxCharsPerColumn(TextFormatPlugin.MAX_CHARS_PER_COLUMN);

    final boolean addHeader = Boolean.parseBoolean(writerOptions.get("addHeader"));
    csvSettings.setHeaderWritingEnabled(addHeader);
    final boolean forceQuotes = Boolean.parseBoolean(writerOptions.get("forceQuotes"));
    csvSettings.setQuoteAllFields(forceQuotes);

    // Adjust the format instance already owned by the settings.
    final CsvFormat csvFormat = csvSettings.getFormat();
    csvFormat.setLineSeparator(writerOptions.get("lineSeparator"));
    csvFormat.setDelimiter(writerOptions.get("fieldDelimiter"));
    final String quoteOption = writerOptions.get("quote");
    csvFormat.setQuote(quoteOption.charAt(0));
    final String escapeOption = writerOptions.get("escape");
    csvFormat.setQuoteEscape(escapeOption.charAt(0));
    // do not escape "escape" char
    csvFormat.setCharToEscapeQuoteEscaping(TextFormatPlugin.NULL_CHAR);

    this.writerSettings = csvSettings;
    logger.trace("Text writer settings: {}", this.writerSettings);
}
Aggregations