Use of com.univocity.parsers.csv.CsvFormat in project backend by CatalogueOfLife.
Class DwcaReader, method discoverSchemas.
/**
 * Discovers the schemas of this archive and selects the core row type.
 * <p>
 * An EML file, when present, is remembered as the metadata file. If a meta.xml descriptor exists
 * it is read directly. Otherwise discovery is delegated to the superclass, artificial id fields are
 * added for known rowType/id term pairs, and the core row type is picked from the discovered schemas.
 *
 * @param termPrefix optional preferred term namespace prefix to use when looking up class and property terms
 * @throws IOException if the archive descriptor or data files cannot be read
 */
@Override
protected void discoverSchemas(String termPrefix) throws IOException {
  Path emlPath = resolve(EML_FN);
  if (Files.exists(emlPath)) {
    metadataFile = emlPath;
  }

  Path metaPath = resolve(META_FN);
  if (Files.exists(metaPath)) {
    readFromMeta(metaPath);
  } else {
    super.discoverSchemas(termPrefix);

    // add artificial id terms for known rowType id pairs
    for (Schema schema : schemas.values()) {
      if (schema.hasTerm(DwcaTerm.ID)) {
        continue;
      }
      Term idTerm = ROW_TYPE_TO_ID.getOrDefault(schema.rowType, null);
      if (idTerm == null || !schema.hasTerm(idTerm)) {
        continue;
      }
      // the extra id field shares the column index of the rowType specific id term
      Schema.Field idField = new Schema.Field(DwcaTerm.ID, schema.field(idTerm).index);
      List<Schema.Field> fields = Lists.newArrayList(schema.columns);
      fields.add(idField);
      updateSchema(new Schema(schema.file, schema.rowType, schema.encoding, schema.settings, fields));
    }

    // select core
    if (size() == 1) {
      coreRowType = schemas.keySet().iterator().next();
    } else {
      // the first preferred type that has data wins
      for (Term candidate : PREFERRED_CORE_TYPES) {
        if (hasData(candidate)) {
          coreRowType = candidate;
          LOG.warn("{} data files found but no archive descriptor. Using {}", size(), coreRowType);
          break;
        }
      }
      if (coreRowType == null) {
        // rather abort instead of picking randomly
        throw new NormalizationFailedException.SourceInvalidException("Multiple unknown schemas found: " + Joiner.on(", ").join(schemas.keySet()));
      }
    }
  }

  CsvFormat coreFormat = coreSchema().settings.getFormat();
  LOG.info("Found {} core [delim={} quote={}] and {} extensions", coreRowType, coreFormat.getDelimiter(), coreFormat.getQuote(), size() - 1);
}
Use of com.univocity.parsers.csv.CsvFormat in project conquery by bakdata.
Class CSVConfig, method createCsvFormat.
/**
 * Builds a {@link CsvFormat} from the options held by this class: quote, escape, delimiter,
 * comment and line separator. The escape option is used both as the quote escape and as the
 * character that escapes the quote escape itself.
 *
 * @return a format instance suitable for {@link CsvWriterSettings} and {@link CsvParserSettings}
 */
private CsvFormat createCsvFormat() {
	final CsvFormat csvFormat = new CsvFormat();

	// structural characters
	csvFormat.setDelimiter(getDelimeter());
	csvFormat.setLineSeparator(getLineSeparator());
	csvFormat.setComment(getComment());

	// quoting and escaping
	csvFormat.setQuote(getQuote());
	csvFormat.setQuoteEscape(getEscape());
	csvFormat.setCharToEscapeQuoteEscaping(getEscape());

	return csvFormat;
}
Use of com.univocity.parsers.csv.CsvFormat in project hillview by vmware.
Class CsvFileWriter, method writeTable.
/**
 * Writes the rows of the given table that are part of its membership set as delimited text
 * to the configured file, optionally gzip-compressed.
 * <p>
 * When compression is enabled and the file name does not already end in ".gz", that suffix
 * is appended. Missing cells are written as null values; empty values are written as {@code ""}.
 * The output streams are closed even when writing fails.
 *
 * @param table the table to write
 * @throws RuntimeException wrapping any {@link IOException} raised while opening, writing or closing the file
 */
public void writeTable(ITable table) {
    try {
        Schema schema = table.getSchema();
        List<IColumn> cols = table.getLoadedColumns(schema.getColumnNames());
        CsvWriterSettings settings = new CsvWriterSettings();
        CsvFormat format = new CsvFormat();
        format.setDelimiter(this.separator);
        settings.setFormat(format);
        settings.setEmptyValue("\"\"");
        settings.setNullValue(null);

        String fn = this.fileName;
        if (this.compress && !fn.endsWith(".gz"))
            fn += ".gz";

        // try-with-resources guarantees the file handle is released on every path,
        // including a failure while constructing the gzip stream or while writing rows.
        // Without compression both resources are the same stream; closing it twice is harmless.
        try (FileOutputStream fs = new FileOutputStream(fn);
             OutputStream output = this.compress ? new GZIPOutputStream(fs) : fs) {
            CsvWriter writer = new CsvWriter(output, settings);
            String[] data = new String[schema.getColumnCount()];
            int index = 0;
            for (String c : schema.getColumnNames()) {
                data[index] = c;
                index++;
            }
            if (this.writeHeaderRow)
                writer.writeHeaders(data);
            IRowIterator rowIter = table.getMembershipSet().getIterator();
            int nextRow = rowIter.getNextRow();
            // a negative row index ends the iteration
            while (nextRow >= 0) {
                for (index = 0; index < cols.size(); index++) {
                    IColumn colI = cols.get(index);
                    String d = colI.isMissing(nextRow) ? null : colI.asString(nextRow);
                    data[index] = d;
                }
                writer.writeRow(data);
                nextRow = rowIter.getNextRow();
            }
            // flush buffered rows before the underlying streams are closed
            writer.close();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Use of com.univocity.parsers.csv.CsvFormat in project knime-base by knime.
Class CSVFormatAutoDetectionSwingWorker, method doneWithContext.
/**
 * Applies the result of the format auto-detection to the dialog.
 * <p>
 * On success the detected format is pushed into the dialog and a success status is shown.
 * If the detection failed with a cause, an error status is shown (a dedicated message when the
 * cause or its own cause is an {@link IOException}) and the failure is logged as a warning.
 * Interruption and cancellation are ignored. In every case the dialog UI is reset and the
 * preview is refreshed.
 */
@Override
protected void doneWithContext() {
    boolean detectionApplied = false;
    try {
        final CsvFormat format = get();
        m_dialog.updateAutodetectionFields(format);
        detectionApplied = true;
        m_dialog.setStatus("Successfully autodetected!", null, SharedIcons.SUCCESS.get());
    } catch (final ExecutionException e) {
        final Throwable failure = e.getCause();
        if (failure != null) {
            // both error paths differ only in the message shown to the user
            final boolean ioRelated = failure instanceof IOException || failure.getCause() instanceof IOException;
            m_dialog.setStatus(ioRelated ? IO_ERROR : AUTO_DETECTION_ERROR, STATUS_TOOLTIP_TEXT,
                SharedIcons.ERROR.get());
            LOGGER.warn(e.getMessage(), e);
        }
    } catch (InterruptedException | CancellationException ex) {
        // ignore
    } finally {
        m_dialog.resetUIafterAutodetection();
        // always call m_dialog#refreshPreview, it enables the preview
        m_dialog.refreshPreview(detectionApplied);
    }
}
Use of com.univocity.parsers.csv.CsvFormat in project knime-base by knime.
Class CSVFormatAutoDetectionSwingWorker, method getCsvParserSettings.
/**
 * Creates parser settings configured for automatic format detection.
 * <p>
 * The comment character is the first character of {@code comment}, or {@code '\0'} when the
 * string is empty. Input is read on the calling thread using the given buffer size.
 *
 * @param comment the comment string; only its first character is used
 * @param inputBufferSize the input buffer size passed to the parser settings
 * @return the parser settings with automatic format detection enabled
 */
private static CsvParserSettings getCsvParserSettings(final String comment, final int inputBufferSize) {
    final CsvParserSettings parserSettings = new CsvParserSettings();
    parserSettings.setInputBufferSize(inputBufferSize);
    parserSettings.setReadInputOnSeparateThread(false);

    final char commentChar;
    if (comment.isEmpty()) {
        commentChar = '\0';
    } else {
        commentChar = comment.charAt(0);
    }
    final CsvFormat baseFormat = new CsvFormat();
    baseFormat.setComment(commentChar);

    parserSettings.setFormat(baseFormat);
    parserSettings.detectFormatAutomatically();
    return parserSettings;
}
Aggregations