Search in sources :

Example 1 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project backend by CatalogueOfLife.

the class DwcaReader method discoverSchemas.

/**
 * Discovers the schemas of the archive's data files and determines the core row type.
 * <p>
 * First tries to find and read a meta.xml file. If none is found, schema discovery is
 * delegated to the superclass; afterwards an artificial {@code DwcaTerm.ID} field is added
 * to schemas lacking one, and a core row type is selected. Finally the core format is logged.
 *
 * @param termPrefix optional preferred term namespace prefix to use when looking up class and property terms
 * @throws IOException if raised while reading the descriptor or by the superclass discovery
 * @throws NormalizationFailedException.SourceInvalidException if there is no descriptor, more than one
 *         schema was found and none of the preferred core types has data
 */
@Override
protected void discoverSchemas(String termPrefix) throws IOException {
    // remember the EML metadata file if the archive ships one
    Path emlPath = resolve(EML_FN);
    if (Files.exists(emlPath)) {
        metadataFile = emlPath;
    }

    Path descriptor = resolve(META_FN);
    if (Files.exists(descriptor)) {
        readFromMeta(descriptor);
    } else {
        super.discoverSchemas(termPrefix);

        // add artificial id terms for known rowType id pairs
        for (Schema schema : schemas.values()) {
            if (schema.hasTerm(DwcaTerm.ID)) {
                continue;
            }
            Term idTerm = ROW_TYPE_TO_ID.getOrDefault(schema.rowType, null);
            if (idTerm == null || !schema.hasTerm(idTerm)) {
                continue;
            }
            // the extra ID field points at the same column index as the row type's own id term
            Schema.Field idField = new Schema.Field(DwcaTerm.ID, schema.field(idTerm).index);
            List<Schema.Field> extendedColumns = Lists.newArrayList(schema.columns);
            extendedColumns.add(idField);
            updateSchema(new Schema(schema.file, schema.rowType, schema.encoding, schema.settings, extendedColumns));
        }

        // select core
        if (size() == 1) {
            // a single schema is the core by definition
            coreRowType = schemas.keySet().iterator().next();
        } else {
            // the first preferred core type that has data wins
            for (Term candidate : PREFERRED_CORE_TYPES) {
                if (hasData(candidate)) {
                    coreRowType = candidate;
                    LOG.warn("{} data files found but no archive descriptor. Using {}", size(), coreRowType);
                    break;
                }
            }
            if (coreRowType == null) {
                // rather abort instead of picking randomly
                throw new NormalizationFailedException.SourceInvalidException("Multiple unknown schemas found: " + Joiner.on(", ").join(schemas.keySet()));
            }
        }
    }

    CsvFormat coreFormat = coreSchema().settings.getFormat();
    LOG.info("Found {} core [delim={} quote={}] and {} extensions", coreRowType, coreFormat.getDelimiter(), coreFormat.getQuote(), size() - 1);
}
Also used : Path(java.nio.file.Path) Schema(life.catalogue.csv.Schema) CsvFormat(com.univocity.parsers.csv.CsvFormat) ColdpTerm(life.catalogue.coldp.ColdpTerm) DwcUnofficialTerm(life.catalogue.coldp.DwcUnofficialTerm)

Example 2 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project conquery by bakdata.

the class CSVConfig method createCsvFormat.

/**
 * Builds a fresh {@link CsvFormat} from the options configured on this object.
 * The same escape character is used both for escaping quotes and for escaping the escape itself.
 *
 * @return Format object that can be passed into {@link CsvWriterSettings} and {@link CsvParserSettings}.
 */
private CsvFormat createCsvFormat() {
    final CsvFormat csvFormat = new CsvFormat();

    // escaping: one character serves as quote escape and as escape of the escape
    csvFormat.setQuoteEscape(getEscape());
    csvFormat.setCharToEscapeQuoteEscaping(getEscape());

    // structural characters
    csvFormat.setComment(getComment());
    csvFormat.setDelimiter(getDelimeter());
    csvFormat.setLineSeparator(getLineSeparator());
    csvFormat.setQuote(getQuote());

    return csvFormat;
}
Also used : CsvFormat(com.univocity.parsers.csv.CsvFormat)

Example 3 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project hillview by vmware.

the class CsvFileWriter method writeTable.

/**
 * Writes the given table as CSV to the configured file.
 * <p>
 * The field delimiter is {@code this.separator}; empty strings are written as {@code ""} and
 * missing values as nulls. When {@code this.compress} is set the output is gzip-compressed and
 * a {@code .gz} suffix is appended to the file name unless already present. A header row with
 * the schema's column names is written when {@code this.writeHeaderRow} is set.
 * <p>
 * The underlying streams are always released, even when writing fails part-way.
 *
 * @param table table whose member rows are written, in the iteration order of its membership set
 * @throws RuntimeException wrapping any {@link IOException} raised while opening or closing the file
 */
public void writeTable(ITable table) {
    try {
        Schema schema = table.getSchema();
        List<IColumn> cols = table.getLoadedColumns(schema.getColumnNames());
        CsvWriterSettings settings = new CsvWriterSettings();
        CsvFormat format = new CsvFormat();
        format.setDelimiter(this.separator);
        settings.setFormat(format);
        settings.setEmptyValue("\"\"");
        settings.setNullValue(null);

        // Only the compressed variant gets the ".gz" suffix enforced.
        String fn = this.fileName;
        if (this.compress && !fn.endsWith(".gz")) {
            fn += ".gz";
        }

        // try-with-resources guarantees both the file handle and the (optional) gzip wrapper
        // are closed on every path, including a failure inside the GZIPOutputStream constructor
        // or while writing rows. Closing a stream twice is harmless.
        try (FileOutputStream fs = new FileOutputStream(fn);
             OutputStream output = this.compress ? new GZIPOutputStream(fs) : fs) {
            CsvWriter writer = new CsvWriter(output, settings);

            // The same buffer is used first for the header and then reused for every row.
            String[] data = new String[schema.getColumnCount()];
            int index = 0;
            for (String c : schema.getColumnNames()) {
                data[index] = c;
                index++;
            }
            if (this.writeHeaderRow) {
                writer.writeHeaders(data);
            }

            IRowIterator rowIter = table.getMembershipSet().getIterator();
            int nextRow = rowIter.getNextRow();
            // a negative row index signals the end of the iteration
            while (nextRow >= 0) {
                for (index = 0; index < cols.size(); index++) {
                    IColumn colI = cols.get(index);
                    data[index] = colI.isMissing(nextRow) ? null : colI.asString(nextRow);
                }
                writer.writeRow(data);
                nextRow = rowIter.getNextRow();
            }

            // Flushes the writer's buffered output; on a failure path the streams are
            // still released by the try-with-resources above.
            writer.close();
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : CsvWriter(com.univocity.parsers.csv.CsvWriter) Schema(org.hillview.table.Schema) GZIPOutputStream(java.util.zip.GZIPOutputStream) CsvFormat(com.univocity.parsers.csv.CsvFormat) CsvWriterSettings(com.univocity.parsers.csv.CsvWriterSettings)

Example 4 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project knime-base by knime.

the class CSVFormatAutoDetectionSwingWorker method doneWithContext.

/**
 * Applies the outcome of the format auto-detection to the dialog.
 * <p>
 * On success the detected format is pushed into the dialog and a success status is shown.
 * If the computation failed with a known cause, an error status is shown (an I/O specific one
 * when the cause or its direct cause is an {@link IOException}) and the failure is logged.
 * Interruption and cancellation are ignored. In every case the dialog UI is reset and the
 * preview is refreshed.
 */
@Override
protected void doneWithContext() {
    boolean formatApplied = false;
    try {
        final CsvFormat detected = get();
        m_dialog.updateAutodetectionFields(detected);
        formatApplied = true;
        m_dialog.setStatus("Successfully autodetected!", null, SharedIcons.SUCCESS.get());
    } catch (final ExecutionException e) {
        final Throwable cause = e.getCause();
        if (cause != null) {
            final boolean ioFailure = cause instanceof IOException || cause.getCause() instanceof IOException;
            if (ioFailure) {
                m_dialog.setStatus(IO_ERROR, STATUS_TOOLTIP_TEXT, SharedIcons.ERROR.get());
            } else {
                m_dialog.setStatus(AUTO_DETECTION_ERROR, STATUS_TOOLTIP_TEXT, SharedIcons.ERROR.get());
            }
            // both failure kinds are logged identically, after the status has been set
            LOGGER.warn(e.getMessage(), e);
        }
    } catch (InterruptedException | CancellationException ex) {
        // ignore
    } finally {
        m_dialog.resetUIafterAutodetection();
        // always call m_dialog#refreshPreview, it enables the preview
        m_dialog.refreshPreview(formatApplied);
    }
}
Also used : CancellationException(java.util.concurrent.CancellationException) CsvFormat(com.univocity.parsers.csv.CsvFormat) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 5 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project knime-base by knime.

the class CSVFormatAutoDetectionSwingWorker method getCsvParserSettings.

/**
 * Creates parser settings with automatic format detection enabled.
 *
 * @param comment the comment marker; only its first character is used, and an empty string
 *        results in the NUL character being set as the comment character
 * @param inputBufferSize the input buffer size passed on to the parser settings
 * @return settings that read on the calling thread and detect the format automatically
 */
private static CsvParserSettings getCsvParserSettings(final String comment, final int inputBufferSize) {
    final CsvFormat baseFormat = new CsvFormat();
    final char commentChar;
    if (comment.isEmpty()) {
        commentChar = '\0';
    } else {
        commentChar = comment.charAt(0);
    }
    baseFormat.setComment(commentChar);

    final CsvParserSettings parserSettings = new CsvParserSettings();
    parserSettings.setInputBufferSize(inputBufferSize);
    parserSettings.setReadInputOnSeparateThread(false);
    parserSettings.setFormat(baseFormat);
    parserSettings.detectFormatAutomatically();
    return parserSettings;
}
Also used : CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings) CsvFormat(com.univocity.parsers.csv.CsvFormat)

Aggregations

CsvFormat (com.univocity.parsers.csv.CsvFormat) 10, CsvWriterSettings (com.univocity.parsers.csv.CsvWriterSettings) 5, CsvParserSettings (com.univocity.parsers.csv.CsvParserSettings) 3, CsvWriter (com.univocity.parsers.csv.CsvWriter) 3, Schema (org.hillview.table.Schema) 2, CsvParser (com.univocity.parsers.csv.CsvParser) 1, File (java.io.File) 1, FileWriter (java.io.FileWriter) 1, IOException (java.io.IOException) 1, Writer (java.io.Writer) 1, Path (java.nio.file.Path) 1, CancellationException (java.util.concurrent.CancellationException) 1, ExecutionException (java.util.concurrent.ExecutionException) 1, GZIPOutputStream (java.util.zip.GZIPOutputStream) 1, Nullable (javax.annotation.Nullable) 1, ColdpTerm (life.catalogue.coldp.ColdpTerm) 1, DwcUnofficialTerm (life.catalogue.coldp.DwcUnofficialTerm) 1, Schema (life.catalogue.csv.Schema) 1, CsvFileWriter (org.hillview.storage.CsvFileWriter) 1, ColumnDescription (org.hillview.table.ColumnDescription) 1