Search in sources :

Example 6 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project droid by digital-preservation.

the class CsvItemWriter method open.

/**
 * Creates the CSV writer on top of the supplied {@code writer} and emits the header row.
 *
 * <p>Every field is quoted and records are terminated with {@code "\n"}. When no headers
 * have been set yet, {@code HEADERS} is used.
 *
 * @param writer destination the CSV output is written to
 */
@Override
public void open(final Writer writer) {
    // Unix-style line endings, as was done previously.
    final CsvFormat unixFormat = new CsvFormat();
    unixFormat.setLineSeparator("\n");

    final CsvWriterSettings settings = new CsvWriterSettings();
    settings.setQuoteAllFields(true);
    settings.setFormat(unixFormat);

    csvWriter = new CsvWriter(writer, settings);

    // Fall back to the default header set when none was configured.
    if (null == headers) {
        headers = HEADERS;
    }
    csvWriter.writeHeaders(headers);
}
Also used : CsvWriter(com.univocity.parsers.csv.CsvWriter) CsvWriterSettings(com.univocity.parsers.csv.CsvWriterSettings) CsvFormat(com.univocity.parsers.csv.CsvFormat)

Example 7 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project hillview by vmware.

the class CsvFileTest method csvWriterTest.

/**
 * Writes a single row containing an empty string and a {@code null} to a scratch file,
 * with the empty value rendered as {@code ""} and the null value rendered as nothing.
 *
 * <p>The test passes when the write completes without throwing. The scratch file is
 * closed and removed even when the write fails, so a failing run does not leak the
 * file handle or leave {@code tmp.csv} behind.
 *
 * @throws IOException if the scratch file cannot be opened or closed
 */
@Test
public void csvWriterTest() throws IOException {
    // The Csv writer (Univocity) we were using had a bug,
    // reproduced with this test.
    String[] data = new String[] { "", null };
    CsvWriterSettings settings = new CsvWriterSettings();
    CsvFormat format = new CsvFormat();
    settings.setFormat(format);
    settings.setEmptyValue("\"\"");
    settings.setNullValue("");
    String fileName = "tmp.csv";
    File file = new File(fileName);
    // try-with-resources guarantees the underlying writer is released if writeRow throws.
    try (Writer fw = new FileWriter(file)) {
        CsvWriter writer = new CsvWriter(fw, settings);
        writer.writeRow(data);
        writer.close();
    } finally {
        // Always clean up the scratch file, whether or not the write succeeded.
        if (file.exists()) {
            @SuppressWarnings("unused") boolean ignored = file.delete();
        }
    }
}
Also used : CsvWriter(com.univocity.parsers.csv.CsvWriter) FileWriter(java.io.FileWriter) CsvFileWriter(org.hillview.storage.CsvFileWriter) CsvWriterSettings(com.univocity.parsers.csv.CsvWriterSettings) CsvFormat(com.univocity.parsers.csv.CsvFormat) File(java.io.File) Writer(java.io.Writer) BaseTest(org.hillview.test.BaseTest) Test(org.junit.Test)

Example 8 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project hillview by vmware.

the class CsvFileLoader method prepareLoading.

/**
 * Opens the CSV input, configures the parser and makes sure {@code actualSchema} is set.
 *
 * <p>The schema comes from, in order of preference: the schema supplied via
 * {@code this.schema}; the header row (when the configuration says one exists), with
 * every column typed as a string; or the width of the first data row, which is kept in
 * {@code firstLine}.
 *
 * @throws RuntimeException if a header row is expected but absent, or if no schema is
 *                          available and the input has no rows
 */
@Override
public void prepareLoading() {
    this.actualSchema = this.schema.getSchema();
    this.file = this.getFileReader();

    CsvParserSettings parserSettings = new CsvParserSettings();
    CsvFormat csvFormat = new CsvFormat();
    csvFormat.setDelimiter(this.configuration.separator);
    parserSettings.setFormat(csvFormat);
    parserSettings.setIgnoreTrailingWhitespaces(true);
    parserSettings.setEmptyValue("");
    parserSettings.setNullValue(null);
    parserSettings.setReadInputOnSeparateThread(false);
    // With a known schema the column limit is exact; otherwise use a generous cap.
    int columnLimit = (this.actualSchema == null) ? 50000 : this.actualSchema.getColumnCount();
    parserSettings.setMaxColumns(columnLimit);
    parserSettings.setMaxCharsPerColumn(100000);

    this.reader = new CsvParser(parserSettings);
    this.reader.beginParsing(file);

    if (this.configuration.hasHeaderRow) {
        @Nullable String[] headerCells = null;
        try {
            headerCells = this.reader.parseNext();
        } catch (Exception ex) {
            this.error(ex.getMessage());
        }
        if (headerCells == null) {
            throw new RuntimeException("Missing header row " + this.filename);
        }

        if (this.actualSchema != null) {
            // Schema was supplied: the header row is only counted, not interpreted.
            this.currentRow++;
        } else {
            HillviewLogger.instance.info("Creating schema");
            this.actualSchema = new Schema();
            for (int index = 0; index < headerCells.length; index++) {
                String name = headerCells[index];
                // Blank header cells get a generated placeholder name.
                if (name == null || name.isEmpty()) {
                    name = this.actualSchema.newColumnName("Column_" + index);
                }
                name = this.actualSchema.newColumnName(name);
                this.actualSchema.append(new ColumnDescription(name, ContentsKind.String));
            }
        }
    }

    if (this.actualSchema == null) {
        // No supplied schema and no header row: size the schema from the first data row.
        this.actualSchema = new Schema();
        this.firstLine = reader.parseNext();
        if (this.firstLine == null) {
            throw new RuntimeException("Cannot create schema from empty CSV file");
        }
        final int width = this.firstLine.length;
        for (int position = 0; position < width; position++) {
            this.actualSchema.append(new ColumnDescription("Column " + position, ContentsKind.String));
        }
    }
}
Also used : CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings) ColumnDescription(org.hillview.table.ColumnDescription) GuessSchema(org.hillview.table.rows.GuessSchema) LazySchema(org.hillview.table.LazySchema) Schema(org.hillview.table.Schema) CsvFormat(com.univocity.parsers.csv.CsvFormat) CsvParser(com.univocity.parsers.csv.CsvParser) Nullable(javax.annotation.Nullable)

Example 9 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project dsbulk by datastax.

the class CSVConnector method init.

/**
 * Initializes the connector and builds either the parser settings (when reading) or the
 * writer settings (when writing) from the connector's configured options.
 *
 * <p>Both modes share one {@link CsvFormat} carrying the delimiter, quote, escape and
 * comment characters. The line separator is applied to that format last, after it has
 * been handed to the settings object.
 *
 * @throws URISyntaxException declared by this method's signature; also passed to
 *                            {@code super.init()}
 * @throws IOException        declared by this method's signature; also passed to
 *                            {@code super.init()}
 */
@Override
public void init() throws URISyntaxException, IOException {
    super.init();

    CsvFormat format = new CsvFormat();
    format.setDelimiter(delimiter);
    format.setQuote(quote);
    format.setQuoteEscape(escape);
    format.setComment(comment);

    final boolean detectNewline = AUTO_NEWLINE.equalsIgnoreCase(newline);

    if (!read) {
        writerSettings = new CsvWriterSettings();
        writerSettings.setFormat(format);
        writerSettings.setNullValue(AUTO.equalsIgnoreCase(nullValue) ? null : nullValue);
        // DAT-605: use empty quoted fields by default to distinguish empty strings from nulls
        writerSettings.setEmptyValue(AUTO.equalsIgnoreCase(emptyValue) ? "" + quote + quote : emptyValue);
        writerSettings.setQuoteEscapingEnabled(true);
        writerSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespaces);
        writerSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhitespaces);
        writerSettings.setMaxColumns(maxColumns);
        writerSettings.setNormalizeLineEndingsWithinQuotes(normalizeLineEndingsInQuotes);
        // When writing, "auto" means the platform's own line separator.
        format.setLineSeparator(detectNewline ? System.lineSeparator() : newline);
        return;
    }

    parserSettings = new CsvParserSettings();
    parserSettings.setFormat(format);
    parserSettings.setNullValue(AUTO.equalsIgnoreCase(nullValue) ? null : nullValue);
    parserSettings.setEmptyValue(AUTO.equalsIgnoreCase(emptyValue) ? "" : emptyValue);
    // do not use this feature as the parser throws an error if the file
    // has fewer lines than skipRecords;
    // we'll use the skip() operator instead.
    // parserSettings.setNumberOfRowsToSkip(skipRecords);
    parserSettings.setHeaderExtractionEnabled(header);
    parserSettings.setMaxCharsPerColumn(maxCharsPerColumn);
    parserSettings.setMaxColumns(maxColumns);
    parserSettings.setNormalizeLineEndingsWithinQuotes(normalizeLineEndingsInQuotes);
    parserSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespaces);
    parserSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhitespaces);
    parserSettings.setIgnoreLeadingWhitespacesInQuotes(ignoreLeadingWhitespacesInQuotes);
    parserSettings.setIgnoreTrailingWhitespacesInQuotes(ignoreTrailingWhitespacesInQuotes);
    // When reading, "auto" means the parser detects the separator itself.
    if (!detectNewline) {
        format.setLineSeparator(newline);
    } else {
        parserSettings.setLineSeparatorDetectionEnabled(true);
    }
}
Also used : CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings) CsvFormat(com.univocity.parsers.csv.CsvFormat) CsvWriterSettings(com.univocity.parsers.csv.CsvWriterSettings)

Example 10 with CsvFormat

use of com.univocity.parsers.csv.CsvFormat in project drill by apache.

the class TextRecordWriter method init.

/**
 * Reads the writer options and prepares the CSV writer settings used by this record writer.
 *
 * <p>Options read from {@code writerOptions}: {@code location}, {@code prefix},
 * {@code extension}, {@code addHeader}, {@code forceQuotes}, {@code lineSeparator},
 * {@code fieldDelimiter}, {@code quote} and {@code escape}. Only the first character of
 * {@code quote} and {@code escape} is used.
 *
 * @param writerOptions option name to option value
 * @throws IOException declared by this method's signature; also the call to
 *                     {@code FileSystem.get} is made here
 */
@Override
public void init(Map<String, String> writerOptions) throws IOException {
    this.location = writerOptions.get("location");
    this.prefix = writerOptions.get("prefix");
    this.fs = FileSystem.get(fsConf);

    // A missing extension yields no suffix; otherwise prefix it with a dot.
    final String rawExtension = writerOptions.get("extension");
    if (rawExtension == null) {
        this.extension = "";
    } else {
        this.extension = "." + rawExtension;
    }
    this.fileNumberIndex = 0;

    final boolean addHeader = Boolean.parseBoolean(writerOptions.get("addHeader"));
    final boolean forceQuotes = Boolean.parseBoolean(writerOptions.get("forceQuotes"));

    CsvWriterSettings csvSettings = new CsvWriterSettings();
    csvSettings.setMaxColumns(TextFormatPlugin.MAXIMUM_NUMBER_COLUMNS);
    csvSettings.setMaxCharsPerColumn(TextFormatPlugin.MAX_CHARS_PER_COLUMN);
    csvSettings.setHeaderWritingEnabled(addHeader);
    csvSettings.setQuoteAllFields(forceQuotes);

    CsvFormat csvFormat = csvSettings.getFormat();
    csvFormat.setLineSeparator(writerOptions.get("lineSeparator"));
    csvFormat.setDelimiter(writerOptions.get("fieldDelimiter"));
    csvFormat.setQuote(writerOptions.get("quote").charAt(0));
    csvFormat.setQuoteEscape(writerOptions.get("escape").charAt(0));
    // do not escape "escape" char
    csvFormat.setCharToEscapeQuoteEscaping(TextFormatPlugin.NULL_CHAR);

    this.writerSettings = csvSettings;
    logger.trace("Text writer settings: {}", this.writerSettings);
}
Also used : CsvWriterSettings(com.univocity.parsers.csv.CsvWriterSettings) CsvFormat(com.univocity.parsers.csv.CsvFormat)

Aggregations

CsvFormat (com.univocity.parsers.csv.CsvFormat)10 CsvWriterSettings (com.univocity.parsers.csv.CsvWriterSettings)5 CsvParserSettings (com.univocity.parsers.csv.CsvParserSettings)3 CsvWriter (com.univocity.parsers.csv.CsvWriter)3 Schema (org.hillview.table.Schema)2 CsvParser (com.univocity.parsers.csv.CsvParser)1 File (java.io.File)1 FileWriter (java.io.FileWriter)1 IOException (java.io.IOException)1 Writer (java.io.Writer)1 Path (java.nio.file.Path)1 CancellationException (java.util.concurrent.CancellationException)1 ExecutionException (java.util.concurrent.ExecutionException)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 Nullable (javax.annotation.Nullable)1 ColdpTerm (life.catalogue.coldp.ColdpTerm)1 DwcUnofficialTerm (life.catalogue.coldp.DwcUnofficialTerm)1 Schema (life.catalogue.csv.Schema)1 CsvFileWriter (org.hillview.storage.CsvFileWriter)1 ColumnDescription (org.hillview.table.ColumnDescription)1