Search in sources :

Example 41 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project kylo by Teradata.

the class CSVFileSchemaParser method createCSVFormat.

/**
 * Builds the {@link CSVFormat} used for parsing.
 *
 * <p>When auto-detection is enabled, the format is derived from {@code sampleData} and the
 * detected delimiter and quote character are written back to {@code separatorChar} and
 * {@code quoteChar}. Otherwise the format starts from {@link CSVFormat#DEFAULT} with missing
 * column names allowed, and each of the separator, escape and quote settings is applied only
 * when it is non-empty.
 *
 * @param sampleData sample text handed to the auto-detector; not read when auto-detection is off
 * @return the detected or manually configured format
 * @throws IOException declared for the detection call (presumably raised while reading the sample)
 */
private CSVFormat createCSVFormat(String sampleData) throws IOException {
    CSVFormat format;
    if (autoDetect) {
        // Named "detector" so the local does not shadow the boolean autoDetect field.
        CSVAutoDetect detector = new CSVAutoDetect();
        if (bufferSize == Integer.MIN_VALUE) {
            // init buffer size from Spring configuration
            SpringApplicationContext.autowire(this);
        }
        if (bufferSize > 0) {
            detector.setBufferSize(bufferSize);
        }
        format = detector.detectCSVFormat(sampleData, this.headerRow, this.separatorChar);
        this.separatorChar = Character.toString(format.getDelimiter());
        // getQuoteCharacter() returns a nullable Character; unboxing it directly would throw a
        // NullPointerException for formats without a quote character.
        // NOTE(review): a null quoteChar here means "no quote detected" - confirm callers accept null.
        final Character detectedQuote = format.getQuoteCharacter();
        this.quoteChar = (detectedQuote == null) ? null : detectedQuote.toString();
    } else {
        format = CSVFormat.DEFAULT.withAllowMissingColumnNames();
        if (StringUtils.isNotEmpty(separatorChar)) {
            format = format.withDelimiter(toChar(separatorChar).charAt(0));
        }
        if (StringUtils.isNotEmpty(escapeChar)) {
            format = format.withEscape(toChar(escapeChar).charAt(0));
        }
        if (StringUtils.isNotEmpty(quoteChar)) {
            format = format.withQuoteMode(QuoteMode.MINIMAL).withQuote(toChar(quoteChar).charAt(0));
        }
    }
    return format;
}
Also used : CSVFormat(org.apache.commons.csv.CSVFormat)

Example 42 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project nifi by apache.

the class TestCSVRecordReader method testMultipleRecordsEscapedWithSpecialChar.

/**
 * Reads the multi-record fixture whose columns are separated by a special (control)
 * character and checks that exactly two records are returned with the expected values,
 * with the "balance" column typed as a double.
 */
@Test
public void testMultipleRecordsEscapedWithSpecialChar() throws IOException, MalformedRecordException {
    // Column separator used by the fixture file.
    final char specialDelimiter = StringEscapeUtils.unescapeJava("\u0001").charAt(0);
    final CSVFormat csvFormat = CSVFormat.DEFAULT
        .withFirstRecordAsHeader()
        .withTrim()
        .withQuote('"')
        .withDelimiter(specialDelimiter);

    // Start from the default fields, swapping in a double-typed "balance" field.
    final List<RecordField> recordFields = getDefaultFields();
    recordFields.replaceAll(field -> {
        if (field.getFieldName().equals("balance")) {
            return new RecordField("balance", doubleDataType);
        }
        return field;
    });
    final RecordSchema recordSchema = new SimpleRecordSchema(recordFields);

    final Object[] expectedFirst = new Object[] { "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA" };
    final Object[] expectedSecond = new Object[] { "2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA" };

    final File fixture = new File("src/test/resources/csv/multi-bank-account_escapedchar.csv");
    try (final InputStream in = new FileInputStream(fixture);
        final CSVRecordReader recordReader = createReader(in, recordSchema, csvFormat)) {
        final Object[] actualFirst = recordReader.nextRecord().getValues();
        Assert.assertArrayEquals(expectedFirst, actualFirst);

        final Object[] actualSecond = recordReader.nextRecord().getValues();
        Assert.assertArrayEquals(expectedSecond, actualSecond);

        // The fixture holds only two records.
        assertNull(recordReader.nextRecord());
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CSVFormat(org.apache.commons.csv.CSVFormat) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) File(java.io.File) Test(org.junit.Test)

Example 43 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project hmftools by hartwigmedical.

the class PatientCancerTypes method writeRecords.

/**
 * Writes the given records as CSV to {@code outputPath}, using the names of {@code Header}
 * as the header row and printing {@code null} values as empty strings.
 *
 * @param outputPath path of the file to write
 * @param patientCancerTypes records to write; each is converted via {@code csvRecord}
 * @throws IOException if the file cannot be opened or written
 */
public static void writeRecords(@NotNull final String outputPath, @NotNull final List<PatientCancerTypes> patientCancerTypes) throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withNullString("").withHeader(Header.class);
    // try-with-resources closes the writer even when the printer constructor (which emits
    // the header) or printRecords throws; previously the file handle leaked on failure.
    try (final FileWriter writer = new FileWriter(outputPath);
         final CSVPrinter printer = new CSVPrinter(writer, format)) {
        printer.printRecords(patientCancerTypes.stream().map(PatientCancerTypes::csvRecord).collect(Collectors.toList()));
    }
}
Also used : CSVPrinter(org.apache.commons.csv.CSVPrinter) FileWriter(java.io.FileWriter) CSVFormat(org.apache.commons.csv.CSVFormat)

Example 44 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project hmftools by hartwigmedical.

the class PortalClinicalData method writeRecords.

/**
 * Writes the given records as CSV to {@code outputPath}, using the names of {@code Header}
 * as the header row.
 *
 * @param outputPath path of the file to write
 * @param patientCancerTypes records to write; each is converted via {@code csvRecord}
 * @throws IOException if the file cannot be opened or written
 */
public static void writeRecords(@NotNull final String outputPath, @NotNull final List<PortalClinicalData> patientCancerTypes) throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withHeader(Header.class);
    // try-with-resources closes the writer even when the printer constructor (which emits
    // the header) or printRecords throws; previously the file handle leaked on failure.
    try (final FileWriter writer = new FileWriter(outputPath);
         final CSVPrinter printer = new CSVPrinter(writer, format)) {
        printer.printRecords(patientCancerTypes.stream().map(PortalClinicalData::csvRecord).collect(Collectors.toList()));
    }
}
Also used : CSVPrinter(org.apache.commons.csv.CSVPrinter) FileWriter(java.io.FileWriter) CSVFormat(org.apache.commons.csv.CSVFormat)

Example 45 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project jaqy by Teradata.

the class CSVExporterFactory method getHandler.

/**
 * Creates a {@link CSVExporter} from the parsed command line.
 *
 * <p>Options are processed in the order given, keyed on the first character of the option name:
 * <ul>
 *   <li>{@code c} - charset passed to the exporter</li>
 *   <li>{@code d} - delimiter applied to the current format</li>
 *   <li>{@code t} - replaces the current format with the one resolved by {@code CSVUtils.getFormat}</li>
 *   <li>{@code n} - name pattern used for subsequent {@code f} options</li>
 *   <li>{@code e} - encoding used for subsequent {@code f} options</li>
 *   <li>{@code f} - column index registered with the name generator and encoding in effect at that point</li>
 * </ul>
 * The first non-option argument is the output file.
 *
 * @param cmdLine parsed command line
 * @param interpreter used to report errors and to resolve the output path
 * @return the configured exporter
 * @throws IllegalArgumentException if the delimiter is invalid or no file name is given
 */
@Override
public JaqyExporter getHandler(CommandLine cmdLine, JaqyInterpreter interpreter) throws Exception {
    Charset fileCharset = DEFAULT_CHARSET;
    CSVFormat csvFormat = CSVFormat.DEFAULT;
    HashMap<Integer, CSVExportInfo> columnExports = new HashMap<Integer, CSVExportInfo>();
    CSVNameGen activeNameGen = new CSVNameGen(DEFAULT_NAME_PATTERN);
    Charset activeEncoding = DEFAULT_CHARSET;

    for (Option option : cmdLine.getOptions()) {
        final char flag = option.getOpt().charAt(0);
        if (flag == 'c') {
            fileCharset = Charset.forName(option.getValue());
        } else if (flag == 'd') {
            final char delimiter = CSVUtils.getChar(option.getValue());
            if (delimiter == 0) {
                throw new IllegalArgumentException("invalid delimiter: " + option.getValue());
            }
            csvFormat = csvFormat.withDelimiter(delimiter);
        } else if (flag == 't') {
            csvFormat = CSVUtils.getFormat(option.getValue());
        } else if (flag == 'n') {
            final String pattern = option.getValue();
            activeNameGen = new CSVNameGen(pattern);
            // A pattern that generates itself for index 1 is reported as invalid.
            if (pattern.equals(activeNameGen.getName(1))) {
                interpreter.error("Invalid name pattern: " + pattern);
            }
        } else if (flag == 'e') {
            activeEncoding = Charset.forName(option.getValue());
        } else if (flag == 'f') {
            final int columnIndex = Integer.parseInt(option.getValue());
            if (columnIndex < 1) {
                interpreter.error("Column index cannot be smaller than 1.");
            }
            // Capture the name generator and encoding that are current for this column.
            columnExports.put(columnIndex, new CSVExportInfo(activeNameGen, activeEncoding));
        }
    }

    final String[] remainingArgs = cmdLine.getArgs();
    if (remainingArgs.length == 0) {
        throw new IllegalArgumentException("missing file name.");
    }
    final Path outputFile = interpreter.getPath(remainingArgs[0]);
    return new CSVExporter(outputFile, fileCharset, csvFormat, columnExports);
}
Also used : Path(com.teradata.jaqy.interfaces.Path) HashMap(java.util.HashMap) Charset(java.nio.charset.Charset) CSVExportInfo(com.teradata.jaqy.utils.CSVExportInfo) CSVNameGen(com.teradata.jaqy.utils.CSVNameGen) CSVFormat(org.apache.commons.csv.CSVFormat) Option(org.apache.commons.cli.Option)

Aggregations

CSVFormat (org.apache.commons.csv.CSVFormat) 59, IOException (java.io.IOException) 23, CSVRecord (org.apache.commons.csv.CSVRecord) 22, CSVParser (org.apache.commons.csv.CSVParser) 19, ArrayList (java.util.ArrayList) 14, StringReader (java.io.StringReader) 13, CSVPrinter (org.apache.commons.csv.CSVPrinter) 10, InputStream (java.io.InputStream) 9, InputStreamReader (java.io.InputStreamReader) 8, HashMap (java.util.HashMap) 8, SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema) 8, RecordField (org.apache.nifi.serialization.record.RecordField) 8, RecordSchema (org.apache.nifi.serialization.record.RecordSchema) 8, Test (org.junit.Test) 8, ByteArrayOutputStream (java.io.ByteArrayOutputStream) 7, Reader (java.io.Reader) 7, LinkedHashMap (java.util.LinkedHashMap) 7, SchemaNameAsAttribute (org.apache.nifi.schema.access.SchemaNameAsAttribute) 7, MapRecord (org.apache.nifi.serialization.record.MapRecord) 7, Record (org.apache.nifi.serialization.record.Record) 7