Use of org.apache.commons.csv.CSVFormat in project kylo by Teradata: class CSVFileSchemaParser, method createCSVFormat.
/**
 * Builds the {@link CSVFormat} used to parse the supplied sample data.
 * <p>
 * When the {@code autoDetect} field is set, delegates detection to {@link CSVAutoDetect}
 * and records the detected delimiter and quote character back into this parser's fields.
 * Otherwise assembles a format from the explicitly configured separator, escape and
 * quote characters on top of {@code CSVFormat.DEFAULT}.
 *
 * @param sampleData sample CSV content used for auto-detection
 * @return the configured CSV format
 * @throws IOException if auto-detection fails reading the sample data
 */
private CSVFormat createCSVFormat(String sampleData) throws IOException {
    CSVFormat format;
    if (autoDetect) {
        // Renamed local from "autoDetect" to avoid shadowing the boolean field of the
        // same name that was just tested above.
        CSVAutoDetect detector = new CSVAutoDetect();
        if (bufferSize == Integer.MIN_VALUE) {
            // init buffer size from Spring configuration
            SpringApplicationContext.autowire(this);
        }
        if (bufferSize > 0) {
            detector.setBufferSize(bufferSize);
        }
        format = detector.detectCSVFormat(sampleData, this.headerRow, this.separatorChar);
        // Persist what detection found so later parsing uses the same characters.
        this.separatorChar = Character.toString(format.getDelimiter());
        this.quoteChar = Character.toString(format.getQuoteCharacter());
    } else {
        format = CSVFormat.DEFAULT.withAllowMissingColumnNames();
        if (StringUtils.isNotEmpty(separatorChar)) {
            format = format.withDelimiter(toChar(separatorChar).charAt(0));
        }
        if (StringUtils.isNotEmpty(escapeChar)) {
            format = format.withEscape(toChar(escapeChar).charAt(0));
        }
        if (StringUtils.isNotEmpty(quoteChar)) {
            format = format.withQuoteMode(QuoteMode.MINIMAL).withQuote(toChar(quoteChar).charAt(0));
        }
    }
    return format;
}
Use of org.apache.commons.csv.CSVFormat in project nifi by apache: class TestCSVRecordReader, method testMultipleRecordsEscapedWithSpecialChar.
@Test
public void testMultipleRecordsEscapedWithSpecialChar() throws IOException, MalformedRecordException {
    // The original wrapped "\u0001" in StringEscapeUtils.unescapeJava(...), but the Java
    // compiler already resolves the \u0001 escape to the SOH character, making the call a
    // no-op identity. A plain char literal expresses the same delimiter directly.
    final char delimiter = '\u0001';
    final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().withQuote('"').withDelimiter(delimiter);

    // Use the default schema, but type the "balance" column as a double.
    final List<RecordField> fields = getDefaultFields();
    fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
    final RecordSchema schema = new SimpleRecordSchema(fields);

    try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/multi-bank-account_escapedchar.csv"));
         final CSVRecordReader reader = createReader(fis, schema, format)) {

        final Object[] firstRecord = reader.nextRecord().getValues();
        final Object[] firstExpectedValues = new Object[] { "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA" };
        Assert.assertArrayEquals(firstExpectedValues, firstRecord);

        final Object[] secondRecord = reader.nextRecord().getValues();
        final Object[] secondExpectedValues = new Object[] { "2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA" };
        Assert.assertArrayEquals(secondExpectedValues, secondRecord);

        // Exactly two records in the fixture file.
        assertNull(reader.nextRecord());
    }
}
Use of org.apache.commons.csv.CSVFormat in project hmftools by hartwigmedical: class PatientCancerTypes, method writeRecords.
/**
 * Writes the given patient cancer-type records as CSV to {@code outputPath}.
 * <p>
 * Null values are rendered as empty strings and the header row is taken from
 * {@code Header.class}. The printer (and the underlying {@link FileWriter}) is
 * managed with try-with-resources so it is closed even when printing fails —
 * the original leaked both on exception.
 *
 * @param outputPath         destination CSV file path
 * @param patientCancerTypes records to serialize, one CSV row each
 * @throws IOException if the file cannot be written
 */
public static void writeRecords(@NotNull final String outputPath, @NotNull final List<PatientCancerTypes> patientCancerTypes) throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withNullString("").withHeader(Header.class);
    try (final CSVPrinter printer = new CSVPrinter(new FileWriter(outputPath), format)) {
        printer.printRecords(patientCancerTypes.stream().map(PatientCancerTypes::csvRecord).collect(Collectors.toList()));
    }
}
Use of org.apache.commons.csv.CSVFormat in project hmftools by hartwigmedical: class PortalClinicalData, method writeRecords.
/**
 * Writes the given portal clinical-data records as CSV to {@code outputPath}.
 * <p>
 * The header row is taken from {@code Header.class}. The printer (and the
 * underlying {@link FileWriter}) is managed with try-with-resources so it is
 * closed even when printing fails — the original leaked both on exception.
 *
 * @param outputPath         destination CSV file path
 * @param patientCancerTypes records to serialize, one CSV row each
 * @throws IOException if the file cannot be written
 */
public static void writeRecords(@NotNull final String outputPath, @NotNull final List<PortalClinicalData> patientCancerTypes) throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withHeader(Header.class);
    try (final CSVPrinter printer = new CSVPrinter(new FileWriter(outputPath), format)) {
        printer.printRecords(patientCancerTypes.stream().map(PortalClinicalData::csvRecord).collect(Collectors.toList()));
    }
}
Use of org.apache.commons.csv.CSVFormat in project jaqy by Teradata: class CSVExporterFactory, method getHandler.
@Override
// Builds a CSVExporter from command-line options: -c charset, -d delimiter,
// -t named format, -n per-column file-name pattern, -e per-column encoding,
// -f column index to export to a separate file. The first positional argument
// is the output file path.
public JaqyExporter getHandler(CommandLine cmdLine, JaqyInterpreter interpreter) throws Exception {
// NOTE(review): "charset" (the main output charset, -c) and "encoding" (the
// charset used for per-column CSVExportInfo files, -e) look near-duplicated —
// confirm both are intentional before consolidating.
Charset charset = DEFAULT_CHARSET;
CSVFormat format = CSVFormat.DEFAULT;
HashMap<Integer, CSVExportInfo> exportInfoMap = new HashMap<Integer, CSVExportInfo>();
CSVNameGen nameGen = new CSVNameGen(DEFAULT_NAME_PATTERN);
Charset encoding = DEFAULT_CHARSET;
// Options are order-sensitive: -n and -e must appear before the -f they apply to,
// since each -f snapshots the current nameGen/encoding into a CSVExportInfo.
for (Option option : cmdLine.getOptions()) {
switch(option.getOpt().charAt(0)) {
case 'c':
{
// Output charset for the main CSV file.
charset = Charset.forName(option.getValue());
break;
}
case 'd':
{
// Field delimiter; CSVUtils.getChar presumably decodes escape forms and
// returns 0 on failure — TODO confirm.
char delimiter = CSVUtils.getChar(option.getValue());
if (delimiter == 0)
throw new IllegalArgumentException("invalid delimiter: " + option.getValue());
format = format.withDelimiter(delimiter);
break;
}
case 't':
{
// Named predefined format (e.g. a CSVFormat variant) looked up by CSVUtils.
format = CSVUtils.getFormat(option.getValue());
break;
}
case 'n':
{
String fmt = option.getValue();
nameGen = new CSVNameGen(fmt);
// now check if the name is a valid format.
// If formatting index 1 yields the pattern unchanged, the pattern had no
// placeholder and is reported as invalid.
if (fmt.equals(nameGen.getName(1)))
interpreter.error("Invalid name pattern: " + fmt);
break;
}
case 'e':
{
// Charset for per-column export files created via -f.
encoding = Charset.forName(option.getValue());
break;
}
case 'f':
{
int column = Integer.parseInt(option.getValue());
if (column < 1) {
// NOTE(review): if interpreter.error does not throw, execution falls through
// and the invalid column is still added to the map below — confirm that
// interpreter.error aborts, or add an explicit continue/throw.
interpreter.error("Column index cannot be smaller than 1.");
}
CSVExportInfo info = new CSVExportInfo(nameGen, encoding);
exportInfoMap.put(column, info);
break;
}
// NOTE(review): no default case — unknown options are silently ignored here;
// presumably the CommandLine parser rejects them earlier. Verify.
}
}
String[] args = cmdLine.getArgs();
if (args.length == 0)
throw new IllegalArgumentException("missing file name.");
Path file = interpreter.getPath(args[0]);
return new CSVExporter(file, charset, format, exportInfoMap);
}
Aggregations