Use of org.apache.commons.csv.CSVFormat in project Pinot by LinkedIn.
The class CSVRecordReader, method getFormat.
/**
 * Builds the CSV format from configuration: the configured base format with the configured
 * delimiter applied, plus either the configured header or, when none is configured, the
 * no-argument {@code withHeader()} variant.
 *
 * @return the resulting format
 */
private CSVFormat getFormat() {
    final CSVFormat delimited = getFormatFromConfig().withDelimiter(getDelimiterFromConfig());
    final String[] configuredHeader = getHeaderFromConfig();
    // No explicit header configured: fall back to withHeader() without column names.
    return configuredHeader == null
        ? delimited.withHeader()
        : delimited.withHeader(configuredHeader);
}
Use of org.apache.commons.csv.CSVFormat in project Camel by Apache.
The class CsvRecordConvertersTest, method setUp.
/**
 * Parses the single line {@code "1,2,3"} with headers {@code A}, {@code B}, {@code C} and stores
 * the first parsed record in the {@code record} field for the tests to use.
 *
 * @throws Exception if the sample input cannot be parsed
 */
@Before
public void setUp() throws Exception {
    CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    // CSVParser is Closeable; close it once the records have been read.
    try (CSVParser parser = new CSVParser(new StringReader("1,2,3"), format)) {
        List<CSVRecord> records = parser.getRecords();
        record = records.get(0);
    }
}
Use of org.apache.commons.csv.CSVFormat in project Kylo by Teradata.
The class CSVAutoDetect, method guessDelimiter.
/**
 * Guesses the delimiter of the sampled text.
 *
 * <p>Each delimiter seen on the first line is tried in the order produced by
 * {@code sortDelimitersIntoPreferredOrder}. A delimiter is returned immediately when the parser
 * exposes a header map and every parsed record has as many fields as that header map. Otherwise
 * the delimiter becomes a candidate if every subsequent line contains it exactly as often as the
 * first line does. When there are candidates, the first one in iteration order is returned.
 *
 * @param lineStats per-line statistics of the sample; the first entry is treated as the reference line
 * @param value     the raw sample text, re-parsed once per delimiter under test
 * @param quote     the quote character applied to the trial format (may be {@code null})
 * @param headerRow whether the trial format treats the first record as the header
 * @return the guessed delimiter, or {@code null} if none could be determined
 * @throws IOException if parsing the sample text fails
 */
private Character guessDelimiter(List<LineStats> lineStats, String value, Character quote, boolean headerRow) throws IOException {
    // Assume delimiter exists in first line and compare to subsequent lines
    if (lineStats.isEmpty()) {
        return null;
    }
    LineStats firstLineStat = lineStats.get(0);
    Map<Character, Integer> firstLineDelimCounts = firstLineStat.calcDelimCountsOrdered();
    if (firstLineDelimCounts == null || firstLineDelimCounts.isEmpty()) {
        return null;
    }

    List<Character> candidates = new ArrayList<>();
    Set<Character> firstLineDelimKeys = sortDelimitersIntoPreferredOrder(firstLineDelimCounts.keySet());
    for (Character delim : firstLineDelimKeys) {
        // Attempt to parse with the given delimiter
        CSVFormat format;
        if (headerRow) {
            format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(delim).withQuote(quote);
        } else {
            format = CSVFormat.DEFAULT.withDelimiter(delim).withQuote(quote);
        }
        try (StringReader sr = new StringReader(value);
             CSVParser parser = format.parse(sr)) {
            if (parser.getHeaderMap() != null) {
                int size = parser.getHeaderMap().size();
                List<CSVRecord> records = parser.getRecords();
                // Every record lines up with the header: accept this delimiter right away.
                if (records.stream().allMatch(record -> record.size() == size)) {
                    return delim;
                }
            }
        }

        // Fall back to comparing the delimiter count of the first line with every other line.
        Integer delimCount = firstLineDelimCounts.get(delim);
        boolean match = true;
        for (int i = 1; i < lineStats.size() && match; i++) {
            LineStats thisLine = lineStats.get(i);
            Integer rowDelimCount = thisLine.delimStats.get(delim);
            match = delimCount.equals(rowDelimCount);
        }
        if (match) {
            candidates.add(delim);
        }
    }

    // Candidates were collected while iterating firstLineDelimKeys, so the first element is the
    // most preferred delimiter that actually matched. The previous tie-break loop returned the
    // first key of firstLineDelimKeys regardless of whether it was a candidate.
    return candidates.isEmpty() ? null : candidates.get(0);
}
Use of org.apache.commons.csv.CSVFormat in project Kylo by Teradata.
The class CSVAutoDetect, method detectCSVFormat.
/**
 * Derives a {@link CSVFormat} from sample text by guessing its quote and delimiter characters.
 *
 * @param sampleText   the sample text to inspect
 * @param headerRow    passed through to the delimiter guess; presumably whether the first row is a header
 * @param seperatorStr optional separator hint; when not blank, its first character is passed to the statistics pass
 * @return the default format allowing missing column names, configured with the guessed delimiter,
 *         minimal quote mode and the guessed quote character
 * @throws IOException if no delimiter can be determined or reading the sample fails
 */
public CSVFormat detectCSVFormat(String sampleText, boolean headerRow, String seperatorStr) throws IOException {
    final Character hintedSeparator =
        StringUtils.isNotBlank(seperatorStr) ? Character.valueOf(seperatorStr.charAt(0)) : null;

    try (BufferedReader sampleReader = new BufferedReader(new StringReader(sampleText))) {
        final List<LineStats> stats = generateStats(sampleReader, hintedSeparator);
        final Character quoteChar = guessQuote(stats);
        final Character delimiter = guessDelimiter(stats, sampleText, quoteChar, headerRow);
        if (delimiter == null) {
            throw new IOException("Unrecognized format");
        }
        return CSVFormat.DEFAULT.withAllowMissingColumnNames()
            .withDelimiter(delimiter)
            .withQuoteMode(QuoteMode.MINIMAL)
            .withQuote(quoteChar);
    }
}
Use of org.apache.commons.csv.CSVFormat in project Kylo by Teradata.
The class CSVFileSchemaParser, method parse.
/**
 * Samples the input stream, builds a CSV format for the sample, and converts the schema
 * populated from the parsed sample into the requested target schema type.
 *
 * @param is      the stream to sample; must not be null
 * @param charset the charset used to read the stream and recorded on the file schema; must not be null
 * @param target  the target schema type; must not be null
 * @return the schema converted to the target type
 * @throws IOException if reading or parsing the sample fails
 */
@Override
public Schema parse(InputStream is, Charset charset, TableSchemaType target) throws IOException {
    Validate.notNull(target, "target must not be null");
    Validate.notNull(is, "stream must not be null");
    Validate.notNull(charset, "charset must not be null");
    validate();

    // Work on a bounded sample of the input rather than the whole stream.
    final String sample = ParserHelper.extractSampleLines(is, charset, numRowsToSample);
    Validate.notEmpty(sample, "No data in file");

    final CSVFormat csvFormat = createCSVFormat(sample);
    try (Reader sampleReader = new StringReader(sample)) {
        final DefaultFileSchema schema = populateSchema(csvFormat.parse(sampleReader));
        schema.setCharset(charset.name());
        // Convert to target schema with proper derived types
        return convertToTarget(target, schema);
    }
}
Aggregations