Use of org.talend.dataprep.api.type.Type.STRING in the data-prep project by Talend.
From the class XlsSchemaParser, method guessColumnType.
/**
 * Guesses the type of a column from the types previously guessed for each of its rows.
 * <p>
 * Only rows whose number is greater than or equal to {@code averageHeaderSize} are taken into
 * account. The type name that occurs most often among those rows wins; when several type names
 * share the highest count, the column falls back to {@code ANY}.
 *
 * @param colId the column id (only used in the debug log message).
 * @param columnRows all rows with previously guessed type: key=row number, value=guessed type name.
 * @param averageHeaderSize the first row number to consider; rows with a smaller number are ignored.
 * @return {@code ANY} when no row is left to inspect, otherwise the result of {@code Type.get} for
 * the single most frequent type name (or for the name of {@code ANY} in case of a tie).
 */
private Type guessColumnType(Integer colId, SortedMap<Integer, String> columnRows, int averageHeaderSize) {
    // how many times each guessed type name shows up once the header rows are skipped
    final Map<String, Long> countsByTypeName = columnRows
            .tailMap(averageHeaderSize)
            .values()
            .stream()
            .collect(Collectors.groupingBy(typeName -> typeName, Collectors.counting()));

    final OptionalLong highestCount = countsByTypeName.values().stream().mapToLong(Long::longValue).max();
    if (!highestCount.isPresent()) {
        // nothing to count: no row at or after averageHeaderSize
        return ANY;
    }
    final long threshold = highestCount.getAsLong();

    // every type name that reaches the highest count (more than one entry means a tie)
    final List<String> mostFrequentTypeNames = countsByTypeName.entrySet().stream()
            .filter(entry -> entry.getValue() >= threshold)
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());

    // a tie between several type names cannot be resolved, so the guess is ANY
    final String guessedType = mostFrequentTypeNames.size() == 1
            ? mostFrequentTypeNames.get(0)
            : ANY.getName();

    LOGGER.debug("guessed type for column #{} is {}", colId, guessedType);
    return Type.get(guessedType);
}
Aggregations