Use of com.opencsv.RFC4180ParserBuilder in project tribuo by oracle.
The class CSVSaver, method save.
/**
 * Saves the dataset to the specified path.
 * @param csvPath The path to save to.
 * @param dataset The dataset to save.
 * @param responseNames The response names set.
 * @param <T> The output type.
 * @throws IOException If the disk write failed.
 */
public <T extends Output<T>> void save(Path csvPath, Dataset<T> dataset, Set<String> responseNames) throws IOException {
    boolean isMultiOutput = responseNames.size() > 1;
    ImmutableFeatureMap features = dataset.getFeatureIDMap();
    int ncols = features.size() + responseNames.size();
    //
    // Initialize the CSV header row.
    String[] headerLine = new String[ncols];
    Map<String, Integer> responseToColumn = new HashMap<>();
    int col = 0;
    for (String response : responseNames) {
        headerLine[col] = response;
        responseToColumn.put(response, col);
        col++;
    }
    for (int i = 0; i < features.size(); i++) {
        headerLine[col++] = features.get(i).getName();
    }
    // Write the CSV
    try (ICSVWriter writer = new CSVParserWriter(Files.newBufferedWriter(csvPath, StandardCharsets.UTF_8),
            new RFC4180ParserBuilder().withSeparator(separator).withQuoteChar(quote).build(), "\n")) {
        writer.writeNext(headerLine);
        for (Example<T> e : dataset) {
            String[] denseOutput = (isMultiOutput) ? densifyMultiOutput(e, responseToColumn) : densifySingleOutput(e);
            String[] featureArr = generateFeatureArray(e, features);
            if (featureArr.length != features.size()) {
                throw new IllegalStateException(String.format("Invalid example: had %d features, expected %d.", featureArr.length, features.size()));
            }
            //
            // Copy responses and features into a single array.
            String[] line = new String[ncols];
            System.arraycopy(denseOutput, 0, line, 0, denseOutput.length);
            System.arraycopy(featureArr, 0, line, denseOutput.length, featureArr.length);
            writer.writeNext(line);
        }
    }
}
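For reference, a minimal standalone sketch of the same write path follows. The output file name and row values are hypothetical; the sketch uses only the opencsv calls that appear above (RFC4180ParserBuilder, CSVParserWriter, ICSVWriter.writeNext).

import com.opencsv.CSVParserWriter;
import com.opencsv.ICSVWriter;
import com.opencsv.RFC4180ParserBuilder;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class Rfc4180WriteSketch {
    public static void main(String[] args) throws IOException {
        Path out = Paths.get("example.csv"); // hypothetical output file
        // The writer delegates quoting and separators to the RFC 4180 parser.
        try (ICSVWriter writer = new CSVParserWriter(
                Files.newBufferedWriter(out, StandardCharsets.UTF_8),
                new RFC4180ParserBuilder().withSeparator(',').withQuoteChar('"').build(),
                "\n")) {
            writer.writeNext(new String[] {"id", "note"});
            // The embedded comma forces the field to be quoted under RFC 4180.
            writer.writeNext(new String[] {"1", "hello, world"});
        }
    }
}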
Use of com.opencsv.RFC4180ParserBuilder in project leetcode-practice by snehasishroy.
The class SenseMachineCoding, method readOrderProducts.
private void readOrderProducts() throws IOException, CsvValidationException {
    RFC4180Parser rfc4180Parser = new RFC4180ParserBuilder().build();
    // The RFC 4180 parser ensures correct handling of lines with quoted fields
    // containing special characters, e.g. a,"this is a big string",10,20
    try (CSVReader reader = new CSVReaderBuilder(new FileReader(directory + "order_products__prior.csv"))
            .withCSVParser(rfc4180Parser).withSkipLines(1).build()) {
        String[] lineInArray;
        while ((lineInArray = reader.readNext()) != null) {
            int productId = Integer.parseInt(lineInArray[1]);
            int departmentId = productsToDepartmentIds.get(productId);
            int orderId = Integer.parseInt(lineInArray[0]);
            Pair<Integer, Integer> timeInfo = orders.get(orderId);
            int dayOfWeek = timeInfo.getKey();
            int hourOfDay = timeInfo.getValue();
            int curUserId = orderToUserMapping.get(orderId);
            // dayOfWeek -> {hour -> {department_id -> frequency}}
            Map<Integer, Integer> departmentFrequencies = frequencyMapping.get(dayOfWeek).get(hourOfDay);
            Set<Integer> userIds = userMapping.get(dayOfWeek).get(hourOfDay).computeIfAbsent(departmentId, __ -> new HashSet<>());
            if (!userIds.contains(curUserId)) {
                // Only count a department once per user for a given day/hour slot.
                departmentFrequencies.put(departmentId, departmentFrequencies.getOrDefault(departmentId, 0) + 1);
                userIds.add(curUserId);
            }
        }
    }
}
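A minimal, self-contained sketch of the same read pattern follows; the file name products.csv is hypothetical, and only opencsv calls already shown above appear (CSVReaderBuilder.withCSVParser, withSkipLines, CSVReader.readNext).

import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.RFC4180Parser;
import com.opencsv.RFC4180ParserBuilder;
import com.opencsv.exceptions.CsvValidationException;

import java.io.FileReader;
import java.io.IOException;

public class Rfc4180ReadSketch {
    public static void main(String[] args) throws IOException, CsvValidationException {
        RFC4180Parser parser = new RFC4180ParserBuilder().build();
        // withSkipLines(1) skips the header row, as in the excerpt above.
        try (CSVReader reader = new CSVReaderBuilder(new FileReader("products.csv")) // hypothetical file
                .withCSVParser(parser)
                .withSkipLines(1)
                .build()) {
            String[] line;
            while ((line = reader.readNext()) != null) {
                // A quoted field such as "a, b" comes back as a single element.
                System.out.println(String.join(" | ", line));
            }
        }
    }
}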
Use of com.opencsv.RFC4180ParserBuilder in project unomi by apache.
The class LineSplitProcessor, method process.
@Override
public void process(Exchange exchange) throws Exception {
    // For a one-shot import, read the configuration from the exchange header
    // and overwrite the route's import settings.
    ImportConfiguration importConfigOneShot = (ImportConfiguration) exchange.getIn().getHeader(RouterConstants.HEADER_IMPORT_CONFIG_ONESHOT);
    String configType = (String) exchange.getIn().getHeader(RouterConstants.HEADER_CONFIG_TYPE);
    if (importConfigOneShot != null) {
        fieldsMapping = (Map<String, Integer>) importConfigOneShot.getProperties().get("mapping");
        propertiesToOverwrite = importConfigOneShot.getPropertiesToOverwrite();
        mergingProperty = importConfigOneShot.getMergingProperty();
        overwriteExistingProfiles = importConfigOneShot.isOverwriteExistingProfiles();
        columnSeparator = importConfigOneShot.getColumnSeparator();
        hasHeader = importConfigOneShot.isHasHeader();
        hasDeleteColumn = importConfigOneShot.isHasDeleteColumn();
        multiValueSeparator = importConfigOneShot.getMultiValueSeparator();
        multiValueDelimiter = importConfigOneShot.getMultiValueDelimiter();
    }
    if ((Integer) exchange.getProperty("CamelSplitIndex") == 0 && hasHeader) {
        // Skip the header line.
        exchange.setProperty(Exchange.ROUTE_STOP, Boolean.TRUE);
        return;
    }
    RFC4180Parser rfc4180Parser = new RFC4180ParserBuilder().withSeparator(columnSeparator.charAt(0)).build();
    logger.debug("$$$$ : LineSplitProcessor : BODY : " + (String) exchange.getIn().getBody());
    String[] profileData = rfc4180Parser.parseLine((String) exchange.getIn().getBody());
    ProfileToImport profileToImport = new ProfileToImport();
    profileToImport.setItemId(UUID.randomUUID().toString());
    profileToImport.setItemType("profile");
    profileToImport.setScope(RouterConstants.SYSTEM_SCOPE);
    if (profileData.length > 0 && StringUtils.isNotBlank(profileData[0])) {
        if ((hasDeleteColumn && (fieldsMapping.size() > (profileData.length - 1))) || (!hasDeleteColumn && (fieldsMapping.size() > profileData.length))) {
            throw new BadProfileDataFormatException("The mapping does not match the number of columns: line [" + ((Integer) exchange.getProperty("CamelSplitIndex") + 1) + "]", new Throwable("MAPPING_COLUMN_MATCH"));
        }
logger.debug("$$$$ : LineSplitProcessor : MAPPING : " + fieldsMapping.keySet());
Map<String, Object> properties = new HashMap<>();
for (String fieldMappingKey : fieldsMapping.keySet()) {
PropertyType propertyType = RouterUtils.getPropertyTypeById(profilePropertyTypes, fieldMappingKey);
if (fieldMappingKey != null && fieldsMapping.get(fieldMappingKey) != null && profileData != null && profileData[fieldsMapping.get(fieldMappingKey)] != null) {
logger.debug("$$$$ : LineSplitProcessor : PropType value : {}", profileData[fieldsMapping.get(fieldMappingKey)].trim());
} else {
logger.debug("$$$$ : LineSplitProcessor : no profileData found for fieldMappingKey=" + fieldMappingKey);
}
if (profileData.length > fieldsMapping.get(fieldMappingKey)) {
try {
if (propertyType == null) {
logger.error("No valid property type found for propertyTypeId=" + fieldMappingKey);
} else {
if (propertyType.getValueTypeId() == null) {
logger.error("No value type id found for property type " + propertyType.getItemId());
}
}
if (propertyType.getValueTypeId().equals("string") || propertyType.getValueTypeId().equals("email") || propertyType.getValueTypeId().equals("date")) {
if (BooleanUtils.isTrue(propertyType.isMultivalued())) {
String multivalueArray = profileData[fieldsMapping.get(fieldMappingKey)].trim();
if (StringUtils.isNotBlank(multiValueDelimiter) && multiValueDelimiter.length() == 2) {
multivalueArray = multivalueArray.replaceAll("\\" + multiValueDelimiter.charAt(0), "").replaceAll("\\" + multiValueDelimiter.charAt(1), "");
}
if (multivalueArray.contains(multiValueSeparator)) {
String[] valuesArray = multivalueArray.split("\\" + multiValueSeparator);
properties.put(fieldMappingKey, valuesArray);
} else {
if (StringUtils.isNotBlank(multivalueArray)) {
properties.put(fieldMappingKey, new String[] { multivalueArray });
} else {
properties.put(fieldMappingKey, new String[] {});
}
}
} else {
String singleValue = profileData[fieldsMapping.get(fieldMappingKey)].trim();
properties.put(fieldMappingKey, singleValue);
}
} else if (propertyType.getValueTypeId().equals("boolean")) {
properties.put(fieldMappingKey, new Boolean(profileData[fieldsMapping.get(fieldMappingKey)].trim()));
} else if (propertyType.getValueTypeId().equals("integer")) {
properties.put(fieldMappingKey, new Integer(profileData[fieldsMapping.get(fieldMappingKey)].trim()));
} else if (propertyType.getValueTypeId().equals("long")) {
properties.put(fieldMappingKey, new Long(profileData[fieldsMapping.get(fieldMappingKey)].trim()));
}
                } catch (Throwable t) {
                    logger.error("Error converting profileData", t);
                    if (fieldMappingKey != null && fieldsMapping.get(fieldMappingKey) != null && profileData != null && profileData[fieldsMapping.get(fieldMappingKey)] != null) {
                        throw new BadProfileDataFormatException("Unable to convert '" + profileData[fieldsMapping.get(fieldMappingKey)].trim() + "' to " + (propertyType != null ? propertyType.getValueTypeId() : "null propertyType"), new Throwable("DATA_TYPE"));
                    } else {
                        throw new BadProfileDataFormatException("Unable to find profile data for key " + fieldMappingKey, new Throwable("DATA_TYPE"));
                    }
                }
            }
        }
        profileToImport.setProperties(properties);
        profileToImport.setMergingProperty(mergingProperty);
        profileToImport.setPropertiesToOverwrite(propertiesToOverwrite);
        profileToImport.setOverwriteExistingProfiles(overwriteExistingProfiles);
        if (hasDeleteColumn && StringUtils.isNotBlank(profileData[profileData.length - 1]) && Boolean.parseBoolean(profileData[profileData.length - 1].trim())) {
            profileToImport.setProfileToDelete(true);
        }
    } else {
        throw new BadProfileDataFormatException("Empty line: line [" + ((Integer) exchange.getProperty("CamelSplitIndex") + 1) + "]", new Throwable("EMPTY_LINE"));
    }
    exchange.getIn().setBody(profileToImport, ProfileToImport.class);
    if (RouterConstants.CONFIG_TYPE_KAFKA.equals(configType)) {
        exchange.getIn().setHeader(KafkaConstants.PARTITION_KEY, 0);
        exchange.getIn().setHeader(KafkaConstants.KEY, "1");
    }
}
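The core opencsv call in this excerpt is RFC4180Parser.parseLine on a single line of text. A minimal sketch of that pattern follows; the ';' separator and the sample line are hypothetical, and only API calls used above appear (RFC4180ParserBuilder.withSeparator, RFC4180Parser.parseLine).

import com.opencsv.RFC4180Parser;
import com.opencsv.RFC4180ParserBuilder;

import java.io.IOException;

public class ParseLineSketch {
    public static void main(String[] args) throws IOException {
        // Same builder call as the processor above, with a hypothetical ';' separator.
        RFC4180Parser parser = new RFC4180ParserBuilder().withSeparator(';').build();
        // A quoted field may contain the separator without being split.
        String[] fields = parser.parseLine("john@example.com;\"Doe; John\";true");
        for (String field : fields) {
            System.out.println(field);
        }
        // Prints: john@example.com, then Doe; John, then true
    }
}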