Search in sources :

Example 1 with RFC4180ParserBuilder

use of com.opencsv.RFC4180ParserBuilder in project tribuo by oracle.

From the class CSVSaver, the method save:

/**
 * Writes the supplied dataset to disk as a CSV file.
 * @param csvPath Destination file path.
 * @param dataset The dataset to write out.
 * @param responseNames The names of the response (output) columns.
 * @param <T> The output type of the dataset.
 * @throws IOException If writing to disk fails.
 */
public <T extends Output<T>> void save(Path csvPath, Dataset<T> dataset, Set<String> responseNames) throws IOException {
    boolean multiOutput = responseNames.size() > 1;
    ImmutableFeatureMap featureMap = dataset.getFeatureIDMap();
    int numColumns = featureMap.size() + responseNames.size();
    //
    // Build the header: response columns first, then feature columns.
    String[] header = new String[numColumns];
    Map<String, Integer> responseColumns = new HashMap<>();
    int idx = 0;
    for (String responseName : responseNames) {
        responseColumns.put(responseName, idx);
        header[idx] = responseName;
        idx++;
    }
    for (int featureId = 0; featureId < featureMap.size(); featureId++) {
        header[idx] = featureMap.get(featureId).getName();
        idx++;
    }
    // Emit each example as one RFC4180-quoted row.
    try (ICSVWriter writer = new CSVParserWriter(Files.newBufferedWriter(csvPath, StandardCharsets.UTF_8), new RFC4180ParserBuilder().withSeparator(separator).withQuoteChar(quote).build(), "\n")) {
        writer.writeNext(header);
        for (Example<T> example : dataset) {
            String[] responses = multiOutput ? densifyMultiOutput(example, responseColumns) : densifySingleOutput(example);
            String[] featureValues = generateFeatureArray(example, featureMap);
            if (featureValues.length != featureMap.size()) {
                throw new IllegalStateException(String.format("Invalid example: had %d features, expected %d.", featureValues.length, featureMap.size()));
            }
            //
            // Responses occupy the leading columns; features fill the remainder.
            String[] row = new String[numColumns];
            System.arraycopy(responses, 0, row, 0, responses.length);
            System.arraycopy(featureValues, 0, row, responses.length, featureValues.length);
            writer.writeNext(row);
        }
    }
}
Also used : HashMap(java.util.HashMap) RFC4180ParserBuilder(com.opencsv.RFC4180ParserBuilder) ICSVWriter(com.opencsv.ICSVWriter) ImmutableFeatureMap(org.tribuo.ImmutableFeatureMap) CSVParserWriter(com.opencsv.CSVParserWriter)

Example 2 with RFC4180ParserBuilder

use of com.opencsv.RFC4180ParserBuilder in project leetcode-practice by snehasishroy.

From the class SenseMachineCoding, the method readOrderProducts:

/**
 * Reads the prior order-products CSV and accumulates, per (day-of-week, hour-of-day),
 * how many distinct users shopped in each department.
 * @throws IOException If the file cannot be read.
 * @throws CsvValidationException If a CSV line is malformed.
 */
private void readOrderProducts() throws IOException, CsvValidationException {
    // RFC4180 parsing correctly handles quoted fields that contain the separator,
    // e.g. a,"this is a big string",10,20
    RFC4180Parser parser = new RFC4180ParserBuilder().build();
    try (CSVReader csvReader = new CSVReaderBuilder(new FileReader(directory + "order_products__prior.csv")).withCSVParser(parser).withSkipLines(1).build()) {
        for (String[] row = csvReader.readNext(); row != null; row = csvReader.readNext()) {
            int orderId = Integer.parseInt(row[0]);
            int productId = Integer.parseInt(row[1]);
            int departmentId = productsToDepartmentIds.get(productId);
            Pair<Integer, Integer> orderTime = orders.get(orderId);
            int day = orderTime.getKey();
            int hour = orderTime.getValue();
            int userId = orderToUserMapping.get(orderId);
            // frequencyMapping layout: dayOfWeek -> {hour -> {department_id -> frequency}}
            Map<Integer, Integer> departmentCounts = frequencyMapping.get(day).get(hour);
            Set<Integer> seenUsers = userMapping.get(day).get(hour).computeIfAbsent(departmentId, k -> new HashSet<>());
            // Count each user at most once per (day, hour, department);
            // add() returns false when the user was already recorded.
            if (seenUsers.add(userId)) {
                departmentCounts.merge(departmentId, 1, Integer::sum);
            }
        }
    }
}
Also used : RFC4180Parser(com.opencsv.RFC4180Parser) CSVReader(com.opencsv.CSVReader) CSVReaderBuilder(com.opencsv.CSVReaderBuilder) RFC4180ParserBuilder(com.opencsv.RFC4180ParserBuilder) FileReader(java.io.FileReader)

Example 3 with RFC4180ParserBuilder

use of com.opencsv.RFC4180ParserBuilder in project unomi by apache.

From the class LineSplitProcessor, the method process:

/**
 * Splits one CSV line of an import feed into a {@link ProfileToImport} and puts it on
 * the exchange body. Header rows are skipped, each mapped column is converted to its
 * declared property type, and an optional trailing delete-flag column marks the profile
 * for deletion.
 *
 * @param exchange the Camel exchange carrying the raw CSV line and import headers.
 * @throws BadProfileDataFormatException if the line is empty, the mapping does not match
 *         the column count, or a value cannot be converted to its property type.
 */
@Override
public void process(Exchange exchange) throws Exception {
    // In case of one shot import we check the header and overwrite import config
    ImportConfiguration importConfigOneShot = (ImportConfiguration) exchange.getIn().getHeader(RouterConstants.HEADER_IMPORT_CONFIG_ONESHOT);
    String configType = (String) exchange.getIn().getHeader(RouterConstants.HEADER_CONFIG_TYPE);
    if (importConfigOneShot != null) {
        fieldsMapping = (Map<String, Integer>) importConfigOneShot.getProperties().get("mapping");
        propertiesToOverwrite = importConfigOneShot.getPropertiesToOverwrite();
        mergingProperty = importConfigOneShot.getMergingProperty();
        overwriteExistingProfiles = importConfigOneShot.isOverwriteExistingProfiles();
        columnSeparator = importConfigOneShot.getColumnSeparator();
        hasHeader = importConfigOneShot.isHasHeader();
        hasDeleteColumn = importConfigOneShot.isHasDeleteColumn();
        multiValueSeparator = importConfigOneShot.getMultiValueSeparator();
        multiValueDelimiter = importConfigOneShot.getMultiValueDelimiter();
    }
    // The first split line is the header row when the file declares one: stop the route for it.
    if ((Integer) exchange.getProperty("CamelSplitIndex") == 0 && hasHeader) {
        exchange.setProperty(Exchange.ROUTE_STOP, Boolean.TRUE);
        return;
    }
    // RFC4180 parsing copes with quoted fields that contain the separator.
    RFC4180Parser rfc4180Parser = new RFC4180ParserBuilder().withSeparator(columnSeparator.charAt(0)).build();
    logger.debug("$$$$ : LineSplitProcessor : BODY : " + (String) exchange.getIn().getBody());
    String[] profileData = rfc4180Parser.parseLine(((String) exchange.getIn().getBody()));
    ProfileToImport profileToImport = new ProfileToImport();
    profileToImport.setItemId(UUID.randomUUID().toString());
    profileToImport.setItemType("profile");
    profileToImport.setScope(RouterConstants.SYSTEM_SCOPE);
    if (profileData.length > 0 && StringUtils.isNotBlank(profileData[0])) {
        if ((hasDeleteColumn && (fieldsMapping.size() > (profileData.length - 1))) || (!hasDeleteColumn && (fieldsMapping.size() > (profileData.length)))) {
            throw new BadProfileDataFormatException("The mapping does not match the number of column : line [" + ((Integer) exchange.getProperty("CamelSplitIndex") + 1) + "]", new Throwable("MAPPING_COLUMN_MATCH"));
        }
        logger.debug("$$$$ : LineSplitProcessor : MAPPING : " + fieldsMapping.keySet());
        Map<String, Object> properties = new HashMap<>();
        for (String fieldMappingKey : fieldsMapping.keySet()) {
            PropertyType propertyType = RouterUtils.getPropertyTypeById(profilePropertyTypes, fieldMappingKey);
            // Hoisted: the column index was previously looked up repeatedly per field.
            Integer columnIndex = fieldsMapping.get(fieldMappingKey);
            if (fieldMappingKey != null && columnIndex != null && profileData != null && profileData[columnIndex] != null) {
                logger.debug("$$$$ : LineSplitProcessor : PropType value : {}", profileData[columnIndex].trim());
            } else {
                logger.debug("$$$$ : LineSplitProcessor : no profileData found for fieldMappingKey=" + fieldMappingKey);
            }
            if (profileData.length > columnIndex) {
                try {
                    // FIX: a null propertyType (or null value type id) was previously logged
                    // but then dereferenced anyway, throwing a NullPointerException. Skip the
                    // unmappable field instead, matching the logged intent.
                    if (propertyType == null) {
                        logger.error("No valid property type found for propertyTypeId=" + fieldMappingKey);
                        continue;
                    }
                    if (propertyType.getValueTypeId() == null) {
                        logger.error("No value type id found for property type " + propertyType.getItemId());
                        continue;
                    }
                    String valueTypeId = propertyType.getValueTypeId();
                    String rawValue = profileData[columnIndex].trim();
                    if (valueTypeId.equals("string") || valueTypeId.equals("email") || valueTypeId.equals("date")) {
                        if (BooleanUtils.isTrue(propertyType.isMultivalued())) {
                            String multivalueArray = rawValue;
                            // A two-character delimiter (e.g. "[]") wraps the multivalue list; strip it.
                            if (StringUtils.isNotBlank(multiValueDelimiter) && multiValueDelimiter.length() == 2) {
                                multivalueArray = multivalueArray.replaceAll("\\" + multiValueDelimiter.charAt(0), "").replaceAll("\\" + multiValueDelimiter.charAt(1), "");
                            }
                            if (multivalueArray.contains(multiValueSeparator)) {
                                String[] valuesArray = multivalueArray.split("\\" + multiValueSeparator);
                                properties.put(fieldMappingKey, valuesArray);
                            } else if (StringUtils.isNotBlank(multivalueArray)) {
                                properties.put(fieldMappingKey, new String[] { multivalueArray });
                            } else {
                                properties.put(fieldMappingKey, new String[] {});
                            }
                        } else {
                            properties.put(fieldMappingKey, rawValue);
                        }
                    } else if (valueTypeId.equals("boolean")) {
                        // FIX: use valueOf instead of the deprecated boxing constructors.
                        properties.put(fieldMappingKey, Boolean.valueOf(rawValue));
                    } else if (valueTypeId.equals("integer")) {
                        properties.put(fieldMappingKey, Integer.valueOf(rawValue));
                    } else if (valueTypeId.equals("long")) {
                        properties.put(fieldMappingKey, Long.valueOf(rawValue));
                    }
                } catch (Throwable t) {
                    logger.error("Error converting profileData", t);
                    if (fieldMappingKey != null && columnIndex != null && profileData != null && profileData[columnIndex] != null) {
                        // FIX: the ternary must be parenthesized — previously '+' bound tighter
                        // than '!=', so the condition compared the concatenated message string
                        // to null (always true) and could NPE on a null propertyType.
                        throw new BadProfileDataFormatException("Unable to convert '" + profileData[columnIndex].trim() + "' to " + (propertyType != null ? propertyType.getValueTypeId() : "Null propertyType "), new Throwable("DATA_TYPE"));
                    } else {
                        throw new BadProfileDataFormatException("Unable to find profile data for key " + fieldMappingKey, new Throwable("DATA_TYPE"));
                    }
                }
            }
        }
        profileToImport.setProperties(properties);
        profileToImport.setMergingProperty(mergingProperty);
        profileToImport.setPropertiesToOverwrite(propertiesToOverwrite);
        profileToImport.setOverwriteExistingProfiles(overwriteExistingProfiles);
        // The trailing column, when configured, flags this profile for deletion.
        if (hasDeleteColumn && StringUtils.isNotBlank(profileData[profileData.length - 1]) && Boolean.parseBoolean(profileData[profileData.length - 1].trim())) {
            profileToImport.setProfileToDelete(true);
        }
    } else {
        throw new BadProfileDataFormatException("Empty line : line [" + ((Integer) exchange.getProperty("CamelSplitIndex") + 1) + "]", new Throwable("EMPTY_LINE"));
    }
    exchange.getIn().setBody(profileToImport, ProfileToImport.class);
    if (RouterConstants.CONFIG_TYPE_KAFKA.equals(configType)) {
        exchange.getIn().setHeader(KafkaConstants.PARTITION_KEY, 0);
        exchange.getIn().setHeader(KafkaConstants.KEY, "1");
    }
}
Also used : BadProfileDataFormatException(org.apache.unomi.router.api.exceptions.BadProfileDataFormatException) RFC4180Parser(com.opencsv.RFC4180Parser) PropertyType(org.apache.unomi.api.PropertyType) RFC4180ParserBuilder(com.opencsv.RFC4180ParserBuilder) ImportConfiguration(org.apache.unomi.router.api.ImportConfiguration) ProfileToImport(org.apache.unomi.router.api.ProfileToImport)

Aggregations

RFC4180ParserBuilder (com.opencsv.RFC4180ParserBuilder)3 RFC4180Parser (com.opencsv.RFC4180Parser)2 CSVParserWriter (com.opencsv.CSVParserWriter)1 CSVReader (com.opencsv.CSVReader)1 CSVReaderBuilder (com.opencsv.CSVReaderBuilder)1 ICSVWriter (com.opencsv.ICSVWriter)1 FileReader (java.io.FileReader)1 HashMap (java.util.HashMap)1 PropertyType (org.apache.unomi.api.PropertyType)1 ImportConfiguration (org.apache.unomi.router.api.ImportConfiguration)1 ProfileToImport (org.apache.unomi.router.api.ProfileToImport)1 BadProfileDataFormatException (org.apache.unomi.router.api.exceptions.BadProfileDataFormatException)1 ImmutableFeatureMap (org.tribuo.ImmutableFeatureMap)1