Search in sources :

Example 51 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project Gaffer by gchq.

the class RoadTrafficCsvElementGeneratorTest method shouldParseSampleData.

/**
 * Checks that the road traffic sample CSV is turned into the expected number
 * of entities and edges, and that no other element type is produced.
 *
 * @throws IOException if the sample data cannot be opened, parsed or closed
 */
@Test
public void shouldParseSampleData() throws IOException {
    // Given
    final OneToManyElementGenerator<CSVRecord> generator = new RoadTrafficCsvElementGenerator();
    // The parser is a resource in its own right: declaring it here closes it
    // deterministically instead of relying on the underlying stream being closed.
    try (final InputStream inputStream = StreamUtil.openStream(getClass(), "/roadTrafficSampleData.csv");
         final CSVParser csvRecords = new CSVParser(new InputStreamReader(inputStream), CSVFormat.DEFAULT.withFirstRecordAsHeader())) {
        // When
        final Iterable<? extends Element> elements = generator.apply(csvRecords);
        // Then
        // The elements are consumed inside the try block so the parser is still open
        // while they are iterated.
        int entityCount = 0;
        int edgeCount = 0;
        for (final Element element : elements) {
            if (element instanceof Entity) {
                entityCount++;
            } else if (element instanceof Edge) {
                edgeCount++;
            } else {
                fail("Unrecognised element class: " + element.getClassName());
            }
        }
        assertEquals(1600, entityCount);
        assertEquals(700, edgeCount);
    }
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) CSVParser(org.apache.commons.csv.CSVParser) Element(uk.gov.gchq.gaffer.data.element.Element) CSVRecord(org.apache.commons.csv.CSVRecord) Edge(uk.gov.gchq.gaffer.data.element.Edge) Test(org.junit.jupiter.api.Test)

Example 52 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project sonarqube by SonarSource.

the class MultivalueProperty method processRecords.

/**
 * Flattens the parsed CSV records into {@code result}, passing every stored value
 * through {@code valueProcessor}.
 * <p>
 * A single record is the usual case. Several records mean the input value was split
 * over multiple lines (this is common in Maven), for example:
 * <pre>
 *   &lt;sonar.exclusions&gt;
 *     src/foo,
 *     src/bar,
 *     src/biz
 *   &lt;/sonar.exclusions&gt;
 * </pre>
 * Records are then merged into one list of items: the first item of each following
 * record is joined onto the last item collected so far. If that first item is empty
 * the last item is left untouched; if the last item is empty it is replaced.
 * <p>
 * A line break in the middle of an item is preserved, so
 * <pre>
 *   &lt;sonar.exclusions&gt;
 *     a
 *     b,
 *     c
 *   &lt;/sonar.exclusions&gt;
 * </pre>
 * produces ['a\nb', 'c'].
 *
 * @param result         list the items are appended to (and whose last item may be rewritten)
 * @param records        parsed records, in input order
 * @param valueProcessor transformation applied to each value before it is stored
 */
private static void processRecords(List<String> result, List<CSVRecord> records, Function<String, String> valueProcessor) {
    for (CSVRecord record : records) {
        Iterator<String> values = record.iterator();
        if (!result.isEmpty()) {
            // Not the first contribution: the leading value continues the previous item.
            String head = values.next();
            if (!head.isEmpty()) {
                int tailIdx = result.size() - 1;
                String tail = result.get(tailIdx);
                String merged = tail.isEmpty() ? head : tail + "\n" + head;
                result.set(tailIdx, valueProcessor.apply(merged));
            }
        }
        // Everything left in the record is a new item of its own.
        while (values.hasNext()) {
            result.add(valueProcessor.apply(values.next()));
        }
    }
}
Also used : CSVRecord(org.apache.commons.csv.CSVRecord)

Example 53 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project sonarqube by SonarSource.

the class MultivalueProperty method parseAsCsv.

/**
 * Parses a property value as CSV and returns its items as an array.
 * <p>
 * The value is first cleaned with {@code trimFieldsAndRemoveEmptyFields}, then parsed
 * with an RFC 4180 based format that ignores empty lines and surrounding spaces.
 * The resulting records are merged by {@code processRecords}.
 *
 * @param key            property key, used only in the error message
 * @param value          raw property value
 * @param valueProcessor transformation applied to each item
 * @return the processed items, or an empty array when the value yields no record
 * @throws IllegalStateException if reading the CSV value fails with an {@link IOException}
 */
public static String[] parseAsCsv(String key, String value, UnaryOperator<String> valueProcessor) {
    String cleanValue = MultivalueProperty.trimFieldsAndRemoveEmptyFields(value);
    CSVFormat format = CSVFormat.RFC4180.builder()
        .setSkipHeaderRecord(true)
        .setIgnoreEmptyLines(true)
        .setIgnoreSurroundingSpaces(true)
        .build();
    try (CSVParser csvParser = format.parse(new StringReader(cleanValue))) {
        List<CSVRecord> records = csvParser.getRecords();
        if (records.isEmpty()) {
            return ArrayUtils.EMPTY_STRING_ARRAY;
        }
        List<String> items = new ArrayList<>();
        processRecords(items, records, valueProcessor);
        return items.toArray(new String[0]);
    } catch (IOException e) {
        throw new IllegalStateException("Property: '" + key + "' doesn't contain a valid CSV value: '" + value + "'", e);
    }
}
Also used : CSVParser(org.apache.commons.csv.CSVParser) ArrayList(java.util.ArrayList) StringReader(java.io.StringReader) CSVRecord(org.apache.commons.csv.CSVRecord) IOException(java.io.IOException)

Example 54 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project atlasmap by atlasmap.

the class CsvFieldReader method readFields.

/**
 * Reads the values of one CSV column from the document and returns them as fields.
 * <p>
 * If the root segment of the field path carries a collection index, only the record
 * at that position is read. Otherwise every record is read and each resulting field
 * gets its path rewritten with the record position as collection index.
 * The document is marked before parsing and reset afterwards.
 *
 * @param field the field describing the column (by column index if set, otherwise by name)
 * @return the single field when exactly one value was read, otherwise a field group
 *         created from {@code field} that holds all fields read
 * @throws AtlasException if parsing or resetting the document fails with an {@link IOException}
 */
private Field readFields(CsvField field) throws AtlasException {
    List<Field> collected = new ArrayList<>();
    CSVFormat format = csvConfig.newCsvFormat();
    try {
        document.mark(Integer.MAX_VALUE);
        CSVParser parser = format.parse(new InputStreamReader(document));
        Integer wantedIndex = new AtlasPath(field.getPath()).getRootSegment().getCollectionIndex();
        int row = 0;
        for (CSVRecord record : parser) {
            if (wantedIndex == null) {
                // No index requested: take every record and stamp its position into the path.
                CsvField item = copyWithRecordValue(field, record);
                AtlasPath itemPath = new AtlasPath(item.getPath());
                itemPath.setCollectionIndex(0, row);
                item.setPath(itemPath.toString());
                collected.add(item);
            } else if (row == wantedIndex) {
                // Only the requested record is of interest; stop once it has been read.
                collected.add(copyWithRecordValue(field, record));
                break;
            }
            row++;
        }
        document.reset();
    } catch (IOException e) {
        throw new AtlasException(e);
    }
    if (collected.size() == 1) {
        return collected.get(0);
    }
    FieldGroup group = AtlasModelFactory.createFieldGroupFrom(field, true);
    group.getField().addAll(collected);
    return group;
}

/** Clones {@code template}, clears the clone's index and sets its value from {@code record}. */
private static CsvField copyWithRecordValue(CsvField template, CSVRecord record) {
    CsvField copy = CsvField.cloneOf(template);
    // do not copy over index if set
    copy.setIndex(null);
    String value = template.getColumn() != null
        ? record.get(template.getColumn())
        : record.get(template.getName());
    copy.setValue(value);
    return copy;
}
Also used : InputStreamReader(java.io.InputStreamReader) FieldGroup(io.atlasmap.v2.FieldGroup) ArrayList(java.util.ArrayList) IOException(java.io.IOException) AtlasException(io.atlasmap.api.AtlasException) CsvField(io.atlasmap.csv.v2.CsvField) Field(io.atlasmap.v2.Field) CsvField(io.atlasmap.csv.v2.CsvField) CSVParser(org.apache.commons.csv.CSVParser) AtlasPath(io.atlasmap.core.AtlasPath) CSVFormat(org.apache.commons.csv.CSVFormat) CSVRecord(org.apache.commons.csv.CSVRecord)

Example 55 with CSVRecord

use of org.apache.commons.csv.CSVRecord in project webapp by elimu-ai.

the class LetterLearningEventsRestController method handleUploadCsvRequest.

/**
 * Receives an uploaded CSV file of letter learning events, stores a backup copy on the
 * filesystem and persists each row as a {@code LetterLearningEvent}.
 * <p>
 * Processing stops at the first row whose application or letter cannot be found
 * (HTTP 422). A row that already exists in the database sets HTTP 409 but does not
 * stop processing. Any exception raised while storing or parsing the file results in
 * HTTP 500 with the exception message in the response body.
 *
 * @param multipartFile the uploaded file; its original filename is expected to look like
 *                      "7161a85a0e4751cd_letter-learning-events_2020-04-23.csv"
 * @param response      the servlet response whose status is set on error
 * @return a JSON string with a "result" entry and a "successMessage" or "errorMessage" entry
 */
@RequestMapping(value = "/csv", method = RequestMethod.POST)
public String handleUploadCsvRequest(@RequestParam("file") MultipartFile multipartFile, HttpServletResponse response) {
    logger.info("handleUploadCsvRequest");
    String name = multipartFile.getName();
    logger.info("name: " + name);
    // Expected format: "7161a85a0e4751cd_letter-learning-events_2020-04-23.csv"
    String originalFilename = multipartFile.getOriginalFilename();
    logger.info("originalFilename: " + originalFilename);
    // TODO: Send notification to the #📊-data-collection channel in Discord
    // Hide parts of the Android ID, e.g. "7161***51cd_word-learning-events_2020-04-23.csv"
    String anonymizedOriginalFilename = originalFilename.substring(0, 4) + "***" + originalFilename.substring(12);
    DiscordHelper.sendChannelMessage("Received dataset: `" + anonymizedOriginalFilename + "`", null, null, null, null);
    String androidIdExtractedFromFilename = AnalyticsHelper.extractAndroidIdFromCsvFilename(originalFilename);
    logger.info("androidIdExtractedFromFilename: \"" + androidIdExtractedFromFilename + "\"");
    String contentType = multipartFile.getContentType();
    logger.info("contentType: " + contentType);
    JSONObject jsonObject = new JSONObject();
    try {
        byte[] bytes = multipartFile.getBytes();
        logger.info("bytes.length: " + bytes.length);
        // Store a backup of the original CSV file on the filesystem (in case it will be needed for debugging)
        // NOTE(review): originalFilename is client-supplied and is used as a path component here;
        // confirm it is validated upstream or sanitize it before building the file path.
        File elimuAiDir = new File(System.getProperty("user.home"), ".elimu-ai");
        File languageDir = new File(elimuAiDir, "lang-" + Language.valueOf(ConfigHelper.getProperty("content.language")));
        File analyticsDir = new File(languageDir, "analytics");
        File androidIdDir = new File(analyticsDir, "android-id_" + androidIdExtractedFromFilename);
        File letterLearningEventsDir = new File(androidIdDir, "letter-learning-events");
        letterLearningEventsDir.mkdirs();
        File csvFile = new File(letterLearningEventsDir, originalFilename);
        logger.info("Storing CSV file at " + csvFile);
        multipartFile.transferTo(csvFile);
        // Iterate each row in the CSV file
        Path csvFilePath = Paths.get(csvFile.toURI());
        logger.info("csvFilePath: " + csvFilePath);
        CSVFormat csvFormat = CSVFormat.DEFAULT.withHeader(
                // The Room database ID
                "id",
                "time",
                "android_id",
                "package_name",
                "letter_id",
                "letter_text",
                "learning_event_type").withSkipHeaderRecord();
        // Reader and parser are closed on every exit path (normal completion, break, exception)
        // so the stored file's handle is not leaked.
        try (Reader reader = Files.newBufferedReader(csvFilePath);
             CSVParser csvParser = new CSVParser(reader, csvFormat)) {
            for (CSVRecord csvRecord : csvParser) {
                logger.info("csvRecord: " + csvRecord);
                // Convert from CSV to Java
                LetterLearningEvent letterLearningEvent = new LetterLearningEvent();
                long timeInMillis = Long.parseLong(csvRecord.get("time"));
                Calendar time = Calendar.getInstance();
                time.setTimeInMillis(timeInMillis);
                letterLearningEvent.setTime(time);
                String androidId = csvRecord.get("android_id");
                letterLearningEvent.setAndroidId(androidId);
                String packageName = csvRecord.get("package_name");
                letterLearningEvent.setPackageName(packageName);
                Application application = applicationDao.readByPackageName(packageName);
                logger.info("application: " + application);
                if (application == null) {
                    // Return error message saying that the reporting Application has not yet been added
                    logger.warn("An Application with package name " + packageName + " was not found");
                    jsonObject.put("result", "error");
                    jsonObject.put("errorMessage", "An Application with package name " + packageName + " was not found");
                    response.setStatus(HttpStatus.UNPROCESSABLE_ENTITY.value());
                    break;
                }
                letterLearningEvent.setApplication(application);
                Long letterId = Long.valueOf(csvRecord.get("letter_id"));
                Letter letter = letterDao.read(letterId);
                logger.info("letter: " + letter);
                letterLearningEvent.setLetter(letter);
                if (letter == null) {
                    // Return error message saying that the Letter ID was not found
                    logger.warn("A Letter with ID " + letterId + " was not found");
                    jsonObject.put("result", "error");
                    jsonObject.put("errorMessage", "A Letter with ID " + letterId + " was not found");
                    response.setStatus(HttpStatus.UNPROCESSABLE_ENTITY.value());
                    break;
                }
                String letterText = csvRecord.get("letter_text");
                letterLearningEvent.setLetterText(letterText);
                LearningEventType learningEventType = LearningEventType.valueOf(csvRecord.get("learning_event_type"));
                letterLearningEvent.setLearningEventType(learningEventType);
                // Check if the event has already been stored in the database
                LetterLearningEvent existingLetterLearningEvent = letterLearningEventDao.read(time, androidId, application, letter);
                logger.info("existingLetterLearningEvent: " + existingLetterLearningEvent);
                if (existingLetterLearningEvent == null) {
                    // Store the event in the database
                    letterLearningEventDao.create(letterLearningEvent);
                    logger.info("Stored LetterLearningEvent in database with ID " + letterLearningEvent.getId());
                    jsonObject.put("result", "success");
                    jsonObject.put("successMessage", "The LetterLearningEvent was stored in the database");
                } else {
                    // Return error message saying that the event has already been uploaded
                    logger.warn("The event has already been stored in the database");
                    jsonObject.put("result", "error");
                    jsonObject.put("errorMessage", "The event has already been stored in the database");
                    response.setStatus(HttpStatus.CONFLICT.value());
                }
            }
        }
    } catch (Exception ex) {
        logger.error(ex);
        jsonObject.put("result", "error");
        jsonObject.put("errorMessage", ex.getMessage());
        response.setStatus(HttpStatus.INTERNAL_SERVER_ERROR.value());
    }
    String jsonResponse = jsonObject.toString();
    logger.info("jsonResponse: " + jsonResponse);
    return jsonResponse;
}
Also used : Path(java.nio.file.Path) Calendar(java.util.Calendar) Reader(java.io.Reader) LetterLearningEvent(ai.elimu.model.analytics.LetterLearningEvent) Letter(ai.elimu.model.content.Letter) JSONObject(org.json.JSONObject) CSVParser(org.apache.commons.csv.CSVParser) LearningEventType(ai.elimu.model.v2.enums.analytics.LearningEventType) CSVFormat(org.apache.commons.csv.CSVFormat) CSVRecord(org.apache.commons.csv.CSVRecord) File(java.io.File) MultipartFile(org.springframework.web.multipart.MultipartFile) Application(ai.elimu.model.admin.Application) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Aggregations

CSVRecord (org.apache.commons.csv.CSVRecord)127 CSVParser (org.apache.commons.csv.CSVParser)71 IOException (java.io.IOException)40 CSVFormat (org.apache.commons.csv.CSVFormat)40 ArrayList (java.util.ArrayList)36 Reader (java.io.Reader)24 StringReader (java.io.StringReader)22 InputStreamReader (java.io.InputStreamReader)18 FileReader (java.io.FileReader)16 Test (org.junit.Test)14 Path (java.nio.file.Path)13 HashMap (java.util.HashMap)11 File (java.io.File)10 PreparedStatement (java.sql.PreparedStatement)10 InputStream (java.io.InputStream)9 ResultSet (java.sql.ResultSet)9 PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection)9 CSVCommonsLoader (org.apache.phoenix.util.CSVCommonsLoader)9 BufferedReader (java.io.BufferedReader)8 Map (java.util.Map)7