Search in sources :

Example 11 with CellProcessor

use of org.supercsv.cellprocessor.ift.CellProcessor in project nifi by apache.

The following example shows the onTrigger method of the class ValidateCsv.

/**
 * Validates the content of the incoming flow file as CSV against the cell-processor
 * schema built from the SCHEMA property.
 *
 * Two strategies are supported (VALIDATION_STRATEGY property):
 * whole-flowfile validation routes the original flow file to 'valid' or 'invalid'
 * as a unit; line-by-line validation splits the input into two new flow files
 * (valid lines / invalid lines), sets count.* attributes on them, and removes the
 * original.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final CsvPreference csvPref = getPreference(context, flowFile);
    final boolean header = context.getProperty(HEADER).asBoolean();
    final ComponentLog logger = getLogger();
    final String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final CellProcessor[] cellProcs = this.parseSchema(schema);
    final boolean isWholeFFValidation = context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
    // Mutable state shared with the anonymous InputStreamCallback below; locals
    // captured by an anonymous class must be final, hence the AtomicReference holders.
    final AtomicReference<Boolean> valid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<Boolean>(true);
    final AtomicReference<Integer> okCount = new AtomicReference<Integer>(0);
    final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0);
    final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null);
    final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null);
    if (!isWholeFFValidation) {
        // Line-by-line mode: valid and invalid lines are appended to two new flow files.
        invalidFF.set(session.create(flowFile));
        validFF.set(session.create(flowFile));
    }
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            NifiCsvListReader listReader = null;
            try {
                // NOTE(review): InputStreamReader uses the platform default charset here —
                // confirm that is intended for incoming CSV content.
                listReader = new NifiCsvListReader(new InputStreamReader(in), csvPref);
                // Handling of header: in line-by-line mode the header line is replicated
                // into both outgoing flow files so each remains a well-formed CSV.
                if (header) {
                    List<String> headerList = listReader.read();
                    if (!isWholeFFValidation) {
                        invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineInvalid.get()));
                            }
                        }));
                        validFF.set(session.append(validFF.get(), new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineValid.get()));
                            }
                        }));
                        isFirstLineValid.set(false);
                        isFirstLineInvalid.set(false);
                    }
                }
                boolean stop = false;
                while (!stop) {
                    try {
                        final List<Object> list = listReader.read(cellProcs);
                        stop = list == null;
                        if (!isWholeFFValidation && !stop) {
                            validFF.set(session.append(validFF.get(), new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(list, csvPref, isFirstLineValid.get()));
                                }
                            }));
                            okCount.set(okCount.get() + 1);
                            if (isFirstLineValid.get()) {
                                isFirstLineValid.set(false);
                            }
                        }
                    } catch (final SuperCsvException e) {
                        valid.set(false);
                        if (isWholeFFValidation) {
                            logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[] { flowFile }, e);
                            break;
                        } else {
                            // we append the invalid line to the flow file that will be routed to invalid relationship
                            invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(e.getCsvContext().getRowSource(), csvPref, isFirstLineInvalid.get()));
                                }
                            }));
                            if (isFirstLineInvalid.get()) {
                                isFirstLineInvalid.set(false);
                            }
                        }
                    } finally {
                        // Count only lines actually read: the terminating null read
                        // (stop == true) is not a data line. Previously the null read was
                        // counted and compensated in only one of the three outcome
                        // branches below, making count.total.lines off by one in the
                        // all-valid and all-invalid cases.
                        if (!isWholeFFValidation && !stop) {
                            totalCount.set(totalCount.get() + 1);
                        }
                    }
                }
            } catch (final IOException e) {
                valid.set(false);
                logger.error("Failed to validate {} against schema due to {}", new Object[] { flowFile }, e);
            } finally {
                if (listReader != null) {
                    listReader.close();
                }
            }
        }
    });
    if (isWholeFFValidation) {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { flowFile });
            session.getProvenanceReporter().route(flowFile, REL_VALID);
            session.transfer(flowFile, REL_VALID);
        } else {
            session.getProvenanceReporter().route(flowFile, REL_INVALID);
            session.transfer(flowFile, REL_INVALID);
        }
    } else {
        // FlowFiles are immutable: ProcessSession.putAttribute returns a NEW FlowFile
        // reference which must be used from then on. The previous code discarded the
        // return value, so the count.* attributes were lost on the transferred files.
        if (valid.get()) {
            // Every line validated: transfer the 'valid' copy, drop the unused 'invalid' copy.
            FlowFile outValid = validFF.get();
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { outValid });
            session.getProvenanceReporter().route(outValid, REL_VALID, "All " + totalCount.get() + " line(s) are valid");
            outValid = session.putAttribute(outValid, "count.valid.lines", Integer.toString(totalCount.get()));
            outValid = session.putAttribute(outValid, "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(outValid, REL_VALID);
            session.remove(invalidFF.get());
            session.remove(flowFile);
        } else if (okCount.get() != 0) {
            // Mixed outcome: some lines valid, some invalid — transfer both copies.
            FlowFile outValid = validFF.get();
            FlowFile outInvalid = invalidFF.get();
            logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'", new Object[] { okCount.get(), totalCount.get(), flowFile });
            session.getProvenanceReporter().route(outValid, REL_VALID, okCount.get() + " valid line(s)");
            outValid = session.putAttribute(outValid, "count.total.lines", Integer.toString(totalCount.get()));
            outValid = session.putAttribute(outValid, "count.valid.lines", Integer.toString(okCount.get()));
            session.transfer(outValid, REL_VALID);
            session.getProvenanceReporter().route(outInvalid, REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
            outInvalid = session.putAttribute(outInvalid, "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
            outInvalid = session.putAttribute(outInvalid, "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(outInvalid, REL_INVALID);
            session.remove(flowFile);
        } else {
            // No line validated: transfer the 'invalid' copy, drop the unused 'valid' copy.
            FlowFile outInvalid = invalidFF.get();
            logger.debug("All lines in {} are invalid; routing to 'invalid'", new Object[] { outInvalid });
            session.getProvenanceReporter().route(outInvalid, REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
            outInvalid = session.putAttribute(outInvalid, "count.invalid.lines", Integer.toString(totalCount.get()));
            outInvalid = session.putAttribute(outInvalid, "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(outInvalid, REL_INVALID);
            session.remove(validFF.get());
            session.remove(flowFile);
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) SuperCsvException(org.supercsv.exception.SuperCsvException) CsvPreference(org.supercsv.prefs.CsvPreference) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) CellProcessor(org.supercsv.cellprocessor.ift.CellProcessor) List(java.util.List) ArrayList(java.util.ArrayList) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)

Example 12 with CellProcessor

use of org.supercsv.cellprocessor.ift.CellProcessor in project webofneeds by researchstudio-sat.

The following example shows the recordMonitoringStatistics method of the class SimonCsvStatisticsRecorder.

/**
 * Writes the current Simon monitoring tree as a CSV file: one header row, then one
 * row per Simon, produced by walking the tree with a CSV-writing visitor.
 *
 * Write failures are logged at WARN level and swallowed (best-effort recording);
 * the writer is always closed in the finally block.
 */
@Override
public void recordMonitoringStatistics() {
    // NOTE(review): removed the unused local `CellProcessor[] processors = getProcessors();`
    // — its result was never passed to the writer. (Assumes getProcessors() is a
    // side-effect-free getter; confirm before merging.)
    ICsvMapWriter mapWriter = null;
    try {
        mapWriter = new CsvMapWriter(new FileWriter(createOutFileObject()), CsvPreference.STANDARD_PREFERENCE);
        // write the header
        mapWriter.writeHeader(header);
        // create a simon visitor that writes each line
        SimonVisitor visitor = new CsvSimonVisitor(mapWriter);
        // walk the whole Simon tree; the visitor emits one CSV row per node
        SimonVisitors.visitTree(SimonManager.getRootSimon(), visitor);
    } catch (IOException e) {
        logger.warn("could not write simon statistics", e);
    } finally {
        if (mapWriter != null) {
            try {
                mapWriter.close();
            } catch (IOException e) {
                logger.warn("could not close writer", e);
            }
        }
    }
}
Also used : ICsvMapWriter(org.supercsv.io.ICsvMapWriter) FileWriter(java.io.FileWriter) CellProcessor(org.supercsv.cellprocessor.ift.CellProcessor) CsvMapWriter(org.supercsv.io.CsvMapWriter) ICsvMapWriter(org.supercsv.io.ICsvMapWriter) IOException(java.io.IOException)

Example 13 with CellProcessor

use of org.supercsv.cellprocessor.ift.CellProcessor in project apex-malhar by apache.

The following example shows the getProcessor method of the class CsvFormatter.

/**
 * Returns an array of cell processors, one for each field, in field order.
 *
 * DATE fields get an optional {@code FmtDate} using the field's DATE_FORMAT
 * constraint (default {@code "dd/MM/yyyy"} when absent); all other fields get a
 * plain {@code Optional}, i.e. no formatting and empty cells allowed.
 *
 * @param fields schema fields to build processors for
 * @return one CellProcessor per field
 */
private CellProcessor[] getProcessor(List<Field> fields) {
    CellProcessor[] processor = new CellProcessor[fields.size()];
    int fieldCount = 0;
    for (Field field : fields) {
        if (field.getType() == FieldType.DATE) {
            // Single lookup: casting null yields null, so the previous
            // "get() == null ? null : (String) get()" double lookup was redundant.
            String format = (String) field.getConstraints().get(DelimitedSchema.DATE_FORMAT);
            processor[fieldCount++] = new Optional(new FmtDate(format == null ? "dd/MM/yyyy" : format));
        } else {
            processor[fieldCount++] = new Optional();
        }
    }
    return processor;
}
Also used : Field(org.apache.apex.malhar.contrib.parser.DelimitedSchema.Field) Optional(org.supercsv.cellprocessor.Optional) CellProcessor(org.supercsv.cellprocessor.ift.CellProcessor) FmtDate(org.supercsv.cellprocessor.FmtDate)

Example 14 with CellProcessor

use of org.supercsv.cellprocessor.ift.CellProcessor in project apex-malhar by apache.

The following example shows the getBooleanCellProcessor method of the class CellProcessorBuilder.

/**
 * Builds the cell processor for a Boolean field from its constraint map.
 *
 * When both TRUE_VALUE and FALSE_VALUE are non-blank they define the accepted
 * tokens; otherwise Super CSV's default ParseBool tokens are used. Unless the
 * REQUIRED constraint parses to true, the processor is wrapped so that empty
 * cells are allowed.
 *
 * @param constraints
 *          map of constraints applicable to Boolean
 * @return CellProcessor
 */
private static CellProcessor getBooleanCellProcessor(Map<String, Object> constraints) {
    final String requiredStr = (String) constraints.get(DelimitedSchema.REQUIRED);
    final Boolean required = requiredStr == null ? null : Boolean.parseBoolean(requiredStr);
    final String trueValue = (String) constraints.get(DelimitedSchema.TRUE_VALUE);
    final String falseValue = (String) constraints.get(DelimitedSchema.FALSE_VALUE);

    CellProcessor processor;
    if (StringUtils.isBlank(trueValue) || StringUtils.isBlank(falseValue)) {
        // Fall back to the default true/false tokens unless BOTH custom tokens exist.
        processor = new ParseBool();
    } else {
        processor = new ParseBool(trueValue, falseValue);
    }
    if (!Boolean.TRUE.equals(required)) {
        processor = addOptional(processor);
    }
    return processor;
}
Also used : DoubleCellProcessor(org.supercsv.cellprocessor.ift.DoubleCellProcessor) CellProcessor(org.supercsv.cellprocessor.ift.CellProcessor) LongCellProcessor(org.supercsv.cellprocessor.ift.LongCellProcessor) ParseBool(org.supercsv.cellprocessor.ParseBool)

Example 15 with CellProcessor

use of org.supercsv.cellprocessor.ift.CellProcessor in project apex-malhar by apache.

The following example shows the getStringCellProcessor method of the class CellProcessorBuilder.

/**
 * Builds the cell processor for a String field from its constraint map.
 *
 * Exactly one constraint wins, checked in priority order: EQUALS, then
 * REGEX_PATTERN, then exact LENGTH, then MIN_LENGTH/MAX_LENGTH (missing bound
 * defaults to 0 / LMinMax.MAX_LONG). Unless REQUIRED parses to true, the result
 * is wrapped so that empty cells are allowed.
 *
 * NOTE(review): when REQUIRED is true and no other constraint is present this
 * returns null — presumably handled by the caller; confirm. (Matches the
 * original behavior.)
 *
 * @param constraints
 *          map of constraints applicable to String
 * @return CellProcessor
 */
private static CellProcessor getStringCellProcessor(Map<String, Object> constraints) {
    final String requiredStr = (String) constraints.get(DelimitedSchema.REQUIRED);
    final Boolean required = requiredStr == null ? null : Boolean.parseBoolean(requiredStr);
    final String lengthStr = (String) constraints.get(DelimitedSchema.LENGTH);
    final Integer strLen = lengthStr == null ? null : Integer.parseInt(lengthStr);
    final String minLengthStr = (String) constraints.get(DelimitedSchema.MIN_LENGTH);
    final Integer minLength = minLengthStr == null ? null : Integer.parseInt(minLengthStr);
    final String maxLengthStr = (String) constraints.get(DelimitedSchema.MAX_LENGTH);
    final Integer maxLength = maxLengthStr == null ? null : Integer.parseInt(maxLengthStr);
    final String equals = (String) constraints.get(DelimitedSchema.EQUALS);
    final String pattern = (String) constraints.get(DelimitedSchema.REGEX_PATTERN);

    CellProcessor processor = null;
    if (StringUtils.isNotBlank(equals)) {
        processor = new Equals(equals);
    } else if (StringUtils.isNotBlank(pattern)) {
        processor = new StrRegEx(pattern);
    } else if (strLen != null) {
        processor = new Strlen(strLen);
    } else if (minLength != null || maxLength != null) {
        final long lowerBound = minLength == null ? 0L : minLength;
        final long upperBound = maxLength == null ? LMinMax.MAX_LONG : maxLength;
        processor = new StrMinMax(lowerBound, upperBound);
    }
    if (!Boolean.TRUE.equals(required)) {
        processor = addOptional(processor);
    }
    return processor;
}
Also used : Equals(org.supercsv.cellprocessor.constraint.Equals) ParseLong(org.supercsv.cellprocessor.ParseLong) DoubleCellProcessor(org.supercsv.cellprocessor.ift.DoubleCellProcessor) CellProcessor(org.supercsv.cellprocessor.ift.CellProcessor) LongCellProcessor(org.supercsv.cellprocessor.ift.LongCellProcessor) StrRegEx(org.supercsv.cellprocessor.constraint.StrRegEx) StrMinMax(org.supercsv.cellprocessor.constraint.StrMinMax) Strlen(org.supercsv.cellprocessor.constraint.Strlen)

Aggregations

CellProcessor (org.supercsv.cellprocessor.ift.CellProcessor)19 DoubleCellProcessor (org.supercsv.cellprocessor.ift.DoubleCellProcessor)7 LongCellProcessor (org.supercsv.cellprocessor.ift.LongCellProcessor)7 InputStreamReader (java.io.InputStreamReader)6 IOException (java.io.IOException)5 Equals (org.supercsv.cellprocessor.constraint.Equals)5 CsvMapReader (org.supercsv.io.CsvMapReader)4 ICsvMapReader (org.supercsv.io.ICsvMapReader)4 CsvPreference (org.supercsv.prefs.CsvPreference)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Field (org.apache.apex.malhar.contrib.parser.DelimitedSchema.Field)3 Optional (org.supercsv.cellprocessor.Optional)3 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 PreAuthorize (org.springframework.security.access.prepost.PreAuthorize)2 NotNull (org.supercsv.cellprocessor.constraint.NotNull)2 CommentStartsWith (org.supercsv.comment.CommentStartsWith)2 AmazonEC2ClientBuilder (com.amazonaws.services.ec2.AmazonEC2ClientBuilder)1