Example usage of org.supercsv.cellprocessor.ift.CellProcessor in the Apache NiFi project: class ValidateCsv, method onTrigger.
/**
 * Validates the content of an incoming CSV FlowFile against the cell-processor
 * schema built from the SCHEMA property.
 *
 * Two strategies, selected by VALIDATION_STRATEGY:
 *  - whole-FlowFile validation: stop at the first invalid line and route the
 *    original FlowFile as a unit to 'valid' or 'invalid';
 *  - line-by-line validation: split the content into two newly created
 *    FlowFiles (valid lines vs. invalid lines), route each, and remove the
 *    original.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
FlowFile flowFile = session.get();
if (flowFile == null) {
// Nothing queued for this trigger.
return;
}
final CsvPreference csvPref = getPreference(context, flowFile);
final boolean header = context.getProperty(HEADER).asBoolean();
final ComponentLog logger = getLogger();
// Schema may use expression language, so evaluate against this FlowFile's attributes.
final String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
final CellProcessor[] cellProcs = this.parseSchema(schema);
final boolean isWholeFFValidation = context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
// Mutable state shared with the read callback below (hence the AtomicReferences).
final AtomicReference<Boolean> valid = new AtomicReference<Boolean>(true);
// Track whether a line separator must be written before the next appended line.
final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<Boolean>(true);
final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<Boolean>(true);
final AtomicReference<Integer> okCount = new AtomicReference<Integer>(0);
final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0);
final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null);
final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null);
if (!isWholeFFValidation) {
// Line-by-line mode: the content is split across two new FlowFiles.
invalidFF.set(session.create(flowFile));
validFF.set(session.create(flowFile));
}
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
NifiCsvListReader listReader = null;
try {
// NOTE(review): InputStreamReader uses the platform default charset here — confirm intended.
listReader = new NifiCsvListReader(new InputStreamReader(in), csvPref);
// handling of header
if (header) {
// Header is read without cell processors (no validation applied to it).
List<String> headerList = listReader.read();
if (!isWholeFFValidation) {
// In line-by-line mode the header is replicated into both output FlowFiles.
invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write(print(headerList, csvPref, isFirstLineInvalid.get()));
}
}));
validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write(print(headerList, csvPref, isFirstLineValid.get()));
}
}));
isFirstLineValid.set(false);
isFirstLineInvalid.set(false);
}
}
boolean stop = false;
while (!stop) {
try {
// read() applies the cell processors; an invalid line throws SuperCsvException.
final List<Object> list = listReader.read(cellProcs);
stop = list == null;
if (!isWholeFFValidation && !stop) {
// Valid line: append it to the 'valid' FlowFile.
validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write(print(list, csvPref, isFirstLineValid.get()));
}
}));
okCount.set(okCount.get() + 1);
if (isFirstLineValid.get()) {
isFirstLineValid.set(false);
}
}
} catch (final SuperCsvException e) {
valid.set(false);
if (isWholeFFValidation) {
// Whole-file mode: first failure decides the route; stop reading.
logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[] { flowFile }, e);
break;
} else {
// we append the invalid line to the flow file that will be routed to invalid relationship
invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write(print(e.getCsvContext().getRowSource(), csvPref, isFirstLineInvalid.get()));
}
}));
if (isFirstLineInvalid.get()) {
isFirstLineInvalid.set(false);
}
}
} finally {
// Counts every pass through the loop, including the terminating null read.
if (!isWholeFFValidation) {
totalCount.set(totalCount.get() + 1);
}
}
}
} catch (final IOException e) {
// Stream-level failure (not a validation failure): route to 'invalid'.
valid.set(false);
logger.error("Failed to validate {} against schema due to {}", new Object[] { flowFile }, e);
} finally {
if (listReader != null) {
listReader.close();
}
}
}
});
if (isWholeFFValidation) {
if (valid.get()) {
logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { flowFile });
session.getProvenanceReporter().route(flowFile, REL_VALID);
session.transfer(flowFile, REL_VALID);
} else {
session.getProvenanceReporter().route(flowFile, REL_INVALID);
session.transfer(flowFile, REL_INVALID);
}
} else {
if (valid.get()) {
// All lines valid: keep only the 'valid' split, drop the empty 'invalid' one and the original.
// NOTE(review): totalCount was also incremented on the terminating null read (see the
// finally above) but is NOT decremented in this branch, unlike the mixed branch below —
// the reported counts here look off by one; confirm against upstream tests.
logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { validFF.get() });
session.getProvenanceReporter().route(validFF.get(), REL_VALID, "All " + totalCount.get() + " line(s) are valid");
session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(totalCount.get()));
session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
session.transfer(validFF.get(), REL_VALID);
session.remove(invalidFF.get());
session.remove(flowFile);
} else if (okCount.get() != 0) {
// because of the finally within the 'while' loop
totalCount.set(totalCount.get() - 1);
logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'", new Object[] { okCount.get(), totalCount.get(), flowFile });
session.getProvenanceReporter().route(validFF.get(), REL_VALID, okCount.get() + " valid line(s)");
session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(okCount.get()));
session.transfer(validFF.get(), REL_VALID);
session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
session.transfer(invalidFF.get(), REL_INVALID);
session.remove(flowFile);
} else {
// No valid line at all: keep only the 'invalid' split.
logger.debug("All lines in {} are invalid; routing to 'invalid'", new Object[] { invalidFF.get() });
session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get()));
session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
session.transfer(invalidFF.get(), REL_INVALID);
session.remove(validFF.get());
session.remove(flowFile);
}
}
}
Example usage of org.supercsv.cellprocessor.ift.CellProcessor in the webofneeds project (researchstudio-sat): class SimonCsvStatisticsRecorder, method recordMonitoringStatistics.
/**
 * Writes the current Simon monitoring statistics to a CSV file.
 *
 * Opens a CsvMapWriter over the output file, writes the header row, then walks
 * the Simon tree so the visitor emits one CSV line per Simon node. I/O failures
 * are logged as warnings rather than propagated; the writer is closed in a
 * finally block (close failures are also only logged, which is why this does
 * not use try-with-resources — that would propagate the close exception).
 */
@Override
public void recordMonitoringStatistics() {
    ICsvMapWriter mapWriter = null;
    try {
        // NOTE(review): FileWriter uses the platform default charset — confirm intended.
        mapWriter = new CsvMapWriter(new FileWriter(createOutFileObject()), CsvPreference.STANDARD_PREFERENCE);
        // Fix: removed the unused local `processors` (getProcessors() result was never used).
        // write the header
        mapWriter.writeHeader(header);
        // create a simon visitor that writes each line
        SimonVisitor visitor = new CsvSimonVisitor(mapWriter);
        // write the customer maps
        SimonVisitors.visitTree(SimonManager.getRootSimon(), visitor);
    } catch (IOException e) {
        logger.warn("could not write simon statistics", e);
    } finally {
        if (mapWriter != null) {
            try {
                mapWriter.close();
            } catch (IOException e) {
                logger.warn("could not close writer", e);
            }
        }
    }
}
Example usage of org.supercsv.cellprocessor.ift.CellProcessor in the Apache Apex Malhar project: class CsvFormatter, method getProcessor.
/**
 * Builds one cell processor per schema field.
 *
 * DATE fields get an Optional(FmtDate) using the field's DATE_FORMAT constraint
 * (falling back to "dd/MM/yyyy" when absent); every other field type gets a
 * plain Optional.
 *
 * @param fields ordered schema fields
 * @return one CellProcessor per field, in field order
 */
private CellProcessor[] getProcessor(List<Field> fields) {
    final CellProcessor[] processors = new CellProcessor[fields.size()];
    int index = 0;
    for (final Field field : fields) {
        if (field.getType() == FieldType.DATE) {
            // Casting a null map value is a no-op, so the redundant null-check
            // ternary from before collapses into a single lookup.
            final String configuredFormat = (String) field.getConstraints().get(DelimitedSchema.DATE_FORMAT);
            final String dateFormat = configuredFormat != null ? configuredFormat : "dd/MM/yyyy";
            processors[index] = new Optional(new FmtDate(dateFormat));
        } else {
            processors[index] = new Optional();
        }
        index++;
    }
    return processors;
}
Example usage of org.supercsv.cellprocessor.ift.CellProcessor in the Apache Apex Malhar project: class CellProcessorBuilder, method getBooleanCellProcessor.
/**
 * Builds the cell processor for a Boolean field from its constraint map.
 *
 * Uses ParseBool(trueValue, falseValue) when both custom token constraints are
 * non-blank, otherwise the default ParseBool. Unless the REQUIRED constraint is
 * explicitly "true", the processor is wrapped so the cell may be absent.
 *
 * @param constraints map of constraints applicable to Boolean
 * @return CellProcessor
 */
private static CellProcessor getBooleanCellProcessor(Map<String, Object> constraints) {
    final Object requiredRaw = constraints.get(DelimitedSchema.REQUIRED);
    final Boolean required = requiredRaw == null ? null : Boolean.parseBoolean((String) requiredRaw);
    // Casting a null map value yields null, so no explicit null check is needed here.
    final String trueValue = (String) constraints.get(DelimitedSchema.TRUE_VALUE);
    final String falseValue = (String) constraints.get(DelimitedSchema.FALSE_VALUE);

    CellProcessor processor;
    if (StringUtils.isNotBlank(trueValue) && StringUtils.isNotBlank(falseValue)) {
        processor = new ParseBool(trueValue, falseValue);
    } else {
        processor = new ParseBool();
    }
    if (required == null || !required) {
        processor = addOptional(processor);
    }
    return processor;
}
Example usage of org.supercsv.cellprocessor.ift.CellProcessor in the Apache Apex Malhar project: class CellProcessorBuilder, method getStringCellProcessor.
/**
 * Builds the cell processor for a String field from its constraint map.
 *
 * Constraint precedence (first match wins): EQUALS, then REGEX_PATTERN, then
 * exact LENGTH, then MIN_LENGTH/MAX_LENGTH range (missing bounds default to 0
 * and LMinMax.MAX_LONG). If none apply, the base processor stays null. Unless
 * the REQUIRED constraint is explicitly "true", the result is wrapped so the
 * cell may be absent.
 *
 * @param constraints map of constraints applicable to String
 * @return CellProcessor
 */
private static CellProcessor getStringCellProcessor(Map<String, Object> constraints) {
    final Object requiredRaw = constraints.get(DelimitedSchema.REQUIRED);
    final Boolean required = requiredRaw == null ? null : Boolean.parseBoolean((String) requiredRaw);
    final Object lengthRaw = constraints.get(DelimitedSchema.LENGTH);
    final Integer exactLength = lengthRaw == null ? null : Integer.parseInt((String) lengthRaw);
    final Object minRaw = constraints.get(DelimitedSchema.MIN_LENGTH);
    final Integer minLength = minRaw == null ? null : Integer.parseInt((String) minRaw);
    final Object maxRaw = constraints.get(DelimitedSchema.MAX_LENGTH);
    final Integer maxLength = maxRaw == null ? null : Integer.parseInt((String) maxRaw);
    // Casting a null map value yields null, so no explicit null check is needed for these.
    final String equals = (String) constraints.get(DelimitedSchema.EQUALS);
    final String pattern = (String) constraints.get(DelimitedSchema.REGEX_PATTERN);

    CellProcessor processor = null;
    if (StringUtils.isNotBlank(equals)) {
        processor = new Equals(equals);
    } else if (StringUtils.isNotBlank(pattern)) {
        processor = new StrRegEx(pattern);
    } else if (exactLength != null) {
        processor = new Strlen(exactLength);
    } else if (maxLength != null || minLength != null) {
        final Long lower = minLength == null ? 0L : minLength;
        final Long upper = maxLength == null ? LMinMax.MAX_LONG : maxLength;
        processor = new StrMinMax(lower, upper);
    }
    if (required == null || !required) {
        // May wrap null: addOptional is expected to handle a null base processor.
        processor = addOptional(processor);
    }
    return processor;
}
Aggregations