Use of datawave.ingest.validation.FieldValidator in project datawave by NationalSecurityAgency.
Class EventMapper, method processEvent.
/**
 * Runs every handler in {@code handlers} against a single event.
 * <p>
 * The event's fields are gathered for the first handler that has an ingest helper, and gathered again only when a later handler's helper is of a different
 * class than the helper used for the previous handler. Before each handler is executed, the field validators registered under the event's data type output
 * name are applied to the current fields.
 *
 * @param key
 *            The key of the map process
 * @param value
 *            The event
 * @param handlers
 *            The list of handlers to apply
 * @param fields
 *            The multimap which keeps the last set of fields; it is cleared and refilled in place (retained in case the caller needs to handle a thrown
 *            exception)
 * @param context
 *            The context; progress is reported on it after each handler has been executed
 * @throws Exception
 *             if a validator or a handler fails; a FieldNormalizationError is thrown when any of the gathered fields carries an error
 */
public void processEvent(K1 key, RawRecordContainer value, List<DataTypeHandler<K1>> handlers, Multimap<String, NormalizedContentInterface> fields, Context context) throws Exception {
    IngestHelperInterface helperOfLastParse = null;

    for (DataTypeHandler<K1> handler : handlers) {
        if (log.isTraceEnabled()) {
            log.trace("executing handler: " + handler.getClass().getName());
        }

        // A handler that has no helper for the event's data type is skipped; the remaining handlers are still applied.
        IngestHelperInterface helper = handler.getHelper(value.getDataType());
        if (helper == null) {
            if (log.isTraceEnabled()) {
                log.trace("Aborting processing due to null ingest helper");
            }
            continue;
        }

        // Try to parse the event only once: helpers of the same class *should* produce the same fields, so the fields are
        // gathered again only when the helper class differs from the one used for the previous handler.
        boolean sameHelperClass = helperOfLastParse != null && helperOfLastParse.getClass().getName().equals(helper.getClass().getName());
        if (!sameHelperClass) {
            fields.clear();

            // Every field is copied into the caller's multimap before any error is reported; only the last error seen is kept as the cause.
            Throwable fieldError = null;
            for (Map.Entry<String, NormalizedContentInterface> entry : getFields(value, handler).entries()) {
                NormalizedContentInterface content = entry.getValue();
                // noinspection ThrowableResultOfMethodCallIgnored
                Throwable error = content.getError();
                if (error != null) {
                    fieldError = error;
                }
                fields.put(entry.getKey(), content);
            }
            if (fieldError != null) {
                throw new FieldNormalizationError("Failed getting all fields", fieldError);
            }

            // Event based metrics are collected once per gathering of the fields, not once per handler.
            if (metricsEnabled) {
                metricsLabels.clear();
                metricsLabels.put("dataType", value.getDataType().typeName());
                metricsService.collect(Metric.EVENT_COUNT, metricsLabels.get(), fields, 1L);
                metricsService.collect(Metric.BYTE_COUNT, metricsLabels.get(), fields, (long) value.getRawData().length);
            }

            helperOfLastParse = helper;
        }

        // Validation runs ahead of every handler, including those that reuse previously gathered fields.
        for (FieldValidator validator : validators.get(value.getDataType().outputName())) {
            validator.validate(value, fields);
        }

        executeHandler(key, value, fields, handler, context);
        context.progress();
    }
}
Use of datawave.ingest.validation.FieldValidator in project datawave by NationalSecurityAgency.
Class EventMapper, method loadDataType.
/**
 * Get the data type handlers for a given type name, loading them the first time the type is requested. This will also fill the
 * dataTypeDiscardIntervalCache and the validators as a side effect.
 * <p>
 * A type that is not present in the type registry is still recorded, with an empty handler list. Validator or handler classes that cannot be found or
 * instantiated are logged and skipped.
 *
 * @param typeStr
 *            the name of the data type to load
 * @param context
 *            the context supplying the configuration and the task attempt id
 * @return the data type handlers
 */
private List<DataTypeHandler<K1>> loadDataType(String typeStr, Context context) {
    // Do not load the type twice
    if (!typeMap.containsKey(typeStr)) {
        typeMap.put(typeStr, new ArrayList<>());

        // A per-type discard interval overrides the mapper-wide default
        long myInterval = context.getConfiguration().getLong(typeStr + "." + DISCARD_INTERVAL, interval);
        dataTypeDiscardIntervalCache.put(typeStr, myInterval);

        log.info("Setting up type: " + typeStr + " with interval " + myInterval);

        if (!TypeRegistry.getTypeNames().contains(typeStr)) {
            log.warn("Attempted to load configuration for a type that does not exist in the registry: " + typeStr);
        } else {
            Type t = TypeRegistry.getType(typeStr);

            String fieldValidators = context.getConfiguration().get(typeStr + FieldValidator.FIELD_VALIDATOR_NAMES);
            if (fieldValidators != null) {
                String[] validatorClasses = StringUtils.split(fieldValidators, ",");
                for (String validatorClass : validatorClasses) {
                    // Configured lists are commonly written with spaces or line breaks around the commas. Class.forName does
                    // not tolerate surrounding whitespace, so trim each entry and ignore entries that are blank.
                    String validatorClassName = validatorClass.trim();
                    if (validatorClassName.isEmpty()) {
                        continue;
                    }
                    try {
                        Class<? extends FieldValidator> clazz = Class.forName(validatorClassName).asSubclass(FieldValidator.class);
                        // NOTE(review): Class.newInstance() is deprecated as of Java 9; left as is because the replacement changes how
                        // constructor exceptions are reported.
                        FieldValidator validator = clazz.newInstance();
                        validator.init(t, context.getConfiguration());
                        validators.put(typeStr, validator);
                    } catch (ClassNotFoundException e) {
                        log.error("Error finding validator " + validatorClassName, e);
                    } catch (InstantiationException | IllegalAccessException e) {
                        log.error("Error creating validator " + validatorClassName, e);
                    }
                }
            }

            String[] handlerClassNames = t.getDefaultDataTypeHandlers();
            if (handlerClassNames != null) {
                for (String handlerClassName : handlerClassNames) {
                    log.info("Configuring handler: " + handlerClassName);
                    try {
                        @SuppressWarnings("unchecked")
                        Class<? extends DataTypeHandler<K1>> clazz = (Class<? extends DataTypeHandler<K1>>) Class.forName(handlerClassName);
                        DataTypeHandler<K1> h = clazz.newInstance();

                        // Create a counter initialized to zero for all handler types.
                        getCounter(context, IngestOutput.ROWS_CREATED.name(), h.getClass().getSimpleName()).increment(0);

                        // Trick here. Set the data.name parameter to type T, then call setup on the DataTypeHandler
                        Configuration clone = new Configuration(context.getConfiguration());
                        clone.set(DataTypeHelper.Properties.DATA_NAME, t.typeName());

                        // Use the StandaloneReporter and StandaloneTaskAttemptContext for the Handlers, because the
                        // StandaloneTaskAttemptContext is a subclass of TaskInputOutputContext and TaskAttemptContext is not.
                        // We are using this to record the counters during processing. We will need to add the counters in the
                        // StandaloneReporter to the Map.Context in the close call.
                        StandaloneTaskAttemptContext<K1, V1, K2, V2> newContext = new StandaloneTaskAttemptContext<>(clone, context.getTaskAttemptID(), reporter);
                        h.setup(newContext);
                        typeMap.get(typeStr).add(h);
                    } catch (ClassNotFoundException e) {
                        log.error("Error finding DataTypeHandler " + handlerClassName, e);
                    } catch (InstantiationException | IllegalAccessException e) {
                        log.error("Error creating DataTypeHandler " + handlerClassName, e);
                    }
                }
            }
        }
        log.info("EventMapper configured with the following handlers for " + typeStr + ": " + typeMap.get(typeStr));
    }
    return typeMap.get(typeStr);
}
Aggregations