Search in sources:

Example 1 with FieldValidator

Use of datawave.ingest.validation.FieldValidator in project datawave by NationalSecurityAgency.

From the class EventMapper, method processEvent.

/**
 * Runs each of the supplied handlers against a single event.
 * <p>
 * A handler that returns no ingest helper for the event's data type is skipped. The event's fields are gathered through {@code getFields} for the first
 * usable handler and again whenever the helper's class name differs from that of the previously used helper; otherwise the current contents of
 * {@code fields} are reused. Every validator registered under the data type's output name is then applied, the handler is executed, and progress is
 * reported on the context.
 *
 * @param key
 *            The key of the map process
 * @param value
 *            The event
 * @param handlers
 *            The list of handlers to apply
 * @param fields
 *            The multimap that keeps the last set of gathered fields (retained in case the caller needs to handle a thrown exception)
 * @param context
 *            The context
 * @throws Exception
 *             if field gathering, validation or handler execution fails; a {@code FieldNormalizationError} is raised here when any gathered field
 *             carries an error
 */
public void processEvent(K1 key, RawRecordContainer value, List<DataTypeHandler<K1>> handlers, Multimap<String, NormalizedContentInterface> fields, Context context) throws Exception {
    IngestHelperInterface lastHelper = null;
    for (DataTypeHandler<K1> handler : handlers) {
        if (log.isTraceEnabled()) {
            log.trace("executing handler: " + handler.getClass().getName());
        }

        // Look up the helper this handler uses for the event's data type.
        IngestHelperInterface currentHelper = handler.getHelper(value.getDataType());
        if (currentHelper == null) {
            // No helper for this data type: nothing to do for this handler, move on to the next one.
            if (log.isTraceEnabled()) {
                log.trace("Aborting processing due to null ingest helper");
            }
            continue;
        }

        // Only re-gather the fields when the helper class changes; handlers sharing a helper class reuse the fields already collected.
        boolean helperUnchanged = lastHelper != null && lastHelper.getClass().getName().equals(currentHelper.getClass().getName());
        if (!helperUnchanged) {
            fields.clear();
            Throwable lastFieldError = null;
            for (Map.Entry<String, NormalizedContentInterface> fieldEntry : getFields(value, handler).entries()) {
                NormalizedContentInterface field = fieldEntry.getValue();
                // Remember the most recent error, but keep collecting so that 'fields' holds every entry when we throw.
                Throwable fieldError = field.getError();
                if (fieldError != null) {
                    lastFieldError = fieldError;
                }
                fields.put(fieldEntry.getKey(), field);
            }
            if (lastFieldError != null) {
                throw new FieldNormalizationError("Failed getting all fields", lastFieldError);
            }

            // Event based metrics, collected once per field-gathering pass.
            if (metricsEnabled) {
                metricsLabels.clear();
                metricsLabels.put("dataType", value.getDataType().typeName());
                metricsService.collect(Metric.EVENT_COUNT, metricsLabels.get(), fields, 1L);
                metricsService.collect(Metric.BYTE_COUNT, metricsLabels.get(), fields, (long) value.getRawData().length);
            }
            lastHelper = currentHelper;
        }

        // Apply the validators registered under this data type's output name before handing the event to the handler.
        for (FieldValidator validator : validators.get(value.getDataType().outputName())) {
            validator.validate(value, fields);
        }

        executeHandler(key, value, fields, handler, context);
        context.progress();
    }
}
Also used : IngestHelperInterface(datawave.ingest.data.config.ingest.IngestHelperInterface) FieldValidator(datawave.ingest.validation.FieldValidator) NormalizedContentInterface(datawave.ingest.data.config.NormalizedContentInterface) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Example 2 with FieldValidator

Use of datawave.ingest.validation.FieldValidator in project datawave by NationalSecurityAgency.

From the class EventMapper, method loadDataType.

/**
 * Returns the data type handlers for the given type name, loading them on first request. As a side effect of the first load, the
 * dataTypeDiscardIntervalCache and the validators are filled for that type as well.
 * <p>
 * A type name that is not present in the {@code TypeRegistry} is logged as a warning and ends up with an empty handler list. Validator or handler
 * classes that cannot be found or instantiated are logged as errors and skipped.
 *
 * @param typeStr
 *            the name of the data type to load
 * @param context
 *            the context whose configuration and task attempt id are used to set up validators and handlers
 * @return the data type handlers
 */
private List<DataTypeHandler<K1>> loadDataType(String typeStr, Context context) {
    // Each type is loaded only once; later calls return the cached list.
    if (typeMap.containsKey(typeStr)) {
        return typeMap.get(typeStr);
    }

    typeMap.put(typeStr, new ArrayList<>());
    long discardInterval = context.getConfiguration().getLong(typeStr + "." + DISCARD_INTERVAL, interval);
    dataTypeDiscardIntervalCache.put(typeStr, discardInterval);
    log.info("Setting up type: " + typeStr + " with interval " + discardInterval);

    if (TypeRegistry.getTypeNames().contains(typeStr)) {
        Type type = TypeRegistry.getType(typeStr);

        // Instantiate and initialise the validators named (comma separated) in the configuration for this type.
        String validatorNames = context.getConfiguration().get(typeStr + FieldValidator.FIELD_VALIDATOR_NAMES);
        if (validatorNames != null) {
            for (String validatorName : StringUtils.split(validatorNames, ",")) {
                try {
                    Class<? extends FieldValidator> validatorType = Class.forName(validatorName).asSubclass(FieldValidator.class);
                    FieldValidator validator = validatorType.newInstance();
                    validator.init(type, context.getConfiguration());
                    validators.put(typeStr, validator);
                } catch (ClassNotFoundException e) {
                    log.error("Error finding validator " + validatorName, e);
                } catch (InstantiationException | IllegalAccessException e) {
                    log.error("Error creating validator " + validatorName, e);
                }
            }
        }

        // Instantiate and set up the default handlers declared by the type.
        String[] handlerNames = type.getDefaultDataTypeHandlers();
        if (handlerNames != null) {
            for (String handlerName : handlerNames) {
                log.info("Configuring handler: " + handlerName);
                try {
                    @SuppressWarnings("unchecked")
                    Class<? extends DataTypeHandler<K1>> handlerType = (Class<? extends DataTypeHandler<K1>>) Class.forName(handlerName);
                    DataTypeHandler<K1> dataTypeHandler = handlerType.newInstance();

                    // Make sure a counter exists (starting at zero) for every handler type.
                    getCounter(context, IngestOutput.ROWS_CREATED.name(), dataTypeHandler.getClass().getSimpleName()).increment(0);

                    // The handler is set up against a copy of the configuration whose data.name parameter is set to this type.
                    Configuration handlerConf = new Configuration(context.getConfiguration());
                    handlerConf.set(DataTypeHelper.Properties.DATA_NAME, type.typeName());

                    // Handlers get a StandaloneTaskAttemptContext backed by the StandaloneReporter so that counters can be recorded during
                    // processing; the counters held by the reporter need to be added to the Map.Context in the close call.
                    StandaloneTaskAttemptContext<K1, V1, K2, V2> handlerContext = new StandaloneTaskAttemptContext<>(handlerConf, context.getTaskAttemptID(), reporter);
                    dataTypeHandler.setup(handlerContext);
                    typeMap.get(typeStr).add(dataTypeHandler);
                } catch (ClassNotFoundException e) {
                    log.error("Error finding DataTypeHandler " + handlerName, e);
                } catch (InstantiationException | IllegalAccessException e) {
                    log.error("Error creating DataTypeHandler " + handlerName, e);
                }
            }
        }
    } else {
        log.warn("Attempted to load configuration for a type that does not exist in the registry: " + typeStr);
    }

    log.info("EventMapper configured with the following handlers for " + typeStr + ": " + typeMap.get(typeStr));
    return typeMap.get(typeStr);
}
Also used : StandaloneTaskAttemptContext(datawave.ingest.test.StandaloneTaskAttemptContext) Configuration(org.apache.hadoop.conf.Configuration) MetricsConfiguration(datawave.ingest.mapreduce.job.metrics.MetricsConfiguration) FieldValidator(datawave.ingest.validation.FieldValidator) ErrorDataTypeHandler(datawave.ingest.mapreduce.handler.error.ErrorDataTypeHandler) ExtendedDataTypeHandler(datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler) DataTypeHandler(datawave.ingest.mapreduce.handler.DataTypeHandler) Type(datawave.ingest.data.Type)

Aggregations

FieldValidator (datawave.ingest.validation.FieldValidator)2 Type (datawave.ingest.data.Type)1 NormalizedContentInterface (datawave.ingest.data.config.NormalizedContentInterface)1 IngestHelperInterface (datawave.ingest.data.config.ingest.IngestHelperInterface)1 DataTypeHandler (datawave.ingest.mapreduce.handler.DataTypeHandler)1 ExtendedDataTypeHandler (datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler)1 ErrorDataTypeHandler (datawave.ingest.mapreduce.handler.error.ErrorDataTypeHandler)1 MetricsConfiguration (datawave.ingest.mapreduce.job.metrics.MetricsConfiguration)1 StandaloneTaskAttemptContext (datawave.ingest.test.StandaloneTaskAttemptContext)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 SortedMap (java.util.SortedMap)1 TreeMap (java.util.TreeMap)1 Configuration (org.apache.hadoop.conf.Configuration)1