
Example 1 with StandaloneStatusReporter

Use of datawave.ingest.test.StandaloneStatusReporter in project datawave by NationalSecurityAgency.

From class ColumnBasedHandlerTestUtil, method processEvent:

public static void processEvent(DataTypeHandler<Text> handler, ExtendedDataTypeHandler<Text, BulkIngestKey, Value> edgeHandler, RawRecordContainer event, int expectedShardKeys, int expectedShardIndexKeys, int expectedShardReverseIndexKeys, int expectedEdgeKeys, boolean printKeysOnlyOnFail) {
    Assert.assertNotNull("Event was null.", event);
    Multimap<String, NormalizedContentInterface> eventFields = handler.getHelper(event.getDataType()).getEventFields(event);
    VirtualIngest vHelper = (VirtualIngest) handler.getHelper(event.getDataType());
    Multimap<String, NormalizedContentInterface> virtualFields = vHelper.getVirtualFields(eventFields);
    for (Map.Entry<String, NormalizedContentInterface> v : virtualFields.entries()) {
        eventFields.put(v.getKey(), v.getValue());
    }
    Multimap<BulkIngestKey, Value> results = handler.processBulk(new Text(), event, eventFields, new MockStatusReporter());
    Set<Key> shardKeys = new HashSet<>();
    Set<Key> shardIndexKeys = new HashSet<>();
    Set<Key> shardReverseIndexKeys = new HashSet<>();
    Set<Key> edgeKeys = new HashSet<>();
    Map<Text, Integer> countMap = Maps.newHashMap();
    for (BulkIngestKey k : results.keySet()) {
        Text tableName = k.getTableName();
        if (countMap.containsKey(tableName)) {
            countMap.put(tableName, countMap.get(tableName) + 1);
        } else {
            countMap.put(tableName, 1);
        }
    }
    for (Map.Entry<BulkIngestKey, Value> e : results.entries()) {
        BulkIngestKey bik = e.getKey();
        if (log.isDebugEnabled() && isDocumentKey(bik.getKey())) {
            log.debug("Found Document Key: " + bik.getKey());
            log.debug("value:\n" + e.getValue());
        }
        if (bik.getTableName().equals(shardTableName)) {
            shardKeys.add(bik.getKey());
        } else if (bik.getTableName().equals(shardIndexTableName)) {
            shardIndexKeys.add(bik.getKey());
        } else if (bik.getTableName().equals(shardReverseIndexTableName)) {
            shardReverseIndexKeys.add(bik.getKey());
        } else {
            Assert.fail("unknown table: " + bik.getTableName() + " key: " + bik.getKey());
        }
    }
    // Process edges
    countMap.put(edgeTableName, 0);
    if (null != edgeHandler) {
        MyCachingContextWriter contextWriter = new MyCachingContextWriter();
        StandaloneTaskAttemptContext<Text, RawRecordContainerImpl, BulkIngestKey, Value> ctx = new StandaloneTaskAttemptContext<>(((RawRecordContainerImpl) event).getConf(), new StandaloneStatusReporter());
        try {
            contextWriter.setup(ctx.getConfiguration(), false);
            edgeHandler.process(null, event, eventFields, ctx, contextWriter);
            contextWriter.commit(ctx);
            for (Map.Entry<BulkIngestKey, Value> entry : contextWriter.getCache().entries()) {
                if (entry.getKey().getTableName().equals(edgeTableName)) {
                    edgeKeys.add(entry.getKey().getKey());
                }
                if (countMap.containsKey(entry.getKey().getTableName())) {
                    countMap.put(entry.getKey().getTableName(), countMap.get(entry.getKey().getTableName()) + 1);
                } else {
                    countMap.put(entry.getKey().getTableName(), 1);
                }
            }
        } catch (Throwable t) {
            log.error("Error during edge processing", t);
            throw new RuntimeException(t);
        }
    }
    Set<String> keyPrint = new TreeSet<>();
    for (Key k : shardKeys) {
        keyPrint.add("shard key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    // check index keys
    for (Key k : shardIndexKeys) {
        keyPrint.add("shardIndex key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    // check reverse index keys
    for (Key k : shardReverseIndexKeys) {
        keyPrint.add("reverseShardIndex key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    // check edge keys
    for (Key k : edgeKeys) {
        keyPrint.add("edge key: " + k.getRow().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    try {
        if (!printKeysOnlyOnFail) {
            for (String keyString : keyPrint) {
                log.info(keyString.trim());
            }
        }
        Assert.assertTrue(countMap.get(shardTableName) == expectedShardKeys && countMap.get(shardIndexTableName) == expectedShardIndexKeys && countMap.get(shardReverseIndexTableName) == expectedShardReverseIndexKeys && countMap.get(edgeTableName) == expectedEdgeKeys);
    } catch (AssertionError ae) {
        if (printKeysOnlyOnFail) {
            for (String keyString : keyPrint) {
                log.info(keyString.trim());
            }
        }
        Assert.fail(String.format("Expected: %s shard, %s index, %s reverse index, and %s edge keys.\nFound: %s, %s, %s, and %s respectively", expectedShardKeys, expectedShardIndexKeys, expectedShardReverseIndexKeys, expectedEdgeKeys, countMap.get(shardTableName), countMap.get(shardIndexTableName), countMap.get(shardReverseIndexTableName), countMap.get(edgeTableName)));
    }
}
Also used: StandaloneTaskAttemptContext(datawave.ingest.test.StandaloneTaskAttemptContext) RawRecordContainerImpl(datawave.ingest.config.RawRecordContainerImpl) TreeSet(java.util.TreeSet) NormalizedContentInterface(datawave.ingest.data.config.NormalizedContentInterface) HashSet(java.util.HashSet) VirtualIngest(datawave.ingest.data.config.ingest.VirtualIngest) Text(org.apache.hadoop.io.Text) Value(org.apache.accumulo.core.data.Value) BulkIngestKey(datawave.ingest.mapreduce.job.BulkIngestKey) StandaloneStatusReporter(datawave.ingest.test.StandaloneStatusReporter) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)
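
The per-table tally in processEvent uses a containsKey/put branch for each key. On Java 8 and later the same count can be written with Map.merge; the helper below is a minimal sketch of that equivalent idiom, not code from ColumnBasedHandlerTestUtil.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.Text;

import com.google.common.collect.Multimap;

import org.apache.accumulo.core.data.Value;

import datawave.ingest.mapreduce.job.BulkIngestKey;

// Hypothetical helper (not part of the datawave source): tallies the number of distinct
// BulkIngestKeys per table, mirroring the countMap loop in processEvent above.
public class TableKeyCounter {
    public static Map<Text, Integer> countByTable(Multimap<BulkIngestKey, Value> results) {
        Map<Text, Integer> countMap = new HashMap<>();
        for (BulkIngestKey k : results.keySet()) {
            // merge() starts the count at 1 and adds 1 for every further key in the same table
            countMap.merge(k.getTableName(), 1, Integer::sum);
        }
        return countMap;
    }
}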

Example 2 with StandaloneStatusReporter

Use of datawave.ingest.test.StandaloneStatusReporter in project datawave by NationalSecurityAgency.

From class AccumuloSetup, method ingestTestData:

private void ingestTestData(Configuration conf, TestFileLoader loader) throws IOException, InterruptedException {
    log.debug("------------- ingestTestData -------------");
    Path tmpPath = new Path(tempFolder.toURI());
    // To prevent intermittent test failures, a "---" prefix is added to the UUID so that test
    // queries with _ANYFIELD_ terms starting with particular letters are supported.
    Path seqFile = new Path(tmpPath, "---" + UUID.randomUUID().toString());
    TaskAttemptID id = new TaskAttemptID("testJob", 0, TaskType.MAP, 0, 0);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, id);
    try (final RawLocalFileSystem rfs = createSequenceFile(conf, seqFile, loader)) {
        InputSplit split = new FileSplit(seqFile, 0, rfs.pathToFile(seqFile).length(), null);
        EventSequenceFileRecordReader<LongWritable> rr = new EventSequenceFileRecordReader<>();
        rr.initialize(split, context);
        Path ocPath = new Path(tmpPath, "oc");
        OutputCommitter oc = new FileOutputCommitter(ocPath, context);
        rfs.deleteOnExit(ocPath);
        StandaloneStatusReporter sr = new StandaloneStatusReporter();
        EventMapper<LongWritable, RawRecordContainer, Text, Mutation> mapper = new EventMapper<>();
        MapContext<LongWritable, RawRecordContainer, Text, Mutation> mapContext = new MapContextImpl<>(conf, id, rr, this.recordWriter, oc, sr, split);
        Mapper<LongWritable, RawRecordContainer, Text, Mutation>.Context con = new WrappedMapper<LongWritable, RawRecordContainer, Text, Mutation>().getMapContext(mapContext);
        mapper.run(con);
        mapper.cleanup(con);
    } finally {
        this.recordWriter.close(context);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) RawLocalFileSystem(org.apache.hadoop.fs.RawLocalFileSystem) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Text(org.apache.hadoop.io.Text) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) EventSequenceFileRecordReader(datawave.ingest.input.reader.event.EventSequenceFileRecordReader) EventMapper(datawave.ingest.mapreduce.EventMapper) Mapper(org.apache.hadoop.mapreduce.Mapper) WrappedMapper(org.apache.hadoop.mapreduce.lib.map.WrappedMapper) RawRecordContainer(datawave.ingest.data.RawRecordContainer) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) LongWritable(org.apache.hadoop.io.LongWritable) Mutation(org.apache.accumulo.core.data.Mutation) StandaloneStatusReporter(datawave.ingest.test.StandaloneStatusReporter) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
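
ingestTestData drives an EventMapper outside of a real MapReduce job, with StandaloneStatusReporter collecting status and counters in memory. The snippet below shows the reporter in isolation; it relies only on the getCounter/setStatus methods from Hadoop's StatusReporter contract, which StandaloneStatusReporter implements, and is an illustrative sketch rather than datawave test code.

import org.apache.hadoop.mapreduce.Counter;

import datawave.ingest.test.StandaloneStatusReporter;

// Minimal sketch: StandaloneStatusReporter stands in for a live MapReduce StatusReporter,
// keeping counters in memory so a test can inspect them after mapper.run() returns.
public class ReporterSketch {
    public static void main(String[] args) {
        StandaloneStatusReporter sr = new StandaloneStatusReporter();
        // getCounter(group, name) comes from Hadoop's StatusReporter contract
        Counter processed = sr.getCounter("ingest", "events.processed");
        processed.increment(1);
        sr.setStatus("one event processed");
        System.out.println(processed.getName() + " = " + processed.getValue());
    }
}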

Example 3 with StandaloneStatusReporter

Use of datawave.ingest.test.StandaloneStatusReporter in project datawave by NationalSecurityAgency.

From class ColumnBasedHandlerTestUtil, method processEvent:

public static void processEvent(DataTypeHandler<Text> handler, ExtendedDataTypeHandler<Text, BulkIngestKey, Value> edgeHandler, RawRecordContainer event, int expectedShardKeys, int expectedShardIndexKeys, int expectedShardReverseIndexKeys, int expectedEdgeKeys, boolean printKeysOnlyOnFail) {
    Assert.assertNotNull("Event was null.", event);
    Multimap<String, NormalizedContentInterface> eventFields = handler.getHelper(event.getDataType()).getEventFields(event);
    VirtualIngest vHelper = (VirtualIngest) handler.getHelper(event.getDataType());
    Multimap<String, NormalizedContentInterface> virtualFields = vHelper.getVirtualFields(eventFields);
    for (Map.Entry<String, NormalizedContentInterface> v : virtualFields.entries()) {
        eventFields.put(v.getKey(), v.getValue());
    }
    if (vHelper instanceof CompositeIngest) {
        CompositeIngest compIngest = (CompositeIngest) vHelper;
        Multimap<String, NormalizedContentInterface> compositeFields = compIngest.getCompositeFields(eventFields);
        for (String fieldName : compositeFields.keySet()) {
            // if this is an overloaded event field, we are replacing the existing data
            if (compIngest.isOverloadedCompositeField(fieldName))
                eventFields.removeAll(fieldName);
            eventFields.putAll(fieldName, compositeFields.get(fieldName));
        }
    }
    Multimap<BulkIngestKey, Value> results = handler.processBulk(new Text(), event, eventFields, new MockStatusReporter());
    Set<Key> shardKeys = new HashSet<>();
    Set<Key> shardIndexKeys = new HashSet<>();
    Set<Key> shardReverseIndexKeys = new HashSet<>();
    Set<Key> edgeKeys = new HashSet<>();
    Map<Text, Integer> countMap = Maps.newHashMap();
    for (BulkIngestKey k : results.keySet()) {
        Text tableName = k.getTableName();
        if (countMap.containsKey(tableName)) {
            countMap.put(tableName, countMap.get(tableName) + 1);
        } else {
            countMap.put(tableName, 1);
        }
    }
    for (Map.Entry<BulkIngestKey, Value> e : results.entries()) {
        BulkIngestKey bik = e.getKey();
        if (log.isDebugEnabled() && isDocumentKey(bik.getKey())) {
            log.debug("Found Document Key: " + bik.getKey());
            log.debug("value:\n" + e.getValue());
        }
        if (bik.getTableName().equals(shardTableName)) {
            shardKeys.add(bik.getKey());
        } else if (bik.getTableName().equals(shardIndexTableName)) {
            shardIndexKeys.add(bik.getKey());
        } else if (bik.getTableName().equals(shardReverseIndexTableName)) {
            shardReverseIndexKeys.add(bik.getKey());
        } else {
            Assert.fail("unknown table: " + bik.getTableName() + " key: " + bik.getKey());
        }
    }
    // Process edges
    countMap.put(edgeTableName, 0);
    if (null != edgeHandler) {
        MyCachingContextWriter contextWriter = new MyCachingContextWriter();
        StandaloneTaskAttemptContext<Text, RawRecordContainerImpl, BulkIngestKey, Value> ctx = new StandaloneTaskAttemptContext<>(((RawRecordContainerImpl) event).getConf(), new StandaloneStatusReporter());
        try {
            contextWriter.setup(ctx.getConfiguration(), false);
            edgeHandler.process(null, event, eventFields, ctx, contextWriter);
            contextWriter.commit(ctx);
            for (Map.Entry<BulkIngestKey, Value> entry : contextWriter.getCache().entries()) {
                if (entry.getKey().getTableName().equals(edgeTableName)) {
                    edgeKeys.add(entry.getKey().getKey());
                }
                if (countMap.containsKey(entry.getKey().getTableName())) {
                    countMap.put(entry.getKey().getTableName(), countMap.get(entry.getKey().getTableName()) + 1);
                } else {
                    countMap.put(entry.getKey().getTableName(), 1);
                }
            }
        } catch (Throwable t) {
            log.error("Error during edge processing", t);
            throw new RuntimeException(t);
        }
    }
    Set<String> keyPrint = new TreeSet<>();
    for (Key k : shardKeys) {
        keyPrint.add("shard key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    // check index keys
    for (Key k : shardIndexKeys) {
        keyPrint.add("shardIndex key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    // check reverse index keys
    for (Key k : shardReverseIndexKeys) {
        keyPrint.add("reverseShardIndex key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    // check edge keys
    for (Key k : edgeKeys) {
        keyPrint.add("edge key: " + k.getRow().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + "\n");
    }
    try {
        if (!printKeysOnlyOnFail) {
            for (String keyString : keyPrint) {
                log.info(keyString.trim());
            }
        }
        if (expectedShardKeys > 0)
            Assert.assertEquals((int) countMap.get(shardTableName), expectedShardKeys);
        if (expectedShardIndexKeys > 0)
            Assert.assertEquals((int) countMap.get(shardIndexTableName), expectedShardIndexKeys);
        if (expectedShardReverseIndexKeys > 0)
            Assert.assertEquals((int) countMap.get(shardReverseIndexTableName), expectedShardReverseIndexKeys);
        if (expectedEdgeKeys > 0)
            Assert.assertEquals((int) countMap.get(edgeTableName), expectedEdgeKeys);
    } catch (AssertionError ae) {
        if (printKeysOnlyOnFail) {
            for (String keyString : keyPrint) {
                log.info(keyString.trim());
            }
        }
        Assert.fail(String.format("Expected: %s shard, %s index, %s reverse index, and %s edge keys.\nFound: %s, %s, %s, and %s respectively", expectedShardKeys, expectedShardIndexKeys, expectedShardReverseIndexKeys, expectedEdgeKeys, countMap.get(shardTableName), countMap.get(shardIndexTableName), countMap.get(shardReverseIndexTableName), countMap.get(edgeTableName)));
    }
}
Also used: StandaloneTaskAttemptContext(datawave.ingest.test.StandaloneTaskAttemptContext) RawRecordContainerImpl(datawave.ingest.config.RawRecordContainerImpl) TreeSet(java.util.TreeSet) NormalizedContentInterface(datawave.ingest.data.config.NormalizedContentInterface) HashSet(java.util.HashSet) VirtualIngest(datawave.ingest.data.config.ingest.VirtualIngest) Text(org.apache.hadoop.io.Text) CompositeIngest(datawave.ingest.data.config.ingest.CompositeIngest) Value(org.apache.accumulo.core.data.Value) BulkIngestKey(datawave.ingest.mapreduce.job.BulkIngestKey) StandaloneStatusReporter(datawave.ingest.test.StandaloneStatusReporter) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key)
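
Example 3 extends Example 1 with a CompositeIngest step: composite fields are folded into the event fields, and an overloaded composite field replaces the raw values already stored under that name. The snippet below is a tiny self-contained illustration of that removeAll/putAll behavior on a Guava Multimap; the field name and values are made up for the example.

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

// Illustration only (hypothetical field name and values): an "overloaded" composite field
// replaces existing values for that name, while other composite fields are simply appended.
public class CompositeMergeSketch {
    public static void main(String[] args) {
        Multimap<String, String> eventFields = ArrayListMultimap.create();
        eventFields.put("MAKE_COLOR", "raw-value");

        Multimap<String, String> compositeFields = ArrayListMultimap.create();
        compositeFields.put("MAKE_COLOR", "ford,red");

        boolean overloaded = true; // stands in for CompositeIngest.isOverloadedCompositeField(fieldName)
        if (overloaded) {
            eventFields.removeAll("MAKE_COLOR"); // drop the raw values before adding the composite ones
        }
        eventFields.putAll("MAKE_COLOR", compositeFields.get("MAKE_COLOR"));

        System.out.println(eventFields); // {MAKE_COLOR=[ford,red]}
    }
}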

Aggregations

StandaloneStatusReporter (datawave.ingest.test.StandaloneStatusReporter): 3
Text (org.apache.hadoop.io.Text): 3
RawRecordContainerImpl (datawave.ingest.config.RawRecordContainerImpl): 2
NormalizedContentInterface (datawave.ingest.data.config.NormalizedContentInterface): 2
VirtualIngest (datawave.ingest.data.config.ingest.VirtualIngest): 2
BulkIngestKey (datawave.ingest.mapreduce.job.BulkIngestKey): 2
StandaloneTaskAttemptContext (datawave.ingest.test.StandaloneTaskAttemptContext): 2
HashSet (java.util.HashSet): 2
Map (java.util.Map): 2
TreeSet (java.util.TreeSet): 2
Key (org.apache.accumulo.core.data.Key): 2
Value (org.apache.accumulo.core.data.Value): 2
RawRecordContainer (datawave.ingest.data.RawRecordContainer): 1
CompositeIngest (datawave.ingest.data.config.ingest.CompositeIngest): 1
EventSequenceFileRecordReader (datawave.ingest.input.reader.event.EventSequenceFileRecordReader): 1
EventMapper (datawave.ingest.mapreduce.EventMapper): 1
Mutation (org.apache.accumulo.core.data.Mutation): 1
Path (org.apache.hadoop.fs.Path): 1
RawLocalFileSystem (org.apache.hadoop.fs.RawLocalFileSystem): 1
LongWritable (org.apache.hadoop.io.LongWritable): 1