Use of datawave.ingest.data.config.ingest.CompositeIngest in project datawave by NationalSecurityAgency.
The class ColumnBasedHandlerTestUtil, method processEvent:
public static void processEvent(DataTypeHandler<Text> handler, ExtendedDataTypeHandler<Text, BulkIngestKey, Value> edgeHandler,
                RawRecordContainer event, int expectedShardKeys, int expectedShardIndexKeys, int expectedShardReverseIndexKeys,
                int expectedEdgeKeys, boolean printKeysOnlyOnFail) {
    Assert.assertNotNull("Event was null.", event);
    Multimap<String, NormalizedContentInterface> eventFields = handler.getHelper(event.getDataType()).getEventFields(event);
    VirtualIngest vHelper = (VirtualIngest) handler.getHelper(event.getDataType());
    Multimap<String, NormalizedContentInterface> virtualFields = vHelper.getVirtualFields(eventFields);
    for (Map.Entry<String, NormalizedContentInterface> v : virtualFields.entries()) {
        eventFields.put(v.getKey(), v.getValue());
    }
    if (vHelper instanceof CompositeIngest) {
        CompositeIngest compIngest = (CompositeIngest) vHelper;
        Multimap<String, NormalizedContentInterface> compositeFields = compIngest.getCompositeFields(eventFields);
        for (String fieldName : compositeFields.keySet()) {
            // if this is an overloaded event field, we are replacing the existing data
            if (compIngest.isOverloadedCompositeField(fieldName))
                eventFields.removeAll(fieldName);
            eventFields.putAll(fieldName, compositeFields.get(fieldName));
        }
    }
    Multimap<BulkIngestKey, Value> results = handler.processBulk(new Text(), event, eventFields, new MockStatusReporter());
    Set<Key> shardKeys = new HashSet<>();
    Set<Key> shardIndexKeys = new HashSet<>();
    Set<Key> shardReverseIndexKeys = new HashSet<>();
    Set<Key> edgeKeys = new HashSet<>();
    Map<Text, Integer> countMap = Maps.newHashMap();
    for (BulkIngestKey k : results.keySet()) {
        Text tableName = k.getTableName();
        if (countMap.containsKey(tableName)) {
            countMap.put(tableName, countMap.get(tableName) + 1);
        } else {
            countMap.put(tableName, 1);
        }
    }
    for (Map.Entry<BulkIngestKey, Value> e : results.entries()) {
        BulkIngestKey bik = e.getKey();
        if (log.isDebugEnabled() && isDocumentKey(bik.getKey())) {
            log.debug("Found Document Key: " + bik.getKey());
            log.debug("value:\n" + e.getValue());
        }
        if (bik.getTableName().equals(shardTableName)) {
            shardKeys.add(bik.getKey());
        } else if (bik.getTableName().equals(shardIndexTableName)) {
            shardIndexKeys.add(bik.getKey());
        } else if (bik.getTableName().equals(shardReverseIndexTableName)) {
            shardReverseIndexKeys.add(bik.getKey());
        } else {
            Assert.fail("unknown table: " + bik.getTableName() + " key: " + bik.getKey());
        }
    }
    // Process edges
    countMap.put(edgeTableName, 0);
    if (null != edgeHandler) {
        MyCachingContextWriter contextWriter = new MyCachingContextWriter();
        StandaloneTaskAttemptContext<Text, RawRecordContainerImpl, BulkIngestKey, Value> ctx = new StandaloneTaskAttemptContext<>(
                        ((RawRecordContainerImpl) event).getConf(), new StandaloneStatusReporter());
        try {
            contextWriter.setup(ctx.getConfiguration(), false);
            edgeHandler.process(null, event, eventFields, ctx, contextWriter);
            contextWriter.commit(ctx);
            for (Map.Entry<BulkIngestKey, Value> entry : contextWriter.getCache().entries()) {
                if (entry.getKey().getTableName().equals(edgeTableName)) {
                    edgeKeys.add(entry.getKey().getKey());
                }
                if (countMap.containsKey(entry.getKey().getTableName())) {
                    countMap.put(entry.getKey().getTableName(), countMap.get(entry.getKey().getTableName()) + 1);
                } else {
                    countMap.put(entry.getKey().getTableName(), 1);
                }
            }
        } catch (Throwable t) {
            log.error("Error during edge processing", t);
            throw new RuntimeException(t);
        }
    }
    Set<String> keyPrint = new TreeSet<>();
    for (Key k : shardKeys) {
        keyPrint.add("shard key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: "
                        + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: "
                        + k.getTimestamp() + "\n");
    }
    // check index keys
    for (Key k : shardIndexKeys) {
        keyPrint.add("shardIndex key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: "
                        + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: "
                        + k.getTimestamp() + "\n");
    }
    // check reverse index keys
    for (Key k : shardReverseIndexKeys) {
        keyPrint.add("reverseShardIndex key: " + k.getRow() + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: "
                        + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: "
                        + k.getTimestamp() + "\n");
    }
    // check edge keys
    for (Key k : edgeKeys) {
        keyPrint.add("edge key: " + k.getRow().toString().replaceAll(NB, "%00;") + " ::: "
                        + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: "
                        + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: "
                        + k.getTimestamp() + "\n");
    }
    try {
        if (!printKeysOnlyOnFail) {
            for (String keyString : keyPrint) {
                log.info(keyString.trim());
            }
        }
        if (expectedShardKeys > 0)
            Assert.assertEquals(expectedShardKeys, (int) countMap.get(shardTableName));
        if (expectedShardIndexKeys > 0)
            Assert.assertEquals(expectedShardIndexKeys, (int) countMap.get(shardIndexTableName));
        if (expectedShardReverseIndexKeys > 0)
            Assert.assertEquals(expectedShardReverseIndexKeys, (int) countMap.get(shardReverseIndexTableName));
        if (expectedEdgeKeys > 0)
            Assert.assertEquals(expectedEdgeKeys, (int) countMap.get(edgeTableName));
    } catch (AssertionError ae) {
        if (printKeysOnlyOnFail) {
            for (String keyString : keyPrint) {
                log.info(keyString.trim());
            }
        }
        Assert.fail(String.format("Expected: %s shard, %s index, %s reverse index, and %s edge keys.\nFound: %s, %s, %s, and %s respectively",
                        expectedShardKeys, expectedShardIndexKeys, expectedShardReverseIndexKeys, expectedEdgeKeys,
                        countMap.get(shardTableName), countMap.get(shardIndexTableName), countMap.get(shardReverseIndexTableName),
                        countMap.get(edgeTableName)));
    }
}
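The virtual/composite merge at the top of processEvent is the same pattern that recurs in the other two usages below: overloaded composite fields replace the original event data, while all other composite fields are appended alongside it. A minimal sketch of that pattern factored into a standalone helper, using only the CompositeIngest methods exercised above (the helper class itself is hypothetical, not part of datawave):

import com.google.common.collect.Multimap;
import datawave.ingest.data.config.NormalizedContentInterface;
import datawave.ingest.data.config.ingest.CompositeIngest;

// Hypothetical utility; datawave repeats this loop inline at each call site.
public final class CompositeFieldMerger {

    private CompositeFieldMerger() {}

    // Merges the helper's composite fields into eventFields in place.
    public static void merge(CompositeIngest helper, Multimap<String, NormalizedContentInterface> eventFields) {
        Multimap<String, NormalizedContentInterface> compositeFields = helper.getCompositeFields(eventFields);
        for (String fieldName : compositeFields.keySet()) {
            // overloaded composite fields replace the existing event data entirely
            if (helper.isOverloadedCompositeField(fieldName)) {
                eventFields.removeAll(fieldName);
            }
            // everything else is appended next to the original values
            eventFields.putAll(fieldName, compositeFields.get(fieldName));
        }
    }
}

With such a helper, the block above would reduce to a single call: CompositeFieldMerger.merge(compIngest, eventFields).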
Use of datawave.ingest.data.config.ingest.CompositeIngest in project datawave by NationalSecurityAgency.
The class CompositeIndexTest, method setupClass:
@BeforeClass
public static void setupClass() throws Exception {
    System.setProperty("subject.dn.pattern", "(?:^|,)\\s*OU\\s*=\\s*My Department\\s*(?:,|$)");
    setupConfiguration(conf);
    AbstractColumnBasedHandler<Text> dataTypeHandler = new AbstractColumnBasedHandler<>();
    dataTypeHandler.setup(new TaskAttemptContextImpl(conf, new TaskAttemptID()));
    TestIngestHelper ingestHelper = new TestIngestHelper();
    ingestHelper.setup(conf);
    // create and process events with WKT data
    RawRecordContainer record = new RawRecordContainerImpl();
    Multimap<BulkIngestKey, Value> keyValues = HashMultimap.create();
    int recNum = 1;
    for (int dataIdx = 0; dataIdx < 2; dataIdx++) {
        String beginDate;
        String[] wktData;
        Integer[] wktByteLengthData;
        long[] dates;
        boolean useCompositeIngest;
        if (dataIdx == 0) {
            beginDate = LEGACY_BEGIN_DATE;
            wktData = wktLegacyData;
            wktByteLengthData = wktByteLengthLegacyData;
            dates = legacyDates;
            useCompositeIngest = false;
        } else {
            beginDate = COMPOSITE_BEGIN_DATE;
            wktData = wktCompositeData;
            wktByteLengthData = wktByteLengthCompositeData;
            dates = compositeDates;
            useCompositeIngest = true;
        }
        for (int i = 0; i < wktData.length; i++) {
            record.clear();
            record.setDataType(new Type(DATA_TYPE_NAME, TestIngestHelper.class, (Class) null, (String[]) null, 1, (String[]) null));
            record.setRawFileName("geodata_" + recNum + ".dat");
            record.setRawRecordNumber(recNum++);
            record.setDate(formatter.parse(beginDate).getTime() + dates[i]);
            record.setRawData((wktData[i] + "|" + ((wktByteLengthData[i] != null) ? Integer.toString(wktByteLengthData[i]) : "")).getBytes("UTF8"));
            record.generateId(null);
            record.setVisibility(new ColumnVisibility(AUTHS));
            final Multimap<String, NormalizedContentInterface> fields = ingestHelper.getEventFields(record);
            if (useCompositeIngest && ingestHelper instanceof CompositeIngest) {
                Multimap<String, NormalizedContentInterface> compositeFields = ingestHelper.getCompositeFields(fields);
                for (String fieldName : compositeFields.keySet()) {
                    // if this is an overloaded event field, we are replacing the existing data
                    if (ingestHelper.isOverloadedCompositeField(fieldName))
                        fields.removeAll(fieldName);
                    fields.putAll(fieldName, compositeFields.get(fieldName));
                }
            }
            Multimap<BulkIngestKey, Value> kvPairs = dataTypeHandler.processBulk(new Text(), record, fields, new MockStatusReporter());
            keyValues.putAll(kvPairs);
            dataTypeHandler.getMetadata().addEvent(ingestHelper, record, fields);
        }
    }
    keyValues.putAll(dataTypeHandler.getMetadata().getBulkMetadata());
    // Write the composite transition date manually
    Key tdKey = new Key(new Text(GEO_FIELD), new Text(ColumnFamilyConstants.COLF_CITD), new Text(DATA_TYPE_NAME + "\0" + COMPOSITE_BEGIN_DATE),
                    new Text(), new SimpleDateFormat(CompositeMetadataHelper.transitionDateFormat).parse(COMPOSITE_BEGIN_DATE).getTime());
    keyValues.put(new BulkIngestKey(new Text(TableName.METADATA), tdKey), new Value());
    // write these values to their respective tables
    instance = new InMemoryInstance();
    Connector connector = instance.getConnector("root", PASSWORD);
    connector.securityOperations().changeUserAuthorizations("root", new Authorizations(AUTHS));
    writeKeyValues(connector, keyValues);
    ivaratorCacheDirConfigs = Collections.singletonList(new IvaratorCacheDirConfig(temporaryFolder.newFolder().toURI().toString()));
}
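The writeKeyValues helper is private to the test class and not shown here. A plausible sketch of what such a method needs to do, assuming the classic Accumulo 1.x client API (MultiTableBatchWriter, BatchWriterConfig) and on-demand table creation; treat the details as illustrative, not the actual datawave implementation:

import java.util.Map;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MultiTableBatchWriter;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.ColumnVisibility;
import com.google.common.collect.Multimap;
import datawave.ingest.mapreduce.job.BulkIngestKey;

// Sketch: route each BulkIngestKey to its table through one shared writer.
private static void writeKeyValues(Connector connector, Multimap<BulkIngestKey, Value> keyValues) throws Exception {
    MultiTableBatchWriter writer = connector.createMultiTableBatchWriter(new BatchWriterConfig());
    try {
        for (Map.Entry<BulkIngestKey, Value> entry : keyValues.entries()) {
            String tableName = entry.getKey().getTableName().toString();
            if (!connector.tableOperations().exists(tableName)) {
                connector.tableOperations().create(tableName);
            }
            Key k = entry.getKey().getKey();
            Mutation m = new Mutation(k.getRow());
            m.put(k.getColumnFamily(), k.getColumnQualifier(), new ColumnVisibility(k.getColumnVisibility()),
                            k.getTimestamp(), entry.getValue());
            writer.getBatchWriter(tableName).addMutation(m);
        }
    } finally {
        writer.close();
    }
}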
Use of datawave.ingest.data.config.ingest.CompositeIngest in project datawave by NationalSecurityAgency.
The class EventMapper, method getFields:
public Multimap<String, NormalizedContentInterface> getFields(RawRecordContainer value, DataTypeHandler<K1> handler) throws Exception {
    Multimap<String, NormalizedContentInterface> newFields;
    // Parse the event into its field names and field values using the DataTypeHandler's BaseIngestHelper object.
    newFields = handler.getHelper(value.getDataType()).getEventFields(value);
    // Also get the virtual fields, if applicable.
    if (handler.getHelper(value.getDataType()) instanceof VirtualIngest) {
        VirtualIngest vHelper = (VirtualIngest) handler.getHelper(value.getDataType());
        Multimap<String, NormalizedContentInterface> virtualFields = vHelper.getVirtualFields(newFields);
        for (Entry<String, NormalizedContentInterface> v : virtualFields.entries()) {
            newFields.put(v.getKey(), v.getValue());
        }
    }
    // Also get the composite fields, if applicable
    if (handler.getHelper(value.getDataType()) instanceof CompositeIngest) {
        CompositeIngest vHelper = (CompositeIngest) handler.getHelper(value.getDataType());
        Multimap<String, NormalizedContentInterface> compositeFields = vHelper.getCompositeFields(newFields);
        for (String fieldName : compositeFields.keySet()) {
            // if this is an overloaded composite field, we are replacing the existing field data
            if (vHelper.isOverloadedCompositeField(fieldName))
                newFields.removeAll(fieldName);
            newFields.putAll(fieldName, compositeFields.get(fieldName));
        }
    }
    // Create a LOAD_DATE parameter, which is the current time in milliseconds, for all datatypes
    long loadDate = now.get();
    NormalizedFieldAndValue loadDateValue = new NormalizedFieldAndValue(LOAD_DATE_FIELDNAME, Long.toString(loadDate));
    // set an indexed field value for use by the date index data type handler
    loadDateValue.setIndexedFieldValue(dateNormalizer.normalizeDelegateType(new Date(loadDate)));
    newFields.put(LOAD_DATE_FIELDNAME, loadDateValue);
    String seqFileName = null;
    // place the sequence filename into the event
    if (createSequenceFileName) {
        seqFileName = NDC.peek();
        if (trimSequenceFileName) {
            seqFileName = StringUtils.substringAfterLast(seqFileName, "/");
        }
        if (null != seqFileName) {
            StringBuilder seqFile = new StringBuilder(seqFileName);
            seqFile.append(SRC_FILE_DEL).append(offset);
            if (null != splitStart) {
                seqFile.append(SRC_FILE_DEL).append(splitStart);
            }
            newFields.put(SEQUENCE_FILE_FIELDNAME, new NormalizedFieldAndValue(SEQUENCE_FILE_FIELDNAME, seqFile.toString()));
        }
    }
    if (createRawFileName && !value.getRawFileName().isEmpty() && !value.getRawFileName().equals(seqFileName)) {
        newFields.put(RAW_FILE_FIELDNAME, new NormalizedFieldAndValue(RAW_FILE_FIELDNAME, value.getRawFileName()));
    }
    // Also if this helper needs to filter the fields before returning, apply now
    if (handler.getHelper(value.getDataType()) instanceof FilterIngest) {
        FilterIngest fHelper = (FilterIngest) handler.getHelper(value.getDataType());
        fHelper.filter(newFields);
    }
    return newFields;
}
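Note that the sequence file name above is read off log4j's NDC stack, so whatever drives this mapper is expected to have pushed the current split path beforehand; getFields itself never sets it. A small self-contained illustration of that convention (the pushed path and the demo class are made up):

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.NDC;

public class NdcSequenceFileNameDemo {

    public static void main(String[] args) {
        // assumed push site: task setup pushes the input split path onto the NDC
        NDC.push("/flagged/2016/01/01/records_001.seq");

        // getFields then recovers it with no extra plumbing
        String seqFileName = NDC.peek();

        // with trimSequenceFileName enabled, only the base name is kept
        seqFileName = StringUtils.substringAfterLast(seqFileName, "/");
        System.out.println(seqFileName); // prints: records_001.seq

        // the pushing side is responsible for cleanup
        NDC.pop();
    }
}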