use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.
the class ExtendedContentIndexingColumnBasedHandler method createIndexColumn.
/**
 * Creates and writes the BulkIngestKey for the global (reverse) index to the ContextWriter (instead of the Multimap that the {@link ShardedDataTypeHandler}
 * uses).
 * <p>
 * The key is laid out as: row = indexed field value, column family = indexed field name, column qualifier = shard id with the event data type name appended.
 * The value is a serialized {@code Uid.List} that carries this handler's event UID with a count of {@code 1}, or {@code -1} when {@code deleteMode} is set.
 *
 * @param event
 *            the record whose date is handed to the index key
 * @param contextWriter
 *            the writer that receives the generated key/value pair
 * @param context
 *            the task context passed through to {@code contextWriter}
 * @param nFV
 *            supplies the indexed field name (column family) and indexed field value (row)
 * @param shardId
 *            the bytes that start the column qualifier
 * @param tableName
 *            the table the resulting {@code BulkIngestKey} is bound to
 * @param visibility
 *            the visibility bytes handed to the index key
 * @param replacedMalformedUTF8
 *            forwarded to {@code TextUtil.textAppend} when the data type name is appended to the column qualifier
 * @param deleteMode
 *            when {@code true} the UID list is written with a count of {@code -1} instead of {@code 1}; also forwarded to the key creation
 * @throws IOException
 *             declared for the calls made here (presumably raised by the context writer; nothing in this method throws it directly)
 * @throws InterruptedException
 *             declared for the calls made here (presumably raised by the context writer; nothing in this method throws it directly)
 */
protected void createIndexColumn(RawRecordContainer event, ContextWriter<KEYOUT, VALUEOUT> contextWriter, TaskInputOutputContext<KEYIN, ? extends RawRecordContainer, KEYOUT, VALUEOUT> context, NormalizedContentInterface nFV, byte[] shardId, Text tableName, byte[] visibility, boolean replacedMalformedUTF8, boolean deleteMode) throws IOException, InterruptedException {
    // Shard Global Index Table Structure
    // Row: Field Value
    // Colf: Field Name
    // Colq: Shard Id : DataType
    // Value: UID
    Text colf = new Text(nFV.getIndexedFieldName());
    Text colq = new Text(shardId);
    TextUtil.textAppend(colq, this.eventDataTypeName, replacedMalformedUTF8);

    // Encode the row explicitly as UTF-8 so the key bytes do not depend on the JVM's default charset.
    byte[] row = nFV.getIndexedFieldValue().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    Key k = this.createIndexKey(row, colf, colq, visibility, event.getDate(), deleteMode);

    // Create a UID object for the Value: the same UID is always recorded, only the sign of the count differs.
    Uid.List.Builder uidBuilder = Uid.List.newBuilder();
    uidBuilder.setIGNORE(false);
    uidBuilder.setCOUNT(deleteMode ? -1 : 1);
    uidBuilder.addUID(this.eventUid);
    Uid.List uidList = uidBuilder.build();
    Value val = new Value(uidList.toByteArray());

    BulkIngestKey bKey = new BulkIngestKey(tableName, k);
    contextWriter.write(bKey, val, context);
}
use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.
the class ShardedDataTypeHandlerTest method testMaskedForward.
/**
 * Builds a forward term index entry with both a visibility and a mask visibility (plus the masked field helper) and checks that two entries come back, at
 * least one of which has a row containing "MASKED_VALUE".
 */
@Test
public void testMaskedForward() {
    // Raw record under test
    Type type = new Type(DATA_TYPE_NAME, TestIngestHelper.class, null, null, 10, null);
    RawRecordContainer rawRecord = new RawRecordContainerImpl();
    rawRecord.setDataType(type);
    rawRecord.setRawFileName("data_" + 0 + ".dat");
    rawRecord.setRawRecordNumber(1);
    String payload = "testingtesting";
    rawRecord.setRawData(payload.getBytes(StandardCharsets.UTF_8));

    // Single-UID list used as the index value
    Uid.List.Builder uidBuilder = Uid.List.newBuilder();
    uidBuilder.setIGNORE(false);
    uidBuilder.setCOUNT(1);
    uidBuilder.addUID("d8zay2.-3pnndm.-anolok");
    Value indexValue = new Value(uidBuilder.build().toByteArray());

    // ASCII spellings of the byte sequences used for the visibilities and the shard id
    byte[] visibility = "ALL".getBytes(StandardCharsets.UTF_8);
    byte[] maskVisibility = "CLL".getBytes(StandardCharsets.UTF_8);
    byte[] shardId = "20000101_69".getBytes(StandardCharsets.UTF_8);

    Multimap<BulkIngestKey, Value> termIndex = handler.createTermIndexColumn(rawRecord, "TEST_COL", "FIELD_VALUE", visibility, maskVisibility,
                    maskedFieldHelper, shardId, handler.getShardIndexTableName(), indexValue, Direction.FORWARD);

    int entryCount = termIndex.size();
    assertTrue(entryCount == 2);

    // Scan every key; the flag latches once any row mentions the masked value
    boolean sawMaskedRow = false;
    for (BulkIngestKey indexKey : termIndex.keySet()) {
        String rowText = indexKey.getKey().getRow().toString();
        sawMaskedRow |= rowText.contains("MASKED_VALUE");
    }
    assertTrue(sawMaskedRow);
}
use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.
the class ShardedDataTypeHandlerTest method testCreateTermIndex.
/**
 * Builds a forward term index entry without any mask visibility or masked field helper and checks that exactly one entry comes back.
 */
@Test
public void testCreateTermIndex() {
    // Raw record under test
    Type type = new Type(DATA_TYPE_NAME, TestIngestHelper.class, null, null, 10, null);
    RawRecordContainer rawRecord = new RawRecordContainerImpl();
    rawRecord.setDataType(type);
    rawRecord.setRawFileName("data_" + 0 + ".dat");
    rawRecord.setRawRecordNumber(1);
    String payload = "testingtesting";
    rawRecord.setRawData(payload.getBytes(StandardCharsets.UTF_8));

    // Single-UID list used as the index value
    Uid.List.Builder uidBuilder = Uid.List.newBuilder();
    uidBuilder.setIGNORE(false);
    uidBuilder.setCOUNT(1);
    uidBuilder.addUID("d8zay2.-3pnndm.-anolok");
    Value indexValue = new Value(uidBuilder.build().toByteArray());

    // ASCII spellings of the byte sequences used for the visibility and the shard id
    byte[] visibility = "ALL".getBytes(StandardCharsets.UTF_8);
    byte[] shardId = "20000101_69".getBytes(StandardCharsets.UTF_8);

    Multimap<BulkIngestKey, Value> termIndex = handler.createTermIndexColumn(rawRecord, "TEST_COL", "FIELD_VALUE", visibility, null, null, shardId,
                    handler.getShardIndexTableName(), indexValue, Direction.FORWARD);

    int entryCount = termIndex.size();
    assertTrue(entryCount == 1);
}
use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.
the class GlobalIndexUidAggregatorTest method testRemoveAndReAddUUID.
/**
 * With the timestamps-ignored option explicitly set to false, removes a UID and then re-adds it, and checks that the aggregated list keeps both UIDs and
 * records no removals.
 */
@Test
public void testRemoveAndReAddUUID() throws Exception {
    // Configure the aggregator options through an iterator setting and validate them on the instance
    IteratorSetting setting = new IteratorSetting(19, "test", GlobalIndexUidAggregator.class);
    GlobalIndexUidAggregator aggregator = new GlobalIndexUidAggregator();
    GlobalIndexUidAggregator.setTimestampsIgnoredOpt(setting, false);
    GlobalIndexUidAggregator.setCombineAllColumns(setting, true);
    aggregator.validateOptions(setting.getOptions());

    String untouchedUid = UUID.randomUUID().toString();
    String readdedUid = UUID.randomUUID().toString();

    // Sequence of operations: add the first UID, remove the second UID, then add the second UID back.
    Value addUntouched = toValue(createNewUidList(untouchedUid));
    Value removeReadded = toValue(createNewRemoveUidList(readdedUid));
    Value addReadded = toValue(createNewUidList(readdedUid));
    ArrayList<Value> operations = Lists.newArrayList();
    operations.add(addUntouched);
    operations.add(removeReadded);
    operations.add(addReadded);

    // The operations are handed to the aggregator in reverse of the order they were listed
    // (presumably so the most recent one is seen first - confirm against the aggregator).
    Collections.reverse(operations);
    Value reduced = aggregator.reduce(new Key("key"), operations.iterator());
    Uid.List aggregated = Uid.List.parseFrom(reduced.get());

    // Both UIDs are expected in the UID list: the first was added and never touched, and the
    // last action on the second was an add, even though an older remove exists for it.
    assertEquals(2, aggregated.getCOUNT());
    assertEquals(2, aggregated.getUIDCount());
    assertEquals(2, aggregated.getUIDList().size());
    assertEquals(0, aggregated.getREMOVEDUIDList().size());
    assertTrue(aggregated.getUIDList().contains(untouchedUid));
    assertTrue(aggregated.getUIDList().contains(readdedUid));
}
use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.
the class GlobalIndexUidAggregatorTest method testRemoveAndReAddUUIDWithTimestampsIgnored.
/**
 * Using an aggregator with no options applied, removes a UID and then re-adds it, and checks that the aggregated list keeps only the untouched UID while the
 * other one ends up in the removed-UID list.
 */
@Test
public void testRemoveAndReAddUUIDWithTimestampsIgnored() throws Exception {
    // No options are set here (presumably timestamps are ignored by default - confirm against the aggregator).
    GlobalIndexUidAggregator aggregator = new GlobalIndexUidAggregator();

    String untouchedUid = UUID.randomUUID().toString();
    String removedUid = UUID.randomUUID().toString();

    // Sequence of operations: add the first UID, remove the second UID, then add the second UID back.
    Value addUntouched = toValue(createNewUidList(untouchedUid));
    Value removeSecond = toValue(createNewRemoveUidList(removedUid));
    Value addSecond = toValue(createNewUidList(removedUid));
    ArrayList<Value> operations = Lists.newArrayList();
    operations.add(addUntouched);
    operations.add(removeSecond);
    operations.add(addSecond);

    // The operations are handed to the aggregator in reverse of the order they were listed.
    Collections.reverse(operations);
    Value reduced = aggregator.reduce(new Key("key"), operations.iterator());
    Uid.List aggregated = Uid.List.parseFrom(reduced.get());

    // The first UID stays in the UID list because it was added and never touched. The second
    // UID lands in the REMOVEDUID list even though its most recent action was an add: when
    // timestamps are ignored, a remove takes precedence over any add.
    assertEquals(1, aggregated.getCOUNT());
    assertEquals(1, aggregated.getUIDCount());
    assertEquals(1, aggregated.getUIDList().size());
    assertEquals(1, aggregated.getREMOVEDUIDList().size());
    assertTrue(aggregated.getUIDList().contains(untouchedUid));
    assertTrue(aggregated.getREMOVEDUIDList().contains(removedUid));
}
Aggregations