Search in sources :

Example 1 with Uid

use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.

the class ExtendedContentIndexingColumnBasedHandler method createIndexColumn.

/**
 * Builds one global (reverse) index entry for a field value and hands it directly to the {@link ContextWriter}, rather than collecting it in the Multimap
 * that the {@link ShardedDataTypeHandler} uses.
 *
 * @param event
 *            the record being indexed; its date is passed to the index key
 * @param contextWriter
 *            the writer that receives the resulting key/value pair
 * @param context
 *            the task context passed through to the writer
 * @param nFV
 *            supplies the indexed field name (column family) and the indexed field value (row)
 * @param shardId
 *            the shard id bytes that start the column qualifier
 * @param tableName
 *            the table the resulting BulkIngestKey is addressed to
 * @param visibility
 *            the visibility bytes passed to the index key
 * @param replacedMalformedUTF8
 *            passed through to {@code TextUtil.textAppend} when the data type name is appended to the column qualifier
 * @param deleteMode
 *            when true the UID list carries a count of -1 instead of 1; the flag is also passed to the index key
 * @throws IOException
 *             propagated from the calls made by this method
 * @throws InterruptedException
 *             propagated from the calls made by this method
 */
protected void createIndexColumn(RawRecordContainer event, ContextWriter<KEYOUT, VALUEOUT> contextWriter, TaskInputOutputContext<KEYIN, ? extends RawRecordContainer, KEYOUT, VALUEOUT> context, NormalizedContentInterface nFV, byte[] shardId, Text tableName, byte[] visibility, boolean replacedMalformedUTF8, boolean deleteMode) throws IOException, InterruptedException {
    // Layout of a shard global index entry:
    //   row              = field value
    //   column family    = field name
    //   column qualifier = shard id with the data type appended
    //   value            = serialized UID list
    Text columnFamily = new Text(nFV.getIndexedFieldName());
    Text columnQualifier = new Text(shardId);
    TextUtil.textAppend(columnQualifier, this.eventDataTypeName, replacedMalformedUTF8);
    byte[] row = nFV.getIndexedFieldValue().getBytes();
    Key indexKey = this.createIndexKey(row, columnFamily, columnQualifier, visibility, event.getDate(), deleteMode);

    // The value is a single-UID list; only the sign of the count differs between an add and a delete.
    Uid.List.Builder uids = Uid.List.newBuilder();
    uids.setIGNORE(false);
    uids.setCOUNT(deleteMode ? -1 : 1);
    uids.addUID(this.eventUid);
    Value serializedUids = new Value(uids.build().toByteArray());

    contextWriter.write(new BulkIngestKey(tableName, indexKey), serializedUids, context);
}
Also used : Uid(datawave.ingest.protobuf.Uid) Value(org.apache.accumulo.core.data.Value) NormalizedFieldAndValue(datawave.ingest.data.config.NormalizedFieldAndValue) BulkIngestKey(datawave.ingest.mapreduce.job.BulkIngestKey) Text(org.apache.hadoop.io.Text) List(java.util.List) ArrayList(java.util.ArrayList) OffsetList(datawave.ingest.mapreduce.handler.shard.content.BoundedOffsetQueue.OffsetList) BulkIngestKey(datawave.ingest.mapreduce.job.BulkIngestKey) Key(org.apache.accumulo.core.data.Key)

Example 2 with Uid

use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.

the class ShardedDataTypeHandlerTest method testMaskedForward.

/**
 * Checks that a forward term index built with a mask visibility and a masked field helper yields two entries, at least one of whose rows contains
 * "MASKED_VALUE".
 */
@Test
public void testMaskedForward() {
    // Minimal raw record for the handler to index.
    Type type = new Type(DATA_TYPE_NAME, TestIngestHelper.class, null, null, 10, null);
    RawRecordContainer rawRecord = new RawRecordContainerImpl();
    rawRecord.setDataType(type);
    rawRecord.setRawFileName("data_" + 0 + ".dat");
    rawRecord.setRawRecordNumber(1);
    rawRecord.setRawData("testingtesting".getBytes(StandardCharsets.UTF_8));

    // Single-UID list used as the index value.
    Uid.List.Builder uidBuilder = Uid.List.newBuilder();
    uidBuilder.setIGNORE(false);
    uidBuilder.setCOUNT(1);
    uidBuilder.addUID("d8zay2.-3pnndm.-anolok");
    Value uidValue = new Value(uidBuilder.build().toByteArray());

    // ASCII "ALL"
    byte[] visibility = new byte[] { 65, 76, 76 };
    // ASCII "CLL"
    byte[] maskVisibility = new byte[] { 67, 76, 76 };
    // ASCII "20000101_69"
    byte[] shardId = new byte[] { 50, 48, 48, 48, 48, 49, 48, 49, 95, 54, 57 };

    Multimap<BulkIngestKey, Value> indexEntries = handler.createTermIndexColumn(rawRecord, "TEST_COL", "FIELD_VALUE", visibility, maskVisibility, maskedFieldHelper, shardId, handler.getShardIndexTableName(), uidValue, Direction.FORWARD);
    assertTrue(indexEntries.size() == 2);

    // At least one of the produced keys must carry the masked value in its row.
    boolean maskedRowSeen = false;
    for (BulkIngestKey indexKey : indexEntries.keySet()) {
        Text rowText = indexKey.getKey().getRow();
        maskedRowSeen |= rowText.toString().contains("MASKED_VALUE");
    }
    assertTrue(maskedRowSeen);
}
Also used : Uid(datawave.ingest.protobuf.Uid) GeometryType(datawave.data.type.GeometryType) NumberType(datawave.data.type.NumberType) Type(datawave.ingest.data.Type) RawRecordContainer(datawave.ingest.data.RawRecordContainer) NormalizedFieldAndValue(datawave.ingest.data.config.NormalizedFieldAndValue) Value(org.apache.accumulo.core.data.Value) BulkIngestKey(datawave.ingest.mapreduce.job.BulkIngestKey) Text(org.apache.hadoop.io.Text) RawRecordContainerImpl(datawave.ingest.config.RawRecordContainerImpl) Test(org.junit.Test)

Example 3 with Uid

use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.

the class ShardedDataTypeHandlerTest method testCreateTermIndex.

/**
 * Checks that a forward term index built without any mask visibility or masked field helper yields exactly one entry.
 */
@Test
public void testCreateTermIndex() {
    // Minimal raw record for the handler to index.
    Type type = new Type(DATA_TYPE_NAME, TestIngestHelper.class, null, null, 10, null);
    RawRecordContainer rawRecord = new RawRecordContainerImpl();
    rawRecord.setDataType(type);
    rawRecord.setRawFileName("data_" + 0 + ".dat");
    rawRecord.setRawRecordNumber(1);
    rawRecord.setRawData("testingtesting".getBytes(StandardCharsets.UTF_8));

    // Single-UID list used as the index value.
    Uid.List.Builder uidBuilder = Uid.List.newBuilder();
    uidBuilder.setIGNORE(false);
    uidBuilder.setCOUNT(1);
    uidBuilder.addUID("d8zay2.-3pnndm.-anolok");
    Value uidValue = new Value(uidBuilder.build().toByteArray());

    // ASCII "ALL"
    byte[] visibility = new byte[] { 65, 76, 76 };
    // ASCII "20000101_69"
    byte[] shardId = new byte[] { 50, 48, 48, 48, 48, 49, 48, 49, 95, 54, 57 };

    // Both the mask visibility and the masked field helper are null here.
    Multimap<BulkIngestKey, Value> indexEntries = handler.createTermIndexColumn(rawRecord, "TEST_COL", "FIELD_VALUE", visibility, null, null, shardId, handler.getShardIndexTableName(), uidValue, Direction.FORWARD);
    assertTrue(indexEntries.size() == 1);
}
Also used : Uid(datawave.ingest.protobuf.Uid) GeometryType(datawave.data.type.GeometryType) NumberType(datawave.data.type.NumberType) Type(datawave.ingest.data.Type) RawRecordContainer(datawave.ingest.data.RawRecordContainer) NormalizedFieldAndValue(datawave.ingest.data.config.NormalizedFieldAndValue) Value(org.apache.accumulo.core.data.Value) BulkIngestKey(datawave.ingest.mapreduce.job.BulkIngestKey) RawRecordContainerImpl(datawave.ingest.config.RawRecordContainerImpl) Test(org.junit.Test)

Example 4 with Uid

use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.

the class GlobalIndexUidAggregatorTest method testRemoveAndReAddUUID.

/**
 * With the timestamps-ignored option set to false, a UID that is removed and later re-added must end up in the UID list, not the removed list.
 */
@Test
public void testRemoveAndReAddUUID() throws Exception {
    // Aggregator configured with timestamps-ignored = false and combine-all-columns = true.
    GlobalIndexUidAggregator aggregator = new GlobalIndexUidAggregator();
    IteratorSetting setting = new IteratorSetting(19, "test", GlobalIndexUidAggregator.class);
    GlobalIndexUidAggregator.setTimestampsIgnoredOpt(setting, false);
    GlobalIndexUidAggregator.setCombineAllColumns(setting, true);
    aggregator.validateOptions(setting.getOptions());

    String untouchedUuid = UUID.randomUUID().toString();
    String readdedUuid = UUID.randomUUID().toString();

    // Chronological history: add the first UUID, remove the second, then add the second again.
    ArrayList<Value> history = Lists.newArrayList();
    history.add(toValue(createNewUidList(untouchedUuid)));
    history.add(toValue(createNewRemoveUidList(readdedUuid)));
    history.add(toValue(createNewUidList(readdedUuid)));

    // The history is handed to the aggregator in reverse order
    // (NOTE(review): presumably newest-first is what the aggregator expects - confirm).
    Collections.reverse(history);
    Value aggregated = aggregator.reduce(new Key("key"), history.iterator());
    Uid.List merged = Uid.List.parseFrom(aggregated.get());

    // Both UUIDs survive: the first was only ever added, and the latest action on the
    // second was an add, which wins over its older remove.
    assertEquals(2, merged.getCOUNT());
    assertEquals(2, merged.getUIDCount());
    assertEquals(2, merged.getUIDList().size());
    assertEquals(0, merged.getREMOVEDUIDList().size());
    assertTrue(merged.getUIDList().contains(untouchedUuid));
    assertTrue(merged.getUIDList().contains(readdedUuid));
}
Also used : Uid(datawave.ingest.protobuf.Uid) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 5 with Uid

use of datawave.ingest.protobuf.Uid in project datawave by NationalSecurityAgency.

the class GlobalIndexUidAggregatorTest method testRemoveAndReAddUUIDWithTimestampsIgnored.

/**
 * With timestamps ignored, a remove takes precedence over any add of the same UID, so a UID that is removed and then re-added must stay in the removed
 * list.
 */
@Test
public void testRemoveAndReAddUUIDWithTimestampsIgnored() throws Exception {
    // No options are set on this aggregator, so it runs with whatever its defaults are.
    GlobalIndexUidAggregator aggregator = new GlobalIndexUidAggregator();

    String untouchedUuid = UUID.randomUUID().toString();
    String removedUuid = UUID.randomUUID().toString();

    // Chronological history: add the first UUID, remove the second, then add the second again.
    ArrayList<Value> history = Lists.newArrayList();
    history.add(toValue(createNewUidList(untouchedUuid)));
    history.add(toValue(createNewRemoveUidList(removedUuid)));
    history.add(toValue(createNewUidList(removedUuid)));

    // The history is handed to the aggregator in reverse order
    // (NOTE(review): presumably newest-first is what the aggregator expects - confirm).
    Collections.reverse(history);
    Value aggregated = aggregator.reduce(new Key("key"), history.iterator());
    Uid.List merged = Uid.List.parseFrom(aggregated.get());

    // The first UUID was only ever added, so it is the single live UID. The second lands
    // in the removed list even though its most recent action was an add, because when
    // timestamps are ignored a remove beats any add.
    assertEquals(1, merged.getCOUNT());
    assertEquals(1, merged.getUIDCount());
    assertEquals(1, merged.getUIDList().size());
    assertEquals(1, merged.getREMOVEDUIDList().size());
    assertTrue(merged.getUIDList().contains(untouchedUuid));
    assertTrue(merged.getREMOVEDUIDList().contains(removedUuid));
}
Also used : Uid(datawave.ingest.protobuf.Uid) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

Uid (datawave.ingest.protobuf.Uid)36 Value (org.apache.accumulo.core.data.Value)36 Test (org.junit.Test)29 Key (org.apache.accumulo.core.data.Key)10 BulkIngestKey (datawave.ingest.mapreduce.job.BulkIngestKey)7 Text (org.apache.hadoop.io.Text)7 NormalizedFieldAndValue (datawave.ingest.data.config.NormalizedFieldAndValue)6 GeometryType (datawave.data.type.GeometryType)5 NumberType (datawave.data.type.NumberType)5 RawRecordContainerImpl (datawave.ingest.config.RawRecordContainerImpl)5 RawRecordContainer (datawave.ingest.data.RawRecordContainer)5 Type (datawave.ingest.data.Type)5 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)3 Builder (datawave.ingest.protobuf.Uid.List.Builder)3 ArrayList (java.util.ArrayList)3 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)3 CacheBuilder (com.google.common.cache.CacheBuilder)2 List (java.util.List)2 Entry (java.util.Map.Entry)2 IngestHelperInterface (datawave.ingest.data.config.ingest.IngestHelperInterface)1