Search in sources :

Example 11 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The following snippet is from the class MutableMetadataHandler, method delete.

/**
 * Delete the current K,V from the event, put in a history element
 *
 * @param writer
 *            multi-table batch writer used for all mutations
 * @param con
 *            connector used to scan for related content/field-index keys
 * @param userAuths
 *            authorizations used when scanning
 * @param currentEntryList
 *            the K,V entries to delete
 * @param isIndexOnlyField
 *            true if the field exists only in the index (no event entry to delete)
 * @param isIndexed
 * @param isReverseIndexed
 * @param isContentField
 * @param dataTypes
 * @param user
 * @param mode
 * @param ts
 * @param purgeTokens
 *            If set true, then this will delete all tokens for a field as well.
 * @param insertHistory
 *            if true, record a history element for each deletion
 * @throws Exception
 */
protected void delete(MultiTableBatchWriter writer, Connector con, Set<Authorizations> userAuths, List<Pair<Key, Value>> currentEntryList, boolean isIndexOnlyField, boolean isIndexed, boolean isReverseIndexed, boolean isContentField, Set<Type<?>> dataTypes, String user, MODE mode, long ts, boolean purgeTokens, boolean insertHistory) throws Exception {
    // For each entry: delete the shard-table keys (event, content, fi, tf), decrement the
    // field frequency in the metadata table, and write negative-count UID values to the
    // global (and optionally reverse) index tables.
    for (Pair<Key, Value> currentEntry : currentEntryList) {
        ColumnVisibility viz = currentEntry.getFirst().getColumnVisibilityParsed();
        DatawaveKey key = new DatawaveKey(currentEntry.getFirst());
        String shardId = key.getRow().toString();
        long currentEntryTimestamp = currentEntry.getFirst().getTimestamp();
        if (key.getType().equals(KeyType.INDEX_EVENT)) {
            // Only delete the fi key; no metadata/index bookkeeping is needed for this type
            Mutation e = new Mutation(currentEntry.getFirst().getRow());
            e.putDelete(currentEntry.getFirst().getColumnFamily(), currentEntry.getFirst().getColumnQualifier(), viz, currentEntryTimestamp);
            writer.getBatchWriter(this.getEventTableName()).addMutation(e);
        } else if (key.getType().equals(KeyType.EVENT)) {
            Mutation m = new Mutation(key.getFieldName());
            // Decrement the frequency (metadata table): colq is dataType\0formatted-date and the
            // value is a variable-length-encoded -1L (presumably summed by a combiner on the table)
            m.put(ColumnFamilyConstants.COLF_F, new Text(key.getDataType() + NULL_BYTE + DateHelper.format(currentEntryTimestamp)), new Value(SummingCombiner.VAR_LEN_ENCODER.encode(-1L)));
            // Remove the event field.
            Mutation e = new Mutation(currentEntry.getFirst().getRow());
            if (!isIndexOnlyField) {
                e.putDelete(currentEntry.getFirst().getColumnFamily(), currentEntry.getFirst().getColumnQualifier(), viz, currentEntryTimestamp);
            }
            // Remove the content column (all content keys for this dataType/uid)
            if (isContentField) {
                ContentIterable dKeys = getContentKeys(con, this.getEventTableName(), userAuths, shardId, key.getDataType(), key.getUid());
                try {
                    for (Key dKey : dKeys) {
                        e.putDelete(dKey.getColumnFamily(), dKey.getColumnQualifier(), dKey.getColumnVisibilityParsed(), dKey.getTimestamp());
                    }
                } finally {
                    // always release the underlying scanner
                    dKeys.close();
                }
            }
            // Global index entries are written with day-granularity timestamps
            long tsToDay = (ts / MS_PER_DAY) * MS_PER_DAY;
            // Field index keys for this uid/field/value (includes token entries when purgeTokens is set)
            FieldIndexIterable fiKeys = getFieldIndexKeys(con, this.getEventTableName(), userAuths, shardId, key.getDataType(), key.getUid(), key.getFieldName(), key.getFieldValue(), dataTypes, purgeTokens);
            try {
                for (Key fiKey : fiKeys) {
                    // Remove the field index entry
                    e.putDelete(fiKey.getColumnFamily(), fiKey.getColumnQualifier(), fiKey.getColumnVisibilityParsed(), fiKey.getTimestamp());
                    DatawaveKey fiKeyParsed = new DatawaveKey(fiKey);
                    // Remove the term frequency entry (colq layout: dataType\0uid\0fieldValue\0fieldName)
                    e.putDelete(ColumnFamilyConstants.COLF_TF.toString(), fiKeyParsed.getDataType() + NULL_BYTE + fiKeyParsed.getUid() + NULL_BYTE + fiKeyParsed.getFieldValue() + NULL_BYTE + fiKeyParsed.getFieldName(), fiKey.getColumnVisibilityParsed(), fiKey.getTimestamp());
                    // Create a UID object for the Value which will remove this UID
                    Builder uidBuilder = Uid.List.newBuilder();
                    uidBuilder.setIGNORE(false);
                    // negative count: the index combiner should subtract this uid from the stored list
                    uidBuilder.setCOUNT(-1);
                    uidBuilder.addUID(fiKeyParsed.getUid());
                    Uid.List uidList = uidBuilder.build();
                    Value val = new Value(uidList.toByteArray());
                    // buffer the global indexes cq
                    String cq = shardId + NULL_BYTE + fiKeyParsed.getDataType();
                    // Remove the global index entry by adding the value
                    Mutation i = new Mutation(fiKeyParsed.getFieldValue());
                    i.put(fiKeyParsed.getFieldName(), cq, fiKey.getColumnVisibilityParsed(), tsToDay, val);
                    writer.getBatchWriter(this.getIndexTableName()).addMutation(i);
                    // Remove the reverse global index entry
                    if (isReverseIndexed) {
                        String reverseIndexedValue = StringUtils.reverse(fiKeyParsed.getFieldValue());
                        Mutation ri = new Mutation(reverseIndexedValue);
                        // NOTE(review): the reverse index uses the deleted entry's visibility (viz) while
                        // the forward index above uses fiKey's visibility — confirm the asymmetry is intended
                        ri.put(fiKeyParsed.getFieldName(), cq, viz, tsToDay, val);
                        writer.getBatchWriter(this.getReverseIndexTableName()).addMutation(ri);
                    }
                }
            } finally {
                fiKeys.close();
            }
            // Only write the shard mutation when something was actually queued for deletion
            if (e.size() > 0) {
                writer.getBatchWriter(this.getEventTableName()).addMutation(e);
            }
            writer.getBatchWriter(this.getMetadataTableName()).addMutation(m);
            if (!isIndexOnlyField && insertHistory) {
                insertHistory(writer, shardId, key.getDataType(), key.getUid(), viz, key.getFieldName(), key.getFieldValue(), ts, isIndexOnlyField, isIndexed, isReverseIndexed, dataTypes, user, mode);
            }
        }
    }
    // push all buffered mutations to their tables
    writer.flush();
}
Also used : Builder(datawave.ingest.protobuf.Uid.List.Builder) Text(org.apache.hadoop.io.Text) Value(org.apache.accumulo.core.data.Value) List(java.util.List) ArrayList(java.util.ArrayList) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 12 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The following snippet is from the class TermFrequencyAggregatorTest, method apply_buildDocKeep.

@Test
public void apply_buildDocKeep() throws IOException, ParseException {
    // Exercise the aggregator over two TF keys from different documents and verify the
    // built Document, the returned key, and the iterator's final position.
    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
    TreeMap<Key, Value> treeMap = Maps.newTreeMap();
    // first document (uid 123.345.456) plus the first key of the next document (uid 124.345.456)
    treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);
    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD1");
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD1 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TermFrequencyAggregator(keepFields, filter, -1);
    Key result = aggregator.apply(itr, doc, attributeFactory);
    // test result key
    assertTrue(result != null);
    DatawaveKey parsedResult = new DatawaveKey(result);
    assertTrue(parsedResult.getDataType().equals("dataType1"));
    assertTrue(parsedResult.getUid().equals("123.345.456"));
    assertTrue(parsedResult.getFieldName().equals("FIELD1"));
    assertTrue(parsedResult.getFieldValue().equals("VALUE1"));
    // test the doc contents: RECORD_ID plus the kept FIELD1 attribute (size 2, not empty)
    assertTrue(doc.size() == 2);
    assertTrue(doc.get("RECORD_ID").getData().equals("123/dataType1/123.345.456"));
    assertTrue(doc.get("FIELD1").getData().toString().equals("VALUE1"));
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10)));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 13 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The following snippet is from the class CardinalitySummation, method apply.

/*
     * (non-Javadoc)
     * 
     * @see com.google.common.base.Function#apply(java.lang.Object)
     */
@SuppressWarnings("unchecked")
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    // Merges the incoming document's cardinality attributes into the running multimap
    // (newDocumentAttributes), then rebuilds and returns the reference document.
    Key topKey = input.getKey();
    // reduce the key to the document key pieces only and a max cq in order to ensure the top key
    // sorts after the pieces it is summarizing.
    topKey = new Key(topKey.getRow(), topKey.getColumnFamily(), MAX_UNICODE);
    DatawaveKey parser = new DatawaveKey(topKey);
    // (previously an unused Document was allocated here and immediately overwritten)
    Document currentDoc = input.getValue();
    Map<?, ?> currentAttr = currentDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        TreeMultimap<String, Attribute<?>> tmpMap = TreeMultimap.create();
        // the document-key bookkeeping field is never merged
        if (!attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            if (attr.getValue() instanceof Attributes) {
                // multi-valued field: merge each value individually
                Attributes attrs = (Attributes) attr.getValue();
                NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
                for (Attribute<?> myAttribute : attrs.getAttributes()) {
                    if (log.isTraceEnabled())
                        log.trace("Attributes for " + attr.getKey() + " " + attributes.iterator().hasNext());
                    if (!attributes.isEmpty()) {
                        boolean foundAmongOthers = false;
                        for (Attribute<?> thoseAttributes : attributes) {
                            if (myAttribute instanceof Cardinality) {
                                if (((Cardinality) myAttribute).equals(thoseAttributes)) {
                                    // same value already tracked: fold the two cardinalities together
                                    Cardinality card = (Cardinality) thoseAttributes;
                                    Cardinality otherCard = (Cardinality) myAttribute;
                                    merge(card, otherCard, parser, merge);
                                    if (log.isTraceEnabled())
                                        log.trace("Offering to " + attr.getKey() + " value " + card.getContent().getFloorValue() + " " + card.getContent().getCeilingValue());
                                    foundAmongOthers = true;
                                    break;
                                }
                            } else
                                throw new RuntimeException("Have " + myAttribute.getClass());
                        }
                        if (!foundAmongOthers) {
                            if (log.isTraceEnabled())
                                log.trace("put attributes " + attr.getKey() + " " + myAttribute.getData());
                            // buffer in tmpMap to avoid mutating 'attributes' while iterating over it
                            tmpMap.put(attr.getKey(), myAttribute);
                        }
                        newDocumentAttributes.putAll(tmpMap);
                    } else {
                        if (log.isTraceEnabled())
                            log.trace("adding attributes " + attr.getKey() + " " + myAttribute.getData());
                        newDocumentAttributes.put(attr.getKey(), myAttribute);
                    }
                }
            } else {
                // single-valued field
                if (log.isTraceEnabled())
                    log.trace("Testing " + attr.getKey() + " " + attr.getValue().getData());
                NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
                // (previously an unused 'attributes.floor(...)' result was computed here; removed as dead code)
                boolean found = false;
                for (Attribute<?> thoseAttributes : attributes) {
                    if (thoseAttributes.equals(attr.getValue())) {
                        if (log.isTraceEnabled())
                            log.trace("found for " + attr.getKey() + " " + thoseAttributes.getData());
                        Cardinality card = (Cardinality) thoseAttributes;
                        Cardinality otherCard = (Cardinality) attr.getValue();
                        merge(card, otherCard, parser, merge);
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    if (log.isTraceEnabled())
                        log.trace("Don't have " + attr.getKey() + " " + attr.getValue().getData());
                    newDocumentAttributes.put(attr.getKey(), attr.getValue());
                }
            }
        }
    }
    // rebuild the reference document from the accumulated attributes
    referenceDocument = new Document();
    if (log.isTraceEnabled())
        log.trace("entries" + newDocumentAttributes.entries());
    referenceDocument.putAll(newDocumentAttributes.entries().iterator(), false);
    if (log.isTraceEnabled())
        log.trace("currentDoc" + referenceDocument);
    referenceKey = topKey;
    return Maps.immutableEntry(topKey, referenceDocument);
}
Also used : Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Entry(java.util.Map.Entry) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key)

Example 14 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The following snippet is from the class TLDTermFrequencyAggregatorTest, method apply_buildDocKeep.

@Test
public void apply_buildDocKeep() throws IOException, ParseException {
    // Exercise the TLD aggregator over a parent document and its child uids, verifying
    // that only FIELD1/FIELD2 values (from parent and descendants) are kept.
    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
    TreeMap<Key, Value> treeMap = Maps.newTreeMap();
    // parent uid 123.345.456 with children .1 through .9, plus grandchildren carrying
    // additional FIELD1/FIELD2 values, and finally the first key of the next document
    treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    treeMap.put(getTF("123", "FIELD2", "VALUE2", "dataType1", "123.345.456.1", 10), new Value());
    treeMap.put(getTF("123", "FIELD3", "VALUE3", "dataType1", "123.345.456.2", 10), new Value());
    treeMap.put(getTF("123", "FIELD4", "VALUE4", "dataType1", "123.345.456.3", 10), new Value());
    treeMap.put(getTF("123", "FIELD5", "VALUE5", "dataType1", "123.345.456.4", 10), new Value());
    treeMap.put(getTF("123", "FIELD6", "VALUE6", "dataType1", "123.345.456.5", 10), new Value());
    treeMap.put(getTF("123", "FIELD7", "VALUE7", "dataType1", "123.345.456.6", 10), new Value());
    treeMap.put(getTF("123", "FIELD8", "VALUE8", "dataType1", "123.345.456.7", 10), new Value());
    treeMap.put(getTF("123", "FIELD9", "VALUE9", "dataType1", "123.345.456.8", 10), new Value());
    treeMap.put(getTF("123", "FIELD10", "VALUE10", "dataType1", "123.345.456.9", 10), new Value());
    treeMap.put(getTF("123", "FIELD2", "VALUE11", "dataType1", "123.345.456.10.1", 10), new Value());
    treeMap.put(getTF("123", "FIELD1", "VALUE12", "dataType1", "123.345.456.11.1.1", 10), new Value());
    treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "123.345.457", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);
    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD1");
    keepFields.add("FIELD2");
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD1 == 'VALUE1' && FIELD2 == 'VALUE2'"), Collections.emptySet());
    aggregator = new TLDTermFrequencyAggregator(keepFields, filter, -1);
    Key result = aggregator.apply(itr, doc, attributeFactory);
    // test result key
    assertTrue(result != null);
    DatawaveKey parsedResult = new DatawaveKey(result);
    assertTrue(parsedResult.getDataType().equals("dataType1"));
    assertTrue(parsedResult.getUid().equals("123.345.456"));
    assertTrue(parsedResult.getFieldName(), parsedResult.getFieldName().equals("FIELD1"));
    assertTrue(parsedResult.getFieldValue().equals("VALUE1"));
    // test the doc contents (size 5, not empty): RECORD_ID plus the kept field attributes
    assertTrue(doc.size() == 5);
    assertTrue(doc.get("RECORD_ID").getData().equals("123/dataType1/123.345.456"));
    // FIELD1 keeps the parent's VALUE1 and the grandchild's VALUE12
    assertTrue(((Set<TypeAttribute>) doc.get("FIELD1").getData()).size() == 2);
    Iterator<TypeAttribute> i = ((Set<TypeAttribute>) doc.get("FIELD1").getData()).iterator();
    List<String> expected = new ArrayList<>();
    expected.add("VALUE1");
    expected.add("VALUE12");
    while (i.hasNext()) {
        TypeAttribute ta = i.next();
        assertTrue(ta.isToKeep());
        assertTrue(expected.remove(ta.getData().toString()));
    }
    assertTrue(expected.size() == 0);
    // FIELD2 keeps the child's VALUE2 and the grandchild's VALUE11
    assertTrue(((Set<TypeAttribute>) doc.get("FIELD2").getData()).size() == 2);
    i = ((Set<TypeAttribute>) doc.get("FIELD2").getData()).iterator();
    expected = new ArrayList<>();
    expected.add("VALUE2");
    expected.add("VALUE11");
    while (i.hasNext()) {
        TypeAttribute ta = i.next();
        assertTrue(ta.isToKeep());
        assertTrue(expected.remove(ta.getData().toString()));
    }
    assertTrue(expected.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "123.345.457", 10)));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) HashSet(java.util.HashSet) Set(java.util.Set) ArrayList(java.util.ArrayList) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) TypeAttribute(datawave.query.attributes.TypeAttribute) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 15 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The following snippet is from the class EventDataQueryExpressionFilter, method apply.

protected boolean apply(Key key, boolean update) {
    // Guard: the filter must have been initialized before any key is evaluated.
    if (!this.initialized) {
        throw new RuntimeException("The EventDataQueryExpressionFilter was not initialized");
    }
    // Normalize the field name parsed from the key before consulting the filter map.
    final DatawaveKey parsedKey = new DatawaveKey(key);
    final String field = JexlASTHelper.deconstructIdentifier(parsedKey.getFieldName(), false);
    // A field with no configured filter is always rejected.
    if (!this.filters.containsKey(field)) {
        return false;
    }
    // 'update' selects apply vs. peek on the field's filter (presumably the state-updating
    // vs. read-only evaluation — per the filter implementation).
    return update ? this.filters.get(field).apply(key) : this.filters.get(field).peek(key);
}
Also used : DatawaveKey(datawave.query.data.parsers.DatawaveKey)

Aggregations

DatawaveKey (datawave.query.data.parsers.DatawaveKey)15 Key (org.apache.accumulo.core.data.Key)12 Document (datawave.query.attributes.Document)5 EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter)4 PartialKey (org.apache.accumulo.core.data.PartialKey)4 Range (org.apache.accumulo.core.data.Range)4 Text (org.apache.hadoop.io.Text)4 Cardinality (datawave.query.attributes.Cardinality)3 Entry (java.util.Map.Entry)3 Value (org.apache.accumulo.core.data.Value)3 Attribute (datawave.query.attributes.Attribute)2 AttributeFactory (datawave.query.attributes.AttributeFactory)2 Attributes (datawave.query.attributes.Attributes)2 FieldValueCardinality (datawave.query.attributes.FieldValueCardinality)2 ChainableEventDataQueryFilter (datawave.query.predicate.ChainableEventDataQueryFilter)2 EventDataQueryFieldFilter (datawave.query.predicate.EventDataQueryFieldFilter)2 TypeMetadata (datawave.query.util.TypeMetadata)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2