Search in sources :

Example 1 with EventDataQueryFieldFilter

use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.

the class DynamicFacetIterator method getDocumentIterator.

/**
 * Builds the iterator of documents for the given range.
 * <p>
 * The index iterator is created and sought first. The resulting field index tree is then wrapped in an
 * {@link AccumuloTreeIterable}, either with a {@link KeyToDocumentData} lookup or with a stub function that
 * produces an empty {@link DocumentData}, and run through {@link Aggregation}. For the {@code SHARD_COUNT}
 * and {@code DAY_COUNT} configuration types the documents are additionally passed through
 * {@code getEvaluation} and {@link DocumentCountCardinality}.
 *
 * @param range
 *            the range to seek to
 * @param columnFamilies
 *            the column families handed to the seek calls
 * @param inclusive
 *            whether {@code columnFamilies} is inclusive
 * @return the iterator of key/document entries
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public Iterator<Entry<Key, Document>> getDocumentIterator(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException, ConfigException, InstantiationException, IllegalAccessException {
    // The field index drives this lookup: set up and seek the boolean logic iterator first
    createAndSeekIndexIterator(range, columnFamilies, inclusive);

    // TODO consider using the new EventDataQueryExpressionFilter
    // A whitelist filter is only built when faceted fields are configured
    EventDataQueryFieldFilter facetFilter = null;
    if (!configuration.getFacetedFields().isEmpty()) {
        facetFilter = new EventDataQueryFieldFilter();
        facetFilter.initializeWhitelist(configuration.getFacetedFields());
    }

    final Function<Entry<Key, Document>, Entry<DocumentData, Document>> toDocumentData;
    if (!configuration.hasFieldLimits() || facetFilter != null) {
        toDocumentData = new KeyToDocumentData(source.deepCopy(myEnvironment), super.equality, facetFilter, this.includeHierarchyFields, this.includeHierarchyFields);
    } else {
        if (log.isTraceEnabled()) {
            log.trace("Skipping document lookup, because we don't need it");
        }
        // No lookup: pair the incoming document with a DocumentData that has no keys and no attributes
        toDocumentData = new Function<Entry<Key, Document>, Entry<DocumentData, Document>>() {

            @Override
            @Nullable
            public Entry<DocumentData, Document> apply(@Nullable Entry<Key, Document> input) {
                Set<Key> noDocKeys = Sets.newHashSet();
                List<Entry<Key, Value>> noAttributes = Lists.newArrayList();
                DocumentData emptyData = new DocumentData(input.getKey(), noDocKeys, noAttributes, true);
                return Maps.immutableEntry(emptyData, input.getValue());
            }
        };
    }

    AccumuloTreeIterable<Key, DocumentData> treeIterable = new AccumuloTreeIterable<>(fieldIndexResults.tree, toDocumentData);
    treeIterable.seek(range, columnFamilies, inclusive);

    final TypeMetadata typeMetadata = this.getTypeMetadata();
    Iterator<Entry<Key, Document>> documents = Iterators.transform(treeIterable.iterator(),
                    new Aggregation(this.getTimeFilter(), typeMetadata, compositeMetadata, this.isIncludeGroupingContext(), this.includeRecordId, false, null));

    switch (configuration.getType()) {
        case SHARD_COUNT:
        case DAY_COUNT:
            // Count types: evaluate against a fresh copy of the source, then apply the cardinality transform
            SortedKeyValueIterator<Key, Value> evaluationSource = source.deepCopy(myEnvironment);
            documents = getEvaluation(evaluationSource, documents, compositeMetadata, typeMetadata, columnFamilies, inclusive);
            documents = Iterators.transform(documents, new DocumentCountCardinality(configuration.getType(), !merge));
            break;
        default:
            // every other type returns the aggregated documents unchanged
            break;
    }
    return documents;
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) Document(datawave.query.attributes.Document) DocumentCountCardinality(datawave.query.function.DocumentCountCardinality) KeyToDocumentData(datawave.query.function.KeyToDocumentData) Aggregation(datawave.query.function.Aggregation) DocumentData(datawave.query.iterator.aggregation.DocumentData) Function(com.google.common.base.Function) Entry(java.util.Map.Entry) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) Nullable(javax.annotation.Nullable)

Example 2 with EventDataQueryFieldFilter

use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.

the class TermFrequencyAggregatorTest method apply_buildDocNotKeep.

/**
 * Applies the aggregator to a term frequency key for FIELD1 when FIELD1 is blacklisted by the filter and
 * only FIELD2 is in the keep set. Expects a null result key, an empty document, and the iterator left on
 * the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocNotKeep() throws IOException {
    // Source data: one TF key for the document under test and one for the entry that follows it
    TreeMap<Key, Value> tfEntries = Maps.newTreeMap();
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    tfEntries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> tfIterator = new SortedMapIterator(tfEntries);
    tfIterator.seek(new Range(), null, true);

    // Keep only FIELD2, and blacklist FIELD1 in the filter
    Set<String> fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FIELD2");
    Set<String> excludedFields = new HashSet<>();
    excludedFields.add("FIELD1");
    EventDataQueryFieldFilter fieldFilter = new EventDataQueryFieldFilter();
    fieldFilter.initializeBlacklist(excludedFields);

    aggregator = new TermFrequencyAggregator(fieldsToKeep, fieldFilter, -1);

    Document document = new Document();
    AttributeFactory factory = new AttributeFactory(new TypeMetadata());
    Key resultKey = aggregator.apply(tfIterator, document, factory);

    // no result key is produced
    assertTrue(resultKey == null);
    // nothing was added to the document
    assertTrue(document.size() == 0);
    // the iterator now sits on the second entry
    assertTrue(tfIterator.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(tfIterator.getTopKey().equals(expectedTop));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with EventDataQueryFieldFilter

use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.

the class TermFrequencyAggregatorTest method apply_buildDocKeepFilteredOut.

/**
 * Applies the aggregator to a term frequency key for FIELD1 when both the keep set and the filter query
 * ({@code FIELD2 == 'VALUE1'}) refer to FIELD2 only. Expects a null result key, an empty document, and the
 * iterator left on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocKeepFilteredOut() throws IOException, ParseException {
    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());

    // Source data: one TF key for the document under test and one for the entry that follows it
    TreeMap<Key, Value> treeMap = Maps.newTreeMap();
    treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);

    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD2");

    // Collections.emptySet() is the type-safe form of the raw Collections.EMPTY_SET constant and matches
    // how the sibling tests build their filters
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD2 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TermFrequencyAggregator(keepFields, filter, -1);
    Key result = aggregator.apply(itr, doc, attributeFactory);

    // test result key
    assertTrue(result == null);
    // test that the doc is empty
    assertTrue(doc.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10)));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with EventDataQueryFieldFilter

use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.

the class TermFrequencyAggregatorTest method apply_buildDocKeep.

/**
 * Applies the aggregator to a term frequency key for FIELD1 when FIELD1 is in the keep set and is the field
 * named by the filter query ({@code FIELD1 == 'VALUE1'}). Expects a result key describing that entry, a
 * document holding RECORD_ID and FIELD1, and the iterator left on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocKeep() throws IOException, ParseException {
    // Source data: one TF key for the document under test and one for the entry that follows it
    TreeMap<Key, Value> tfEntries = Maps.newTreeMap();
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    tfEntries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> tfIterator = new SortedMapIterator(tfEntries);
    tfIterator.seek(new Range(), null, true);

    // Keep set and filter query both name FIELD1
    Set<String> fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FIELD1");
    EventDataQueryFilter queryFilter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD1 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TermFrequencyAggregator(fieldsToKeep, queryFilter, -1);

    Document document = new Document();
    AttributeFactory factory = new AttributeFactory(new TypeMetadata());
    Key resultKey = aggregator.apply(tfIterator, document, factory);

    // a result key comes back and parses to the FIELD1 entry
    assertTrue(resultKey != null);
    DatawaveKey parsed = new DatawaveKey(resultKey);
    assertTrue(parsed.getDataType().equals("dataType1"));
    assertTrue(parsed.getUid().equals("123.345.456"));
    assertTrue(parsed.getFieldName().equals("FIELD1"));
    assertTrue(parsed.getFieldValue().equals("VALUE1"));

    // the document holds exactly two entries: RECORD_ID and FIELD1
    assertTrue(document.size() == 2);
    assertTrue(document.get("RECORD_ID").getData().equals("123/dataType1/123.345.456"));
    assertTrue(document.get("FIELD1").getData().toString().equals("VALUE1"));

    // the iterator now sits on the second entry
    assertTrue(tfIterator.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(tfIterator.getTopKey().equals(expectedTop));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with EventDataQueryFieldFilter

use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.

the class TLDTermFrequencyAggregatorTest method apply_buildDocOnlyKeepToKeep.

/**
 * Applies the TLD aggregator to two FIELD1 term frequency keys (uids 123.345.456 and 123.345.456.1) when
 * both the keep set and the filter query ({@code FIELD2 == 'VALUE1'}) refer to FIELD2 only. Expects a null
 * result key, an empty document, and the iterator left on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocOnlyKeepToKeep() throws IOException, ParseException {
    // Source data: two FIELD1 TF keys followed by one key for the next entry
    TreeMap<Key, Value> tfEntries = Maps.newTreeMap();
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456.1", 10), new Value());
    tfEntries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> tfIterator = new SortedMapIterator(tfEntries);
    tfIterator.seek(new Range(), null, true);

    // Keep set and filter query both name FIELD2, which the source data does not contain
    Set<String> fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FIELD2");
    EventDataQueryFilter queryFilter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD2 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, queryFilter, -1);

    Document document = new Document();
    AttributeFactory factory = new AttributeFactory(new TypeMetadata());
    Key resultKey = aggregator.apply(tfIterator, document, factory);

    // no result key is produced
    assertTrue(resultKey == null);
    // nothing was added to the document
    assertTrue(document.size() == 0);
    // the iterator now sits on the NEXT_DOC_FIELD entry
    assertTrue(tfIterator.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(tfIterator.getTopKey().equals(expectedTop));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

Document (datawave.query.attributes.Document)7 EventDataQueryFieldFilter (datawave.query.predicate.EventDataQueryFieldFilter)7 TypeMetadata (datawave.query.util.TypeMetadata)7 Key (org.apache.accumulo.core.data.Key)7 Value (org.apache.accumulo.core.data.Value)7 AttributeFactory (datawave.query.attributes.AttributeFactory)6 DatawaveKey (datawave.query.data.parsers.DatawaveKey)6 EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter)6 HashSet (java.util.HashSet)6 Range (org.apache.accumulo.core.data.Range)6 SortedMapIterator (org.apache.accumulo.core.iterators.SortedMapIterator)6 Test (org.junit.Test)6 Function (com.google.common.base.Function)1 TypeAttribute (datawave.query.attributes.TypeAttribute)1 Aggregation (datawave.query.function.Aggregation)1 DocumentCountCardinality (datawave.query.function.DocumentCountCardinality)1 KeyToDocumentData (datawave.query.function.KeyToDocumentData)1 DocumentData (datawave.query.iterator.aggregation.DocumentData)1 ArrayList (java.util.ArrayList)1 Entry (java.util.Map.Entry)1