
Example 11 with Attribute

Use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

From the class IteratorBuildingVisitorTest, method eval:

private void eval(ASTJexlScript query, Range docRange, Key docKeyHit, List<Map.Entry<Key, Value>> source, boolean buildDoc, Map<String, List<String>> docKeys, Set<String> termFrequencyFields, Set<String> aggregationFields, Set<String> indexOnlyFields) throws IOException {
    IteratorBuildingVisitor visitor = new IteratorBuildingVisitor();
    TypeMetadata typeMetadata = new TypeMetadata();
    Iterator<Map.Entry<Key, Value>> iterator = source.iterator();
    IteratorEnvironment env = new BaseIteratorEnvironment();
    visitor.setSource(new SourceFactory(iterator), env);
    // configure the visitor for use
    visitor.setTermFrequencyFields(termFrequencyFields);
    visitor.setFieldsToAggregate(aggregationFields);
    visitor.setIndexOnlyFields(indexOnlyFields);
    visitor.setRange(docRange);
    visitor.setTimeFilter(TimeFilter.alwaysTrue());
    visitor.setLimitLookup(true);
    visitor.setTypeMetadata(typeMetadata);
    query.jjtAccept(visitor, null);
    NestedIterator result = visitor.root();
    Assert.assertNotNull(result);
    SeekableNestedIterator seekableNestedIterator = new SeekableNestedIterator(result, env);
    seekableNestedIterator.seek(docRange, null, true);
    seekableNestedIterator.initialize();
    // asserts for a hit or miss
    if (docKeyHit == null) {
        Assert.assertFalse(seekableNestedIterator.hasNext());
    } else {
        Assert.assertTrue(seekableNestedIterator.hasNext());
        Key next = (Key) seekableNestedIterator.next();
        Assert.assertNotNull(next);
        Assert.assertEquals(docKeyHit.getRow().toString(), next.getRow().toString());
        Assert.assertEquals(docKeyHit.getColumnFamily().toString(), next.getColumnFamily().toString());
        // asserts for document build
        Document d = seekableNestedIterator.document();
        Assert.assertNotNull(d);
        if (buildDoc) {
            // +1 is for RECORD_ID field
            Assert.assertEquals(docKeys.keySet().size() + 1, d.getDictionary().size());
            // verify hits for each specified field
            for (String field : docKeys.keySet()) {
                List<String> expected = docKeys.get(field);
                if (expected.size() == 1) {
                    // verify the only doc
                    Assert.assertTrue(d.getDictionary().get(field).getData().equals(expected.get(0)));
                } else {
                    // the data should be a set, verify it matches expected
                    Object dictData = d.getDictionary().get(field).getData();
                    Assert.assertNotNull(dictData);
                    Assert.assertTrue(dictData instanceof Set);
                    Set dictSet = (Set) dictData;
                    Assert.assertEquals(expected.size(), dictSet.size());
                    Iterator<Attribute> dictIterator = dictSet.iterator();
                    while (dictIterator.hasNext()) {
                        Assert.assertTrue(expected.remove(dictIterator.next().getData()));
                    }
                    // verify that the expected set is now empty
                    Assert.assertTrue(expected.isEmpty());
                }
            }
        } else {
            // doc should be empty
            Assert.assertEquals(0, d.getDictionary().size());
        }
        // there should be no other hits
        Assert.assertFalse(seekableNestedIterator.hasNext());
    }
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) HashSet(java.util.HashSet) Set(java.util.Set) Attribute(datawave.query.attributes.Attribute) BaseIteratorEnvironment(org.apache.accumulo.core.client.impl.BaseIteratorEnvironment) IteratorEnvironment(org.apache.accumulo.core.iterators.IteratorEnvironment) Document(datawave.query.attributes.Document) NestedIterator(datawave.query.iterator.NestedIterator) SeekableNestedIterator(datawave.query.iterator.SeekableNestedIterator) Key(org.apache.accumulo.core.data.Key)
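
For orientation, a test case might drive this eval helper roughly as follows. This is an illustrative sketch only: the query string, the shard row, the fi\0FIELD / value\0datatype\0uid key layout, and the uid are assumptions rather than values from the original test, and the enclosing @Test method is assumed to declare throws Exception (it also needs org.apache.accumulo.core.data.PartialKey and the java.util collection classes).

    String NULL = "\u0000";
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("FOO == 'bar'");
    // document key: row = shard, column family = datatype\0uid
    Key docKey = new Key("20200101_0", "dataType1" + NULL + "uid0");
    Range docRange = new Range(docKey, true, docKey.followingKey(PartialKey.ROW_COLFAM), false);
    // a single field index entry for that document: fi\0FIELD / value\0datatype\0uid
    List<Map.Entry<Key, Value>> source = new ArrayList<>();
    source.add(new AbstractMap.SimpleEntry<>(new Key("20200101_0", "fi" + NULL + "FOO", "bar" + NULL + "dataType1" + NULL + "uid0"), new Value()));
    Map<String, List<String>> docKeys = new HashMap<>();
    docKeys.put("FOO", new ArrayList<>(Collections.singletonList("bar")));
    // expect a single hit on docKey, with FOO=bar aggregated into the built document
    eval(script, docRange, docKey, source, true, docKeys, Collections.emptySet(), Collections.singleton("FOO"), Collections.singleton("FOO"));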

Example 12 with Attribute

Use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

From the class DocumentTransformerSupport, method extractMetrics:

protected void extractMetrics(Document document, Key documentKey) {
    Map<String, Attribute<? extends Comparable<?>>> dictionary = document.getDictionary();
    Attribute<? extends Comparable<?>> timingMetadataAttribute = dictionary.get(LogTiming.TIMING_METADATA);
    if (timingMetadataAttribute instanceof TimingMetadata) {
        TimingMetadata timingMetadata = (TimingMetadata) timingMetadataAttribute;
        long currentSourceCount = timingMetadata.getSourceCount();
        long currentNextCount = timingMetadata.getNextCount();
        long currentSeekCount = timingMetadata.getSeekCount();
        long currentYieldCount = timingMetadata.getYieldCount();
        String host = timingMetadata.getHost();
        sourceCount += currentSourceCount;
        nextCount += currentNextCount;
        seekCount += currentSeekCount;
        yieldCount += currentYieldCount;
        Map<String, Long> stageTimers = timingMetadata.getStageTimers();
        if (stageTimers.containsKey(QuerySpan.Stage.DocumentSpecificTree.toString())) {
            docRanges++;
        } else if (stageTimers.containsKey(QuerySpan.Stage.FieldIndexTree.toString())) {
            fiRanges++;
        }
        if (logTimingDetails || log.isTraceEnabled()) {
            StringBuilder sb = new StringBuilder();
            sb.append("retrieved document from host:").append(host).append(" at key:").append(documentKey.toStringNoTime()).append(" stageTimers:").append(stageTimers);
            sb.append(" sourceCount:").append(currentSourceCount).append(" nextCount:").append(currentNextCount).append(" seekCount:").append(currentSeekCount).append(" yieldCount:").append(currentYieldCount);
            if (log.isTraceEnabled()) {
                log.trace(sb.toString());
            } else {
                log.info(sb.toString());
            }
        }
        if (dictionary.size() == 1) {
            // this document contained only timing metadata
            throw new EmptyObjectException();
        }
    }
}
Also used : Attribute(datawave.query.attributes.Attribute) EmptyObjectException(datawave.webservice.query.exception.EmptyObjectException) TimingMetadata(datawave.query.attributes.TimingMetadata)
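
On the caller side, the EmptyObjectException is the signal to drop a document that carried nothing but timing metadata. A minimal sketch of how a transform loop might react; the results iterable and the emit step are placeholders for the surrounding pipeline, not the project's actual DocumentTransformer code:

    for (Map.Entry<Key, Document> entry : results) {
        try {
            // accumulates source/next/seek/yield counters and range totals as shown above
            extractMetrics(entry.getValue(), entry.getKey());
            emit(entry);
        } catch (EmptyObjectException e) {
            // the document held only LogTiming.TIMING_METADATA; skip it, the metrics were already recorded
        }
    }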

Example 13 with Attribute

Use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

From the class GroupingTransform, method getListKeyCounts:

private void getListKeyCounts(Entry<Key, Document> entry) {
    log.trace("{} get list key counts for: {}", flatten ? "t" : "web" + "server", entry);
    keys.add(entry.getKey());
    Set<String> expandedGroupFieldsList = new LinkedHashSet<>();
    // if the incoming Documents have been aggregated on the tserver, they will have a COUNT field.
    // Use the value in the COUNT field as a loop max when the fields are put into the countingMap.
    // During the flush operation, a new COUNT field will be created based on the number of unique
    // field sets in the countingMap.
    Map<String, Attribute<? extends Comparable<?>>> dictionary = entry.getValue().getDictionary();
    Map<String, Integer> countKeyMap = new HashMap<>();
    dictionary.keySet().stream().filter(key -> key.startsWith("COUNT")).filter(countKey -> entry.getValue().getDictionary().containsKey(countKey)).forEach(countKey -> {
        TypeAttribute countTypeAttribute = ((TypeAttribute) entry.getValue().getDictionary().get(countKey));
        int count = ((BigDecimal) countTypeAttribute.getType().getDelegate()).intValue();
        countKeyMap.put(countKey, count);
    });
    Multimap<String, String> fieldToFieldWithContextMap = this.getFieldToFieldWithGroupingContextMap(entry.getValue(), expandedGroupFieldsList);
    log.trace("got a new fieldToFieldWithContextMap: {}", fieldToFieldWithContextMap);
    int longest = this.longestValueList(fieldToFieldWithContextMap);
    for (int i = 0; i < longest; i++) {
        Collection<GroupingTypeAttribute<?>> fieldCollection = new HashSet<>();
        String currentGroupingContext = "";
        for (String fieldListItem : expandedGroupFieldsList) {
            log.trace("fieldListItem: {}", fieldListItem);
            Collection<String> gtNames = fieldToFieldWithContextMap.get(fieldListItem);
            if (gtNames == null || gtNames.isEmpty()) {
                log.trace("gtNames: {}", gtNames);
                log.trace("fieldToFieldWithContextMap: {} did not contain: {}", fieldToFieldWithContextMap, fieldListItem);
            } else {
                String gtName = gtNames.iterator().next();
                int idx = gtName.indexOf('.');
                if (idx != -1) {
                    currentGroupingContext = gtName.substring(idx + 1);
                }
                if (!fieldListItem.equals(gtName)) {
                    fieldToFieldWithContextMap.remove(fieldListItem, gtName);
                }
                log.trace("fieldToFieldWithContextMap now: {}", fieldToFieldWithContextMap);
                log.trace("gtName: {}", gtName);
                fieldCollection.add(fieldMap.get(gtName));
            }
        }
        if (fieldCollection.size() == expandedGroupFieldsList.size()) {
            // get the count out of the countKeyMap
            Integer count = countKeyMap.get("COUNT." + currentGroupingContext);
            if (count == null)
                count = 1;
            // see above comment about the COUNT field
            log.trace("{} adding {} of {} to counting map", flatten ? "tserver" : "webserver", count, fieldCollection);
            IntStream.range(0, count).forEach(j -> countingMap.add(fieldCollection));
            fieldVisibilities.put(fieldCollection, getColumnVisibility(entry));
            log.trace("put {} to {} into fieldVisibilities {}", fieldCollection, getColumnVisibility(entry), fieldVisibilities);
        } else {
            log.trace("fieldList.size() != this.expandedGroupFieldsList.size()");
            log.trace("fieldList: {}", fieldCollection);
            log.trace("expandedGroupFieldsList: {}", expandedGroupFieldsList);
        }
    }
    log.trace("countingMap: {}", countingMap);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) IntStream(java.util.stream.IntStream) Document(datawave.query.attributes.Document) Arrays(java.util.Arrays) Attribute(datawave.query.attributes.Attribute) SortedSet(java.util.SortedSet) ShardQueryLogic(datawave.query.tables.ShardQueryLogic) JexlASTHelper(datawave.query.jexl.JexlASTHelper) QueryModel(datawave.query.model.QueryModel) HashCodeBuilder(org.apache.commons.lang.builder.HashCodeBuilder) HashMap(java.util.HashMap) Query(datawave.webservice.query.Query) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BigDecimal(java.math.BigDecimal) HashMultimap(com.google.common.collect.HashMultimap) MarkingFunctions(datawave.marking.MarkingFunctions) TreeMultimap(com.google.common.collect.TreeMultimap) NumberType(datawave.data.type.NumberType) Key(org.apache.accumulo.core.data.Key) Map(java.util.Map) Value(org.apache.accumulo.core.data.Value) LinkedList(java.util.LinkedList) Nullable(javax.annotation.Nullable) BaseQueryLogic(datawave.webservice.query.logic.BaseQueryLogic) Logger(org.slf4j.Logger) SortedSetMultimap(com.google.common.collect.SortedSetMultimap) Iterator(java.util.Iterator) Collection(java.util.Collection) Set(java.util.Set) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) TypeAttribute(datawave.query.attributes.TypeAttribute) AbstractMap(java.util.AbstractMap) List(java.util.List) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Type(datawave.data.type.Type) Entry(java.util.Map.Entry) LoggerFactory.getLogger(org.slf4j.LoggerFactory.getLogger) YieldCallback(org.apache.accumulo.core.iterators.YieldCallback) Assert(org.springframework.util.Assert)
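
The countKeyMap lookup of "COUNT." + currentGroupingContext relies on grouped field names carrying a dotted grouping context. A standalone sketch of that naming convention, with illustrative field names that are not taken from the project:

    // split a field-with-grouping-context name into base field and context,
    // then derive the matching COUNT key the same way the loop above does
    String gtName = "GENDER.FOO.1";
    int idx = gtName.indexOf('.');
    String baseField = (idx == -1) ? gtName : gtName.substring(0, idx);        // "GENDER"
    String groupingContext = (idx == -1) ? "" : gtName.substring(idx + 1);     // "FOO.1"
    String countKey = "COUNT." + groupingContext;                              // "COUNT.FOO.1"
    // a tserver-aggregated document carries its per-group total under countKey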

Example 14 with Attribute

Use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

From the class TLDFieldIndexAggregatorTest, method apply_testAggregateFilter:

@Test
public void apply_testAggregateFilter() throws IOException {
    EventDataQueryFilter mockFilter = EasyMock.createMock(EventDataQueryFilter.class);
    TypeMetadata typeMetadata = new TypeMetadata();
    AttributeFactory factory = new AttributeFactory(typeMetadata);
    Set<String> aggregatedFields = new HashSet<>();
    aggregatedFields.add("FOO");
    aggregator = new TLDFieldIndexAggregator(aggregatedFields, mockFilter, -1);
    TreeMap<Key, Value> treeMap = Maps.newTreeMap();
    Key fi1 = getFi("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10);
    Key fi2 = getFi("123", "FIELD1", "VALUE2", "dataType1", "123.345.456.1", 10);
    Key fi3 = getFi("123", "FIELD1", "VALUE3", "dataType1", "123.345.456.2", 10);
    Key fi4 = getFi("123", "FIELD1", "VALUE4", "dataType1", "123.345.456.3", 10);
    // FOO included in the filter
    Key fi5 = getFi("123", "FOO", "bar", "dataType1", "123.345.456.3", 10);
    // FOO2 not included in the filter
    Key fi6 = getFi("123", "FOO2", "bar", "dataType1", "123.345.456.3", 10);
    // key outside the range which should not be aggregated
    Key fi7 = getFi("123", "XENO", "zap", "dataType1", "234.345.456", 10);
    treeMap.put(fi1, new Value());
    treeMap.put(fi2, new Value());
    treeMap.put(fi3, new Value());
    treeMap.put(fi4, new Value());
    treeMap.put(fi5, new Value());
    treeMap.put(fi6, new Value());
    treeMap.put(fi7, new Value());
    EasyMock.expect(mockFilter.keep(EasyMock.isA(Key.class))).andReturn(true);
    EasyMock.replay(mockFilter);
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);
    Document doc = new Document();
    aggregator.apply(itr, doc, factory);
    EasyMock.verify(mockFilter);
    // list of FIELD1 values to expect
    List<String> expectedFieldValues = new ArrayList<>();
    expectedFieldValues.add("VALUE1");
    expectedFieldValues.add("VALUE2");
    expectedFieldValues.add("VALUE3");
    expectedFieldValues.add("VALUE4");
    assertTrue(doc.get("FIELD1").isToKeep());
    Set<Attribute> attributes = ((Set<Attribute>) doc.get("FIELD1").getData());
    assertEquals(4, attributes.size());
    Iterator<Attribute> attrItr = attributes.iterator();
    while (attrItr.hasNext()) {
        Attribute attr = attrItr.next();
        assertFalse(attr.isToKeep());
        assertTrue(expectedFieldValues.remove(attr.getData().toString()));
    }
    assertTrue(expectedFieldValues.isEmpty());
    // FOO kept
    assertTrue(doc.get("FOO").isToKeep());
    // FOO2 not kept
    assertFalse(doc.get("FOO2").isToKeep());
    // out of document range not included
    assertNull(doc.get("XENO"));
}
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) TypeMetadata(datawave.query.util.TypeMetadata) Attribute(datawave.query.attributes.Attribute) ArrayList(java.util.ArrayList) AttributeFactory(datawave.query.attributes.AttributeFactory) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)
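
The getFi helper used above is defined elsewhere in the test class. A plausible sketch of such a builder, assuming the usual fi\0FIELD column family and value\0datatype\0uid column qualifier layout; the project's actual helper may differ:

    // illustrative sketch; TLD uids carry child suffixes, e.g. 123.345.456 (parent) vs 123.345.456.1 (child)
    private Key getFi(String row, String field, String value, String dataType, String uid, long timestamp) {
        String nul = "\u0000";
        return new Key(row, "fi" + nul + field, value + nul + dataType + nul + uid, timestamp);
    }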

Example 15 with Attribute

Use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

From the class GroupFields, method apply:

@Override
public Entry<Key, Document> apply(Entry<Key, Document> entry) {
    Document document = entry.getValue();
    for (Map.Entry<String, Attribute<? extends Comparable<?>>> de : document.entrySet()) {
        String keyWithGrouping = de.getKey();
        log.trace("keyWithGrouping is:" + keyWithGrouping);
        String keyNoGrouping = keyWithGrouping;
        // if we have grouping context on, remove the grouping context
        if (keyNoGrouping.indexOf('.') != -1) {
            keyNoGrouping = keyNoGrouping.substring(0, keyNoGrouping.indexOf('.'));
            log.trace("keyNoGrouping is:" + keyNoGrouping);
        }
        if (this.groupFieldsSet.contains(keyNoGrouping)) {
            // look for the key without the grouping context
            if (log.isTraceEnabled())
                log.trace("groupFieldsSet contains " + keyNoGrouping + " so grouping with " + keyWithGrouping);
            Attribute<?> attr = de.getValue();
            int delta = 1;
            if (attr instanceof Attributes) {
                Attributes attrs = (Attributes) attr;
                delta = attrs.size();
                log.trace("delta for " + attrs + " is " + delta);
            } else {
                log.trace("delta for " + attr + " is " + delta);
            }
            // increment the count
            int count = this.groupFieldsMap.get(keyWithGrouping) == null ? 0 : this.groupFieldsMap.get(keyWithGrouping);
            this.groupFieldsMap.put(keyWithGrouping, count + delta);
        }
    }
    // mutate the document with the changes collected in the above loop
    applyCounts(document, groupFieldsMap);
    return entry;
}
Also used : Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Map(java.util.Map)
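
The null-check-then-put at the bottom of the loop is the classic counting idiom; on Java 8+ the same accumulation can be written with Map.merge. A standalone sketch with illustrative field names:

    Map<String, Integer> counts = new HashMap<>();
    counts.merge("GENDER.0", 3, Integer::sum);   // first sighting of the grouped key stores the delta (3)
    counts.merge("GENDER.0", 1, Integer::sum);   // later sightings add to it (now 4)
    // equivalent to: count = counts.get(key) == null ? 0 : counts.get(key); counts.put(key, count + delta);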

Aggregations

Attribute (datawave.query.attributes.Attribute): 21 usages
Document (datawave.query.attributes.Document): 17 usages
Attributes (datawave.query.attributes.Attributes): 14 usages
Key (org.apache.accumulo.core.data.Key): 13 usages
HashSet (java.util.HashSet): 11 usages
Value (org.apache.accumulo.core.data.Value): 8 usages
TypeAttribute (datawave.query.attributes.TypeAttribute): 6 usages
Map (java.util.Map): 6 usages
Entry (java.util.Map.Entry): 6 usages
Cardinality (datawave.query.attributes.Cardinality): 4 usages
QueryImpl (datawave.webservice.query.QueryImpl): 4 usages
GenericQueryConfiguration (datawave.webservice.query.configuration.GenericQueryConfiguration): 4 usages
Set (java.util.Set): 4 usages
Type (datawave.data.type.Type): 3 usages
Multimap (com.google.common.collect.Multimap): 2 usages
Content (datawave.query.attributes.Content): 2 usages
FieldValueCardinality (datawave.query.attributes.FieldValueCardinality): 2 usages
PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute): 2 usages
TimingMetadata (datawave.query.attributes.TimingMetadata): 2 usages
DatawaveKey (datawave.query.data.parsers.DatawaveKey): 2 usages