Search in sources :

Example 16 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class FacetedGrouping method apply.

/**
 * Re-buckets the {@link Cardinality} values of every {@link Attributes} field in the incoming document.
 * <p>
 * For each field other than {@link Document#DOCKEY_FIELD_NAME} whose value is an {@link Attributes}:
 * <ol>
 * <li>the ranges cached for that field in {@code cachedAttributeRanges} are fetched, and handed to
 * {@code adjustAttributeGrouping} when cached plus incoming entries would exceed
 * {@code config.getMaximumFacetGroupCount()};</li>
 * <li>one fresh {@link Cardinality} bucket is created per cached range;</li>
 * <li>each incoming cardinality is merged into the first bucket whose content reports {@code isWithin} for it,
 * or appended as its own bucket when none does;</li>
 * <li>the resulting buckets replace the field's value in the document and are written back to
 * {@code cachedAttributeRanges}.</li>
 * </ol>
 * Fields whose value is not an {@link Attributes} are left untouched (only trace-logged).
 *
 * @param input
 *            the key/document pair to regroup; the document is modified in place
 * @return an immutable entry holding the input key and the (mutated) input document
 * @throws RuntimeException
 *             wrapping a {@link CardinalityMergeException} raised while merging a cardinality into a bucket
 * @throws ClassCastException
 *             if a member of an {@link Attributes} value is not a {@link Cardinality}
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@SuppressWarnings("unchecked")
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Key topKey = input.getKey();
    Document currentDoc = input.getValue();
    // list of document attributes to update.
    TreeMultimap<String, Attribute<?>> newDocumentAttributes = TreeMultimap.create();
    Map<?, ?> currentAttr = currentDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        if (attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }
        if (!(attr.getValue() instanceof Attributes)) {
            // ignore none Attributes attributes
            if (log.isTraceEnabled())
                log.trace(attr.getKey() + " is " + attr.getValue().getClass());
            continue;
        }
        Attributes newAttrs = new Attributes(attr.getValue().isToKeep());
        Set<Attribute<? extends Comparable<?>>> attributes = ((Attributes) attr.getValue()).getAttributes();
        if (log.isTraceEnabled())
            log.trace(attr.getKey() + " is attributes, size is " + attributes.size());
        Collection<FieldValueCardinality> cardList = cachedAttributeRanges.get(attr.getKey());
        // we already know that we will exceed the list size
        if (cardList.size() + attributes.size() > config.getMaximumFacetGroupCount()) {
            if (log.isTraceEnabled())
                log.trace("cardinality exceeds maximum facet count");
            cardList = adjustAttributeGrouping(cardList, attributes);
        }
        // seed one empty bucket per known range
        List<Cardinality> newCardList = Lists.newArrayList();
        for (FieldValueCardinality fvcBucket : cardList) {
            FieldValueCardinality fvc = new FieldValueCardinality();
            fvc.setContent(fvcBucket.getFloorValue());
            fvc.setCeiling(fvcBucket.getCeilingValue());
            // for cardinalities, only use the visibility metadata. A separate Key is built per bucket
            // so that no two buckets share a metadata instance.
            Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, attr.getValue().getColumnVisibility(), -1);
            newCardList.add(new Cardinality(fvc, metadata, newAttrs.isToKeep()));
        }
        // fold every incoming cardinality into the first bucket that covers it
        for (Attribute<? extends Comparable<?>> member : attributes) {
            Cardinality card = (Cardinality) member;
            boolean foundBucket = false;
            for (Cardinality bucket : newCardList) {
                if (bucket.getContent().isWithin(card.getContent())) {
                    try {
                        bucket.getContent().merge(card.getContent());
                    } catch (CardinalityMergeException e) {
                        throw new RuntimeException(e);
                    }
                    foundBucket = true;
                    break;
                }
            }
            if (!foundBucket) {
                // no covering bucket: the cardinality becomes a bucket of its own
                newCardList.add(card);
            }
        }
        for (Cardinality cardBucket : newCardList) {
            newAttrs.add(cardBucket);
            cachedAttributeRanges.put(attr.getKey(), cardBucket.getContent());
        }
        newDocumentAttributes.put(attr.getKey(), newAttrs);
    }
    if (log.isTraceEnabled())
        log.trace("entries" + newDocumentAttributes.entries());
    // the dictionary is only modified here, after iteration over it has finished
    for (Entry<String, Attribute<?>> newAttr : newDocumentAttributes.entries()) {
        currentDoc.replace(newAttr.getKey(), newAttr.getValue(), false, false);
    }
    if (log.isTraceEnabled())
        log.trace("currentDoc" + currentDoc);
    return Maps.immutableEntry(topKey, currentDoc);
}
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) CardinalityMergeException(com.clearspring.analytics.stream.cardinality.CardinalityMergeException) Entry(java.util.Map.Entry) FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Key(org.apache.accumulo.core.data.Key)

Example 17 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class CardinalitySummation method apply.

/**
 * Folds the attributes of the incoming document into the running {@code newDocumentAttributes} summary and returns
 * a document built from that summary.
 * <p>
 * Every field other than {@link Document#DOCKEY_FIELD_NAME} is compared against the values already accumulated
 * for that field. A value that {@code equals} an accumulated one is combined with it through
 * {@code merge(card, otherCard, parser, merge)}; a value with no equal is added to the summary. The returned key
 * keeps only the row and column family of the input key and uses {@code MAX_UNICODE} as column qualifier.
 * <p>
 * Side effects visible in this method: {@code newDocumentAttributes} is updated, and {@code referenceDocument}
 * and {@code referenceKey} are overwritten with the returned document and key.
 *
 * @param input
 *            the key/document pair to fold into the summary
 * @return an immutable entry of the reduced key and a new document holding every accumulated attribute
 * @throws RuntimeException
 *             if a member of an {@link Attributes} value is not a {@link Cardinality} while values are already
 *             accumulated for its field
 * @throws ClassCastException
 *             if a single-valued attribute equal to an accumulated one is not a {@link Cardinality}
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@SuppressWarnings("unchecked")
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Key inputKey = input.getKey();
    // reduce the key to the document key pieces only and a max cq in order to ensure the top key
    // sorts after the pieces it is summarizing.
    Key topKey = new Key(inputKey.getRow(), inputKey.getColumnFamily(), MAX_UNICODE);
    DatawaveKey parser = new DatawaveKey(topKey);
    Document currentDoc = input.getValue();
    Map<?, ?> currentAttr = currentDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        if (attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }
        if (attr.getValue() instanceof Attributes) {
            Attributes attrs = (Attributes) attr.getValue();
            // unmatched members of this field, re-applied to the summary after each member
            TreeMultimap<String, Attribute<?>> tmpMap = TreeMultimap.create();
            // NOTE(review): presumably a live view of the multimap (the emptiness check below is re-evaluated
            // after direct puts) - confirm against the declared type of newDocumentAttributes
            NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
            for (Attribute<?> myAttribute : attrs.getAttributes()) {
                if (log.isTraceEnabled())
                    log.trace("Attributes for " + attr.getKey() + " " + attributes.iterator().hasNext());
                if (attributes.isEmpty()) {
                    if (log.isTraceEnabled())
                        log.trace("adding attributes " + attr.getKey() + " " + myAttribute.getData());
                    newDocumentAttributes.put(attr.getKey(), myAttribute);
                    continue;
                }
                // only cardinalities can be merged into existing values
                if (!(myAttribute instanceof Cardinality))
                    throw new RuntimeException("Have " + myAttribute.getClass());
                boolean foundAmongOthers = false;
                for (Attribute<?> thoseAttributes : attributes) {
                    if (((Cardinality) myAttribute).equals(thoseAttributes)) {
                        Cardinality card = (Cardinality) thoseAttributes;
                        Cardinality otherCard = (Cardinality) myAttribute;
                        merge(card, otherCard, parser, merge);
                        if (log.isTraceEnabled())
                            log.trace("Offering to " + attr.getKey() + " value " + card.getContent().getFloorValue() + " " + card.getContent().getCeilingValue());
                        foundAmongOthers = true;
                        break;
                    }
                }
                if (!foundAmongOthers) {
                    if (log.isTraceEnabled())
                        log.trace("put attributes " + attr.getKey() + " " + myAttribute.getData());
                    tmpMap.put(attr.getKey(), myAttribute);
                }
                // applied only after the scan above has finished, so the scanned set is never modified mid-iteration
                newDocumentAttributes.putAll(tmpMap);
            }
        } else {
            if (log.isTraceEnabled())
                log.trace("Testing " + attr.getKey() + " " + attr.getValue().getData());
            NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
            boolean found = false;
            for (Attribute<?> thoseAttributes : attributes) {
                if (thoseAttributes.equals(attr.getValue())) {
                    if (log.isTraceEnabled())
                        log.trace("found for " + attr.getKey() + " " + thoseAttributes.getData());
                    Cardinality card = (Cardinality) thoseAttributes;
                    Cardinality otherCard = (Cardinality) attr.getValue();
                    merge(card, otherCard, parser, merge);
                    found = true;
                    break;
                }
            }
            if (!found) {
                if (log.isTraceEnabled())
                    log.trace("Don't have " + attr.getKey() + " " + attr.getValue().getData());
                newDocumentAttributes.put(attr.getKey(), attr.getValue());
            }
        }
    }
    referenceDocument = new Document();
    if (log.isTraceEnabled())
        log.trace("entries" + newDocumentAttributes.entries());
    referenceDocument.putAll(newDocumentAttributes.entries().iterator(), false);
    if (log.isTraceEnabled())
        log.trace("currentDoc" + referenceDocument);
    referenceKey = topKey;
    return Maps.immutableEntry(topKey, referenceDocument);
}
Also used : Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Entry(java.util.Map.Entry) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key)

Example 18 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class MinimumEstimation method apply.

/**
 * Filters the incoming document down to the {@link Cardinality} values whose estimate reaches
 * {@code minimumCount}.
 * <p>
 * Every field other than {@link Document#DOCKEY_FIELD_NAME} is inspected. For an {@link Attributes} value each
 * member is tested individually; any other value is treated as a single {@link Cardinality}. A cardinality is
 * kept when {@code getContent().getEstimate().cardinality() >= minimumCount}.
 *
 * @param input
 *            the key/document pair to filter; the input document itself is not modified
 * @return an immutable entry of the input key and a new document holding only the retained cardinalities
 * @throws RuntimeException
 *             if a member of an {@link Attributes} value is not a {@link Cardinality}
 * @throws ClassCastException
 *             if a single-valued attribute is not a {@link Cardinality}
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@SuppressWarnings("unchecked")
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Key topKey = input.getKey();
    TreeMultimap<String, Attribute<? extends Comparable<?>>> newDocumentAttributes = TreeMultimap.create();
    Map<?, ?> currentAttr = input.getValue().getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        if (attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }
        if (attr.getValue() instanceof Attributes) {
            Attributes attrs = (Attributes) attr.getValue();
            for (Attribute<?> myAttribute : attrs.getAttributes()) {
                // traces whether anything has been retained for this field so far
                if (log.isTraceEnabled())
                    log.trace("Attributes for " + attr.getKey() + " " + newDocumentAttributes.get(attr.getKey()).iterator().hasNext());
                if (!(myAttribute instanceof Cardinality))
                    throw new RuntimeException("Have " + myAttribute.getClass());
                Cardinality card = (Cardinality) myAttribute;
                if (card.getContent().getEstimate().cardinality() >= minimumCount) {
                    newDocumentAttributes.put(attr.getKey(), myAttribute);
                }
            }
        } else {
            Cardinality card = (Cardinality) attr.getValue();
            if (card.getContent().getEstimate().cardinality() >= minimumCount) {
                newDocumentAttributes.put(attr.getKey(), card);
            }
        }
    }
    // the result is a fresh document; the input document is left as it was
    Document filteredDoc = new Document();
    if (log.isTraceEnabled())
        log.trace("entries" + newDocumentAttributes.entries());
    filteredDoc.putAll(newDocumentAttributes.entries().iterator(), false);
    if (log.isTraceEnabled())
        log.trace("currentDoc" + filteredDoc);
    return Maps.immutableEntry(topKey, filteredDoc);
}
Also used : Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Entry(java.util.Map.Entry) Key(org.apache.accumulo.core.data.Key)

Example 19 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class IfThisTestFailsThenHitTermsAreBroken method runTestQuery.

/**
 * Runs {@code querystr} through {@code logic} and verifies both the returned UUIDs and their hit terms.
 * <p>
 * Each result document must carry a {@code UUID.0} attribute whose value is contained in {@code expected}. Every
 * hit term found under {@link JexlEvaluation#HIT_TERM_FIELD} is removed from {@code expectedHitTerms} for that
 * UUID; a hit term that cannot be removed fails the test. At the end the set of returned UUIDs must match
 * {@code expected} and {@code expectedHitTerms} must be empty.
 *
 * @param expected
 *            the UUIDs the query is expected to return
 * @param querystr
 *            the query to run
 * @param startDate
 *            begin date set on the query
 * @param endDate
 *            end date set on the query
 * @param extraParms
 *            parameters set on the query
 * @param expectedHitTerms
 *            expected hit terms keyed by UUID; entries are removed as they are matched, so the caller's multimap
 *            is modified
 * @throws Exception
 *             if initializing or running the query fails
 */
protected void runTestQuery(List<String> expected, String querystr, Date startDate, Date endDate, Map<String, String> extraParms, Multimap<String, String> expectedHitTerms) throws Exception {
    log.debug("runTestQuery");
    log.trace("Creating QueryImpl");
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(querystr);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());
    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());
    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);
    Set<String> resultSet = new HashSet<>();
    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.debug(entry.getKey() + " => " + d);
        Attribute<?> attr = d.get("UUID.0");
        Assert.assertNotNull("Result Document did not contain a 'UUID'", attr);
        Assert.assertTrue("Expected result to be an instance of DatwawaveTypeAttribute, was: " + attr.getClass().getName(), attr instanceof TypeAttribute || attr instanceof PreNormalizedAttribute);
        // NOTE(review): the assertion above also admits PreNormalizedAttribute, but the cast below only
        // accepts TypeAttribute - confirm whether PreNormalizedAttribute results are really expected here
        TypeAttribute<?> uuidAttr = (TypeAttribute<?>) attr;
        String uuid = uuidAttr.getType().getDelegate().toString();
        Assert.assertTrue("Received unexpected UUID: " + uuid, expected.contains(uuid));
        Attribute<?> hitTermAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitTermAttribute instanceof Attributes) {
            Attributes hitTerms = (Attributes) hitTermAttribute;
            for (Attribute<?> hitTerm : hitTerms.getAttributes()) {
                log.debug("hitTerm:" + hitTerm);
                String hitString = hitTerm.getData().toString();
                log.debug("as string:" + hitString);
                log.debug("expectedHitTerms:" + expectedHitTerms);
                Assert.assertNotEquals(Long.MAX_VALUE, hitTerm.getTimestamp());
                // make sure this hitString is in the map, and remove it
                if (!expectedHitTerms.get(uuid).remove(hitString)) {
                    String failure = "failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms;
                    log.debug(failure);
                    Assert.fail(failure);
                } else {
                    log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                }
            }
        } else if (hitTermAttribute != null) {
            // a single hit term rather than a collection of them
            log.debug("hitTerm:" + hitTermAttribute);
            String hitString = hitTermAttribute.getData().toString();
            log.debug("as string:" + hitString);
            log.debug("expectedHitTerms:" + expectedHitTerms);
            if (!expectedHitTerms.get(uuid).remove(hitString)) {
                String failure = "failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms;
                log.debug(failure);
                Assert.fail(failure);
            } else {
                log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerm:" + hitTermAttribute);
            }
        }
        resultSet.add(uuid);
    }
    if (expected.size() > resultSet.size()) {
        // report which expected UUIDs never showed up
        Set<String> missing = new HashSet<>(expected);
        missing.removeAll(resultSet);
        for (String s : missing) {
            log.warn("Missing: " + s);
        }
    }
    String mismatch = "Expected results " + expected + " differ from actual results " + resultSet;
    boolean onlyExpectedReturned = expected.containsAll(resultSet);
    if (!onlyExpectedReturned) {
        log.error(mismatch);
    }
    Assert.assertTrue(mismatch, onlyExpectedReturned);
    Assert.assertEquals("Unexpected number of records", expected.size(), resultSet.size());
    // the map is empty if there were no unexpected hit terms in it
    log.debug("expectedHitTerms:" + expectedHitTerms);
    Assert.assertTrue(expectedHitTerms.isEmpty());
}
Also used : Attribute(datawave.query.attributes.Attribute) TypeAttribute(datawave.query.attributes.TypeAttribute) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Attributes(datawave.query.attributes.Attributes) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Document(datawave.query.attributes.Document) GenericQueryConfiguration(datawave.webservice.query.configuration.GenericQueryConfiguration) QueryImpl(datawave.webservice.query.QueryImpl) TypeAttribute(datawave.query.attributes.TypeAttribute) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Example 20 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class UseOccurrenceToCountInJexlContextTest method runTestQuery.

/**
 * Runs {@code querystr} through {@code logic} against the given connector and verifies both the returned UUIDs
 * and their hit terms.
 * <p>
 * Each result document must carry a {@code UUID.0} attribute whose value is contained in {@code expected}. Every
 * hit term found under {@link JexlEvaluation#HIT_TERM_FIELD} is removed from {@code expectedHitTerms} for that
 * UUID; a hit term that cannot be removed fails the test. At the end the set of returned UUIDs must match
 * {@code expected} and {@code expectedHitTerms} must be empty.
 *
 * @param expected
 *            the UUIDs the query is expected to return
 * @param querystr
 *            the query to run
 * @param startDate
 *            begin date set on the query
 * @param endDate
 *            end date set on the query
 * @param extraParms
 *            parameters set on the query before {@code hit.list=true} is set (see the review note in the body)
 * @param expectedHitTerms
 *            expected hit terms keyed by UUID; entries are removed as they are matched, so the caller's multimap
 *            is modified
 * @param connector
 *            the connector the query logic is initialized with
 * @throws Exception
 *             if initializing or running the query fails
 */
protected void runTestQuery(List<String> expected, String querystr, Date startDate, Date endDate, Map<String, String> extraParms, Multimap<String, String> expectedHitTerms, Connector connector) throws Exception {
    log.debug("runTestQuery");
    log.trace("Creating QueryImpl");
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(querystr);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());
    // NOTE(review): if setParameters replaces rather than adds, this call discards extraParms set above.
    // Both calls are kept as-is so the test's behavior is unchanged - confirm which was intended.
    settings.setParameters(Collections.singletonMap("hit.list", "true"));
    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());
    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);
    Set<String> resultSet = new HashSet<>();
    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.debug(entry.getKey() + " => " + d);
        Attribute<?> attr = d.get("UUID.0");
        Assert.assertNotNull("Result Document did not contain a 'UUID'", attr);
        Assert.assertTrue("Expected result to be an instance of DatwawaveTypeAttribute, was: " + attr.getClass().getName(), attr instanceof TypeAttribute || attr instanceof PreNormalizedAttribute);
        // NOTE(review): the assertion above also admits PreNormalizedAttribute, but the cast below only
        // accepts TypeAttribute - confirm whether PreNormalizedAttribute results are really expected here
        TypeAttribute<?> uuidAttr = (TypeAttribute<?>) attr;
        String uuid = uuidAttr.getType().getDelegate().toString();
        Assert.assertTrue("Received unexpected UUID: " + uuid, expected.contains(uuid));
        Attribute<?> hitTermAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitTermAttribute instanceof Attributes) {
            Attributes hitTerms = (Attributes) hitTermAttribute;
            for (Attribute<?> hitTerm : hitTerms.getAttributes()) {
                log.debug("hitTerm:" + hitTerm);
                String hitString = hitTerm.getData().toString();
                log.debug("as string:" + hitString);
                log.debug("expectedHitTerms:" + expectedHitTerms);
                // make sure this hitString is in the map, and remove it
                if (!expectedHitTerms.get(uuid).remove(hitString)) {
                    String failure = "failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms;
                    log.debug(failure);
                    Assert.fail(failure);
                } else {
                    log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                }
            }
        } else if (hitTermAttribute != null) {
            // a single hit term rather than a collection of them
            log.debug("hitTerm:" + hitTermAttribute);
            String hitString = hitTermAttribute.getData().toString();
            log.debug("as string:" + hitString);
            log.debug("expectedHitTerms:" + expectedHitTerms);
            if (!expectedHitTerms.get(uuid).remove(hitString)) {
                String failure = "failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms;
                log.debug(failure);
                Assert.fail(failure);
            } else {
                log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerm:" + hitTermAttribute);
            }
        }
        resultSet.add(uuid);
    }
    if (expected.size() > resultSet.size()) {
        // report which expected UUIDs never showed up
        Set<String> missing = new HashSet<>(expected);
        missing.removeAll(resultSet);
        for (String s : missing) {
            log.warn("Missing: " + s);
        }
    }
    String mismatch = "Expected results " + expected + " differ from actual results " + resultSet;
    boolean onlyExpectedReturned = expected.containsAll(resultSet);
    if (!onlyExpectedReturned) {
        log.error(mismatch);
    }
    Assert.assertTrue(mismatch, onlyExpectedReturned);
    Assert.assertEquals("Unexpected number of records", expected.size(), resultSet.size());
    // the map is empty if there were no unexpected hit terms in it
    log.debug("expectedHitTerms:" + expectedHitTerms);
    Assert.assertTrue(expectedHitTerms.isEmpty());
}
Also used : Attribute(datawave.query.attributes.Attribute) TypeAttribute(datawave.query.attributes.TypeAttribute) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Attributes(datawave.query.attributes.Attributes) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Document(datawave.query.attributes.Document) GenericQueryConfiguration(datawave.webservice.query.configuration.GenericQueryConfiguration) QueryImpl(datawave.webservice.query.QueryImpl) TypeAttribute(datawave.query.attributes.TypeAttribute) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Aggregations

Attribute (datawave.query.attributes.Attribute)21 Document (datawave.query.attributes.Document)17 Attributes (datawave.query.attributes.Attributes)14 Key (org.apache.accumulo.core.data.Key)13 HashSet (java.util.HashSet)11 Value (org.apache.accumulo.core.data.Value)8 TypeAttribute (datawave.query.attributes.TypeAttribute)6 Map (java.util.Map)6 Entry (java.util.Map.Entry)6 Cardinality (datawave.query.attributes.Cardinality)4 QueryImpl (datawave.webservice.query.QueryImpl)4 GenericQueryConfiguration (datawave.webservice.query.configuration.GenericQueryConfiguration)4 Set (java.util.Set)4 Type (datawave.data.type.Type)3 Multimap (com.google.common.collect.Multimap)2 Content (datawave.query.attributes.Content)2 FieldValueCardinality (datawave.query.attributes.FieldValueCardinality)2 PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute)2 TimingMetadata (datawave.query.attributes.TimingMetadata)2 DatawaveKey (datawave.query.data.parsers.DatawaveKey)2