Search in sources :

Example 6 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class HitsAreAlwaysIncludedCommonalityTokenTest method runTestQuery.

/**
 * Runs {@code queryString} through the query logic and checks the returned documents against
 * {@code goodResults}.
 * <p>
 * For each returned document: every {@code Content} hit term found under
 * {@link JexlEvaluation#HIT_TERM_FIELD} must be contained in {@code goodResults}; then every
 * {@code field:value} pair found in the document dictionary is removed from {@code goodResults},
 * and {@code goodResults} must be empty once the document has been processed. Finally, at least
 * one document must have been returned.
 *
 * @param connector
 *            the connector used to initialize the query logic
 * @param queryString
 *            the query to run
 * @param startDate
 *            the query begin date
 * @param endDate
 *            the query end date
 * @param extraParms
 *            additional query parameters
 * @param goodResults
 *            the expected results; NOTE: this collection is mutated (matched entries are removed)
 * @throws Exception
 *             propagated from query initialization, setup or iteration
 */
protected void runTestQuery(Connector connector, String queryString, Date startDate, Date endDate, Map<String, String> extraParms, Collection<String> goodResults) throws Exception {
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(queryString);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());
    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());
    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);
    Set<Document> docs = new HashSet<>();
    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.trace(entry.getKey() + " => " + d);
        docs.add(d);
        // every Content hit term must be one of the expected results
        Attribute<?> hitAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitAttribute instanceof Attributes) {
            for (Attribute<?> attr : ((Attributes) hitAttribute).getAttributes()) {
                if (attr instanceof Content) {
                    assertHitTermExpected((Content) attr, goodResults);
                }
            }
        } else if (hitAttribute instanceof Content) {
            assertHitTermExpected((Content) hitAttribute, goodResults);
        }
        // remove from goodResults as we find the expected return fields
        log.debug("goodResults: " + goodResults);
        Map<String, Attribute<? extends Comparable<?>>> dictionary = d.getDictionary();
        log.debug("dictionary:" + dictionary);
        for (Entry<String, Attribute<? extends Comparable<?>>> dictionaryEntry : dictionary.entrySet()) {
            Attribute<? extends Comparable<?>> attribute = dictionaryEntry.getValue();
            if (attribute instanceof Attributes) {
                // multi-valued field: each contained attribute is matched on its own
                for (Attribute<?> attr : ((Attributes) attribute).getAttributes()) {
                    removeExpectedResult(dictionaryEntry.getKey(), attr, goodResults);
                }
            } else {
                removeExpectedResult(dictionaryEntry.getKey(), attribute, goodResults);
            }
        }
        // checked after each document, not once after the whole result set
        Assert.assertTrue(goodResults + " was not empty", goodResults.isEmpty());
    }
    Assert.assertFalse("No docs were returned!", docs.isEmpty());
}

/**
 * Asserts that the content of a hit term is one of the expected results.
 */
private void assertHitTermExpected(Content hitTerm, Collection<String> goodResults) {
    String hit = hitTerm.getContent();
    Assert.assertTrue("Unexpected hit term '" + hit + "', expected one of " + goodResults, goodResults.contains(hit));
}

/**
 * Removes {@code fieldName:value} from the expected results and logs whether it was present.
 */
private void removeExpectedResult(String fieldName, Object value, Collection<String> goodResults) {
    String toFind = fieldName + ":" + value;
    if (goodResults.remove(toFind)) {
        log.debug("removed " + toFind);
    } else {
        log.debug("Did not remove " + toFind);
    }
}
Also used : Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) GenericQueryConfiguration(datawave.webservice.query.configuration.GenericQueryConfiguration) QueryImpl(datawave.webservice.query.QueryImpl) Content(datawave.query.attributes.Content) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Example 7 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class QueryIteratorIT method eval.

/**
 * Deserialize and evaluate the document, expects 0 to 1 documents.
 * <p>
 * When a hit is expected, the top key's row and column family must match {@code docKeyHit}, the
 * document size (excluding RECORD_ID and, when configured, the hit term field) must match the number
 * of expected fields, and each expected field's value(s) must be present in the document.
 * <p>
 * NOTE: for fields with more than one expected value, matched values are removed from the
 * corresponding list in {@code docKeys}, so that list must be mutable and is emptied on success.
 *
 * @param docKeyHit
 *            the expected hit Key, null if no hit expected
 * @param docKeys
 *            the expected values, keyed by field name
 * @throws IOException
 *             propagated from the calls made here (e.g. deserializing the value or advancing the iterator)
 */
protected void eval(Key docKeyHit, Map<String, List<String>> docKeys) throws IOException {
    // asserts for a hit or miss
    if (docKeyHit == null) {
        assertFalse(iterator.hasTop());
        return;
    }
    assertTrue("Expected hit, but got none", iterator.hasTop());
    Key next = iterator.getTopKey();
    assertNotNull(next);
    // JUnit order is (expected, actual)
    assertEquals(docKeyHit.getRow().toString(), next.getRow().toString());
    assertEquals(docKeyHit.getColumnFamily().toString(), next.getColumnFamily().toString());
    // asserts for document build
    Value topValue = iterator.getTopValue();
    assertNotNull(topValue);
    Map.Entry<Key, Document> deserializedValue = deserialize(topValue);
    Document d = deserializedValue.getValue();
    assertNotNull(d);
    // -1 is for RECORD_ID field and -1 for HIT_LIST if configured
    int baseSize = d.getDictionary().size() - 1;
    int docSize = isExpectHitTerm() ? baseSize - 1 : baseSize;
    assertEquals("Unexpected doc size: " + d.getDictionary().size() + "\nGot: " + docSize + "\n" + "expected: " + docKeys, docKeys.keySet().size(), docSize);
    // validate the hitlist
    boolean hitTermPresent = d.getDictionary().get(JexlEvaluation.HIT_TERM_FIELD) != null;
    assertEquals("HIT_TERM presence expected: " + isExpectHitTerm() + " actual: " + hitTermPresent, isExpectHitTerm(), hitTermPresent);
    // verify hits for each specified field
    for (String field : docKeys.keySet()) {
        List<String> expected = docKeys.get(field);
        Attribute<?> docAttr = d.getDictionary().get(field);
        // fail with a readable message instead of a NullPointerException when the field is absent
        assertNotNull("Expected field '" + field + "' was not found in the document", docAttr);
        if (expected.size() == 1) {
            // verify the only doc
            String expectedValue = expected.get(0);
            boolean stringsMatch;
            if (docAttr instanceof Attributes) {
                // Special handling of Content attributes, typically when TermFrequencies are looked up.
                // TFs append Content attributes which results in Attributes coming back instead of a single Attribute
                Set<?> datas = (Set<?>) docAttr.getData();
                Set<String> dataStrings = datas.stream().map(Object::toString).collect(Collectors.toSet());
                stringsMatch = dataStrings.contains(expectedValue);
            } else {
                stringsMatch = docAttr.getData().toString().equals(expectedValue);
            }
            assertTrue(field + ": value: " + docAttr.getData() + " did not match expected value: " + expectedValue, stringsMatch);
        } else {
            // the data should be a set, verify it matches expected
            Object dictData = docAttr.getData();
            assertNotNull(dictData);
            assertTrue("Expected " + expected.size() + " values for '" + field + "' found 1, '" + dictData.toString() + "'\nexpected: " + expected, dictData instanceof Set);
            Set<?> dictSet = (Set<?>) dictData;
            assertEquals("Expected " + expected.size() + " values for '" + field + "' found " + dictSet.size() + "\nfound: " + dictSet.toString() + "\nexpected: " + expected, expected.size(), dictSet.size());
            for (Object element : dictSet) {
                String foundString = ((Attribute<?>) element).getData().toString();
                // each found value consumes one expected value
                assertTrue("could not find " + foundString + " in results! Still had " + expected, expected.remove(foundString));
            }
            // verify that the expected set is now empty
            assertEquals(0, expected.size());
        }
    }
    // there should be no other hits
    // NOTE(review): this only advances the iterator; nothing here asserts that it is exhausted - confirm callers check
    iterator.next();
}
Also used : Set(java.util.Set) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Value(org.apache.accumulo.core.data.Value) Map(java.util.Map) HashMap(java.util.HashMap) AbstractMap(java.util.AbstractMap) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 8 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class DocumentProjection method trim.

/**
 * Builds a new document holding only the parts of {@code d} that pass the projection.
 * <p>
 * Per dictionary entry:
 * <ul>
 * <li>field accepted by the projection: copied as a whole, unless the projection uses a blacklist and
 * the value is a nested {@code Document}/{@code Attributes}, in which case the nested value is trimmed
 * recursively and kept only when non-empty;</li>
 * <li>field rejected and the projection does not use a blacklist: a nested
 * {@code Document}/{@code Attributes} is still trimmed recursively (a child may match) and kept only
 * when non-empty; any other value is dropped;</li>
 * <li>field rejected and the projection uses a blacklist: dropped.</li>
 * </ul>
 *
 * @param d
 *            the document to trim
 * @return a new document with the retained (copied) attributes
 */
private Document trim(Document d) {
    Document trimmed = new Document();
    for (Entry<String, Attribute<? extends Comparable<?>>> field : d.getDictionary().entrySet()) {
        final String fieldName = field.getKey();
        final Attribute<?> attr = field.getValue();
        final boolean accepted = projection.apply(fieldName);
        final boolean blacklisting = projection.isUseBlacklist();
        final boolean container = attr instanceof Document || attr instanceof Attributes;

        if (accepted && !(blacklisting && container)) {
            // We just want to add this subtree
            trimmed.put(fieldName, (Attribute<?>) attr.copy(), this.includeGroupingContext, this.reducedResponse);
            continue;
        }
        if (!accepted && blacklisting) {
            // rejected by the blacklist: nothing to keep
            continue;
        }

        // Remaining cases: an accepted container whose children may still be excluded via the
        // blacklist, or a rejected field whose children may still match the whitelist.
        if (attr instanceof Document) {
            Document subDoc = trim((Document) attr);
            if (subDoc.size() > 0) {
                trimmed.put(fieldName, subDoc.copy(), this.includeGroupingContext, this.reducedResponse);
            }
        } else if (attr instanceof Attributes) {
            // Documents can be nested under attributes and vice-versa all the way down, so the
            // field name is passed along to let a nested document be evaluated by its own name
            Attributes subAttrs = trim((Attributes) attr, fieldName);
            if (subAttrs.size() > 0) {
                trimmed.put(fieldName, subAttrs.copy(), this.includeGroupingContext, this.reducedResponse);
            }
        }
    }
    return trimmed;
}
Also used : Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document)

Example 9 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class AbstractVersionFilter method validate.

/**
 * Validate the UID based on the specified pattern. An invalid UID may mean different things in different contexts, such as
 * "No, this Key does not belong to the most current version." but should always prevent the unmodified input from being returned.
 * <p>
 * An empty UID is treated as valid. Otherwise the UID is split by {@code pattern}: when the split yields exactly two pieces, the
 * UID with the trailing piece stripped is validated; otherwise, when the whole UID matches {@code pattern}, the whole UID is
 * validated. If the result is invalid and a document was supplied, every entry is removed from that document.
 * <p>
 * Any exception raised while validating or clearing the document is logged, and the validity determined so far is returned.
 *
 * @param dataType
 *            the data type, passed through to the row-level validation
 * @param uid
 *            the UID to validate
 * @param pattern
 *            the regular expression applied to the UID
 * @param key
 *            the key whose row is validated against
 * @param document
 *            the document to clear when the UID is invalid, may be null
 * @param isMultiMapping
 *            indicates whether multiple data types are mapped, which helps make pattern lookup a little more efficient
 * @return true if the UID is considered valid, false otherwise
 */
private boolean validate(final String dataType, final String uid, final String pattern, final Key key, final Document document, boolean isMultiMapping) {
    boolean isValid = true;
    if (!uid.isEmpty()) {
        final String[] split = uid.split(pattern);
        try {
            final Text row = key.getRow();
            if (split.length == 2) {
                // keep everything up to (but excluding) the trailing piece left over after the match
                final String matchedUid = uid.substring(0, (uid.length() - split[1].length()));
                isValid = this.validate(row, dataType, matchedUid, isMultiMapping);
            } else if (uid.matches(pattern)) {
                isValid = this.validate(row, dataType, uid, isMultiMapping);
            }
            if (!isValid && null != document) {
                // iterate over a copy of the entries so removal does not disturb the iteration
                final Set<Entry<String, Attribute<? extends Comparable<?>>>> entries = new HashSet<>(document.entrySet());
                for (final Entry<String, Attribute<? extends Comparable<?>>> entry : entries) {
                    document.removeAll(entry.getKey());
                }
            }
        } catch (final Exception e) {
            LOG.error("Could not validate normalized version for " + key, e);
        }
    }
    return isValid;
}
Also used : Entry(java.util.Map.Entry) Attribute(datawave.query.attributes.Attribute) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) HashSet(java.util.HashSet)

Example 10 with Attribute

use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.

the class AttributeToCardinality method apply.

/**
 * Converts every attribute of the incoming document (except the document key field) into a
 * {@code Cardinality}, and returns the original key paired with a new document holding the result.
 * <p>
 * Attributes that are already {@code Cardinality} instances are reused as-is; any other attribute is
 * wrapped in a new {@code Cardinality} built from its data's string form. Multi-valued
 * {@code Attributes} are converted element by element into a new {@code Attributes}.
 *
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    final Key docKey = input.getKey();
    final Document sourceDoc = input.getValue();
    // for cardinalities, only use the visibility metadata
    final Key visibilityOnly = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, sourceDoc.getColumnVisibility(), -1);
    final Map<?, ?> sourceFields = (Map<?, ?>) sourceDoc.getData();
    final TreeMap<String, Attribute<? extends Comparable<?>>> converted = Maps.newTreeMap();
    final DatawaveKey parsedKey = new DatawaveKey(docKey);

    for (Entry<?, ?> rawField : sourceFields.entrySet()) {
        Entry<String, Attribute<?>> field = (Entry<String, Attribute<?>>) rawField;
        final String fieldName = field.getKey();
        if (fieldName.equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }
        final Attribute<?> value = field.getValue();

        if (!(value instanceof Attributes)) {
            // single-valued field
            Cardinality single;
            if (value instanceof Cardinality) {
                single = (Cardinality) value;
            } else {
                FieldValueCardinality fvc = new FieldValueCardinality();
                fvc.setContent(value.getData().toString());
                fvc.setDoc(sourceDoc);
                single = new Cardinality(fvc, visibilityOnly, value.isToKeep());
                if (log.isTraceEnabled()) {
                    log.trace("Adding " + parsedKey.getUid() + " " + fieldName + " " + value.getData() + " " + fvc.getEstimate().cardinality());
                }
            }
            converted.put(fieldName, single);
            continue;
        }

        // multi-valued field: convert each element, carrying over the container's keep flag
        final Attributes multi = (Attributes) value;
        final Attributes convertedMulti = new Attributes(multi.isToKeep());
        for (Attribute<?> element : multi.getAttributes()) {
            Cardinality elementCard;
            if (element instanceof Cardinality) {
                elementCard = (Cardinality) element;
            } else {
                FieldValueCardinality fvc = new FieldValueCardinality();
                fvc.setContent(element.getData().toString());
                fvc.setDoc(sourceDoc);
                elementCard = new Cardinality(fvc, visibilityOnly, multi.isToKeep());
                if (log.isTraceEnabled()) {
                    log.trace("Adding from attributes " + fieldName + " " + element.getData());
                }
            }
            convertedMulti.add(elementCard);
        }
        converted.put(fieldName, convertedMulti);
    }

    final Document cardinalityDoc = new Document();
    cardinalityDoc.putAll(converted.entrySet().iterator(), false);
    return Maps.immutableEntry(docKey, cardinalityDoc);
}
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Entry(java.util.Map.Entry) TreeMap(java.util.TreeMap) Map(java.util.Map) DatawaveKey(datawave.query.data.parsers.DatawaveKey) FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key)

Aggregations

Attribute (datawave.query.attributes.Attribute): 21; Document (datawave.query.attributes.Document): 17; Attributes (datawave.query.attributes.Attributes): 14; Key (org.apache.accumulo.core.data.Key): 13; HashSet (java.util.HashSet): 11; Value (org.apache.accumulo.core.data.Value): 8; TypeAttribute (datawave.query.attributes.TypeAttribute): 6; Map (java.util.Map): 6; Entry (java.util.Map.Entry): 6; Cardinality (datawave.query.attributes.Cardinality): 4; QueryImpl (datawave.webservice.query.QueryImpl): 4; GenericQueryConfiguration (datawave.webservice.query.configuration.GenericQueryConfiguration): 4; Set (java.util.Set): 4; Type (datawave.data.type.Type): 3; Multimap (com.google.common.collect.Multimap): 2; Content (datawave.query.attributes.Content): 2; FieldValueCardinality (datawave.query.attributes.FieldValueCardinality): 2; PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute): 2; TimingMetadata (datawave.query.attributes.TimingMetadata): 2; DatawaveKey (datawave.query.data.parsers.DatawaveKey): 2