Search in sources:

Example 1 with Content

use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.

The class JexlEvaluation, method apply.

/**
 * Evaluates the compiled JEXL script against the supplied (key, document, context) tuple.
 * <p>
 * When the evaluation matches and the context is a {@code DelayedNonEventIndexContext}, the
 * delayed index information is populated back into the document. When the arithmetic is a
 * {@code HitListArithmetic}, a {@code HIT_TERM_FIELD} attribute set is built from the hit
 * tuples of a matched evaluation; the hit list is cleared afterwards in all cases.
 *
 * @param input tuple of (document key, document, JEXL evaluation context)
 * @return true if the script evaluation matched, false otherwise
 */
@Override
public boolean apply(Tuple3<Key, Document, DatawaveJexlContext> input) {
    Object o = script.execute(input.third());
    if (log.isTraceEnabled()) {
        log.trace("Evaluation of " + query + " against " + input.third() + " returned " + o);
    }
    boolean matched = isMatched(o);
    // Add delayed info to document
    if (matched && input.third() instanceof DelayedNonEventIndexContext) {
        ((DelayedNonEventIndexContext) input.third()).populateDocument(input.second());
    }
    if (arithmetic instanceof HitListArithmetic) {
        HitListArithmetic hitListArithmetic = (HitListArithmetic) arithmetic;
        if (matched) {
            Document document = input.second();
            Attributes attributes = new Attributes(input.second().isToKeep());
            for (ValueTuple hitTuple : hitListArithmetic.getHitTuples()) {
                String term = hitTuple.getFieldName() + ':' + hitTuple.getValue();
                // prefer the visibility carried on the hit's source attribute
                ColumnVisibility cv = null;
                if (hitTuple.getSource() != null) {
                    cv = hitTuple.getSource().getColumnVisibility();
                }
                // fall back to extracting column visibility from document
                if (cv == null) {
                    // get the visibility for the record with this hit
                    cv = HitListArithmetic.getColumnVisibilityForHit(document, term);
                    // if no visibility computed, then there were no hits that match fields still in the document......
                }
                if (cv != null) {
                    // called for its side effect only: forces an update that makes the
                    // document metadata valid before getMetadata() is read below
                    document.getTimestamp();
                    Content content = new Content(term, document.getMetadata(), document.isToKeep());
                    content.setColumnVisibility(cv);
                    attributes.add(content);
                }
            }
            if (attributes.size() > 0) {
                document.put(HIT_TERM_FIELD, attributes);
            }
        }
        // reset hit state for the next evaluation regardless of match outcome
        hitListArithmetic.clear();
    }
    return matched;
}
Also used : DelayedNonEventIndexContext(datawave.query.jexl.DelayedNonEventIndexContext) HitListArithmetic(datawave.query.jexl.HitListArithmetic) ValueTuple(datawave.query.attributes.ValueTuple) Content(datawave.query.attributes.Content) Attributes(datawave.query.attributes.Attributes) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Document(datawave.query.attributes.Document)

Example 2 with Content

use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.

The class HitsAreAlwaysIncludedCommonalityTokenTest, method runTestQuery.

/**
 * Runs the supplied query against the test connector and asserts that every returned
 * document's hit terms appear in {@code goodResults}, and that all expected results are
 * accounted for by each document's dictionary entries.
 *
 * @param connector Accumulo connector to run against
 * @param queryString the query to execute
 * @param startDate begin date for the query window
 * @param endDate end date for the query window
 * @param extraParms additional query parameters
 * @param goodResults expected "FIELD:value" results; mutated (drained) during verification
 * @throws Exception if query initialization or execution fails
 */
protected void runTestQuery(Connector connector, String queryString, Date startDate, Date endDate, Map<String, String> extraParms, Collection<String> goodResults) throws Exception {
    // Assemble the query settings for this run.
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(queryString);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());
    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());

    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);

    Set<Document> returnedDocs = new HashSet<>();
    for (Entry<Key, Value> resultEntry : logic) {
        Document doc = deserializer.apply(resultEntry).getValue();
        log.trace(resultEntry.getKey() + " => " + doc);
        returnedDocs.add(doc);

        // Every hit term recorded on the document must be one of the expected results.
        Attribute hitAttribute = doc.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitAttribute instanceof Attributes) {
            for (Attribute attr : ((Attributes) hitAttribute).getAttributes()) {
                if (attr instanceof Content) {
                    Assert.assertTrue(goodResults.contains(((Content) attr).getContent()));
                }
            }
        } else if (hitAttribute instanceof Content) {
            Assert.assertTrue(goodResults.contains(((Content) hitAttribute).getContent()));
        }

        // remove from goodResults as we find the expected return fields
        log.debug("goodResults: " + goodResults);
        Map<String, Attribute<? extends Comparable<?>>> dictionary = doc.getDictionary();
        log.debug("dictionary:" + dictionary);
        for (Entry<String, Attribute<? extends Comparable<?>>> dictionaryEntry : dictionary.entrySet()) {
            Attribute<? extends Comparable<?>> value = dictionaryEntry.getValue();
            if (value instanceof Attributes) {
                for (Attribute attr : ((Attributes) value).getAttributes()) {
                    String candidate = dictionaryEntry.getKey() + ":" + attr;
                    if (goodResults.remove(candidate)) {
                        log.debug("removed " + candidate);
                    } else {
                        log.debug("Did not remove " + candidate);
                    }
                }
            } else {
                String candidate = dictionaryEntry.getKey() + ":" + value;
                if (goodResults.remove(candidate)) {
                    log.debug("removed " + candidate);
                } else {
                    log.debug("Did not remove " + candidate);
                }
            }
        }
        // each document must fully drain the expected results
        Assert.assertTrue(goodResults + " was not empty", goodResults.isEmpty());
    }
    Assert.assertTrue("No docs were returned!", !returnedDocs.isEmpty());
}
Also used : Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) GenericQueryConfiguration(datawave.webservice.query.configuration.GenericQueryConfiguration) QueryImpl(datawave.webservice.query.QueryImpl) Content(datawave.query.attributes.Content) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Example 3 with Content

use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.

The class ContentTransform, method apply.

@Nullable
@Override
public Map.Entry<Key, Document> apply(@Nullable Map.Entry<Key, Document> keyDocumentEntry) {
    if (keyDocumentEntry != null) {
        Document document = keyDocumentEntry.getValue();
        Key documentKey = DocumentTransformer.correctKey(keyDocumentEntry.getKey());
        String colf = documentKey.getColumnFamily().toString();
        int index = colf.indexOf("\0");
        String uid = colf.substring(index + 1);
        for (String contentFieldName : this.contentFieldNames) {
            if (document.containsKey(contentFieldName)) {
                Attribute<?> contentField = document.remove(contentFieldName);
                if (contentField.getData().toString().equalsIgnoreCase("true")) {
                    Content c = new Content(uid, contentField.getMetadata(), document.isToKeep());
                    document.put(contentFieldName, c, false, this.reducedResponse);
                }
            }
        }
    }
    return keyDocumentEntry;
}
Also used : Content(datawave.query.attributes.Content) Document(datawave.query.attributes.Document) Key(org.apache.accumulo.core.data.Key) Nullable(javax.annotation.Nullable)

Example 4 with Content

use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.

The class DocumentTransformerSupport, method convertMappedAttribute.

/**
 * If the attribute's string form looks like "FIELD:value", reverse-maps the field-name
 * portion through the query model (when one is configured) and returns a new {@code Content}
 * attribute with the mapped name. Attributes without a ':' are returned unchanged.
 *
 * @param attribute the attribute to convert
 * @return the converted attribute, or the original when no ':' is present
 */
private Attribute<?> convertMappedAttribute(Attribute<?> attribute) {
    String attributeString = attribute.getData().toString();
    int separator = attributeString.indexOf(':');
    if (separator == -1) {
        return attribute;
    }
    String fieldName = attributeString.substring(0, separator);
    // Apply the reverse mapping to make the field name human-readable again
    if (this.getQm() != null) {
        fieldName = this.getQm().aliasFieldNameReverseModel(fieldName);
    }
    // re-attach the remainder, which starts at (and includes) the ':'
    String remainder = attributeString.substring(separator);
    return new Content(fieldName + remainder, attribute.getMetadata(), attribute.isToKeep());
}
Also used : Content(datawave.query.attributes.Content)

Example 5 with Content

use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.

The class TermOffsetPopulator, method getContextMap.

/**
 * Build TermOffset map for use in JexlEvaluation
 *
 * @param docKey
 *            key that maps to a document
 * @param keys
 *            set of keys that map to hits on tf fields
 * @param fields
 *            set of fields to remove from the search space
 * @return a single-entry map from the term-offset JEXL variable name to the term/offset map,
 *         or null if a TermWeight protocol buffer could not be deserialized
 */
public Map<String, Object> getContextMap(Key docKey, Set<Key> keys, Set<String> fields) {
    document = new Document();
    TermFrequencyIterator tfSource;
    // Do not prune if no fields exist or if the tf fields would prune to nothing. TODO skip tf entirely if this would prune to zero
    if (fields == null || fields.isEmpty() || fields.size() == termFrequencyFieldValues.keySet().size()) {
        tfSource = new TermFrequencyIterator(termFrequencyFieldValues, keys);
    } else {
        // There are fields to remove, reduce the search space and continue
        Multimap<String, String> tfFVs = HashMultimap.create(termFrequencyFieldValues);
        fields.forEach(tfFVs::removeAll);
        tfSource = new TermFrequencyIterator(tfFVs, keys);
        if (tfFVs.isEmpty()) {
            log.error("Created a TFIter with no field values. Orig fields: " + termFrequencyFieldValues.keySet() + " fields to remove: " + fields);
        }
    }
    Range range = getRange(keys);
    try {
        tfSource.init(source, null, null);
        // reuse the precomputed range rather than rebuilding it with a second getRange call
        tfSource.seek(range, null, false);
    } catch (IOException e) {
        log.error("Seek to the range failed: " + range, e);
    }
    // set the document context on the filter
    if (evaluationFilter != null) {
        evaluationFilter.startNewDocument(docKey);
    }
    Map<String, TermFrequencyList> termOffsetMap = Maps.newHashMap();
    while (tfSource.hasTop()) {
        Key key = tfSource.getTopKey();
        FieldValue fv = FieldValue.getFieldValue(key);
        // add the zone and term to our internal document
        // NOTE(review): the metadata key comes from source.getTopKey(), not the tf key above — confirm this is intended
        Content attr = new Content(fv.getValue(), source.getTopKey(), evaluationFilter == null || evaluationFilter.keep(key));
        // no need to apply the evaluation filter here as the TermFrequencyIterator above is already doing more filtering than we can do here.
        // So this filter is simply extraneous. However if an EventDataQueryFilter implementation gets smarter somehow, then it can be added back in
        // here.
        // For example the AncestorQueryLogic may require this....
        // if (evaluationFilter == null || evaluationFilter.apply(Maps.immutableEntry(key, StringUtils.EMPTY_STRING))) {
        this.document.put(fv.getField(), attr);
        TreeMultimap<TermFrequencyList.Zone, TermWeightPosition> offsets = TreeMultimap.create();
        try {
            TermWeight.Info twInfo = TermWeight.Info.parseFrom(tfSource.getTopValue().get());
            // if no content expansion fields then assume every field is permitted for unfielded content functions
            TermFrequencyList.Zone twZone = new TermFrequencyList.Zone(fv.getField(), (contentExpansionFields == null || contentExpansionFields.isEmpty() || contentExpansionFields.contains(fv.getField())), TermFrequencyList.getEventId(key));
            TermWeightPosition.Builder position = new TermWeightPosition.Builder();
            for (int i = 0; i < twInfo.getTermOffsetCount(); i++) {
                position.setTermWeightOffsetInfo(twInfo, i);
                offsets.put(twZone, position.build());
                position.reset();
            }
        } catch (InvalidProtocolBufferException e) {
            log.error("Could not deserialize TermWeight protocol buffer for: " + source.getTopKey());
            return null;
        }
        // First time looking up this term in a field
        TermFrequencyList tfl = termOffsetMap.get(fv.getValue());
        if (null == tfl) {
            termOffsetMap.put(fv.getValue(), new TermFrequencyList(offsets));
        } else {
            // Merge in the offsets for the current field+term with all previous
            // offsets from other fields in the same term
            tfl.addOffsets(offsets);
        }
        try {
            tfSource.next();
        } catch (IOException ioe) {
            log.error("Next failed: " + range, ioe);
            break;
        }
    }
    // Load the actual map into map that will be put into the JexlContext
    Map<String, Object> map = new HashMap<>();
    map.put(Constants.TERM_OFFSET_MAP_JEXL_VARIABLE_NAME, termOffsetMap);
    return map;
}
Also used : HashMap(java.util.HashMap) TermFrequencyIterator(datawave.core.iterators.TermFrequencyIterator) Document(datawave.query.attributes.Document) TermWeightPosition(datawave.ingest.protobuf.TermWeightPosition) TermFrequencyList(datawave.query.jexl.functions.TermFrequencyList) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) Content(datawave.query.attributes.Content) TermWeight(datawave.ingest.protobuf.TermWeight) Key(org.apache.accumulo.core.data.Key)

Aggregations

Content (datawave.query.attributes.Content)14 Document (datawave.query.attributes.Document)11 Key (org.apache.accumulo.core.data.Key)10 DatawaveJexlContext (datawave.query.jexl.DatawaveJexlContext)5 Test (org.junit.Test)5 Attributes (datawave.query.attributes.Attributes)4 HitListArithmetic (datawave.query.jexl.HitListArithmetic)3 Attribute (datawave.query.attributes.Attribute)2 TermFrequencyList (datawave.query.jexl.functions.TermFrequencyList)2 Tuple3 (datawave.query.util.Tuple3)2 QueryImpl (datawave.webservice.query.QueryImpl)2 GenericQueryConfiguration (datawave.webservice.query.configuration.GenericQueryConfiguration)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Value (org.apache.accumulo.core.data.Value)2 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 TermFrequencyIterator (datawave.core.iterators.TermFrequencyIterator)1 TermWeight (datawave.ingest.protobuf.TermWeight)1 TermWeightPosition (datawave.ingest.protobuf.TermWeightPosition)1 ValueTuple (datawave.query.attributes.ValueTuple)1