Search in sources:

Example 1 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

From the class JexlEvaluation, the method apply:

/**
 * Evaluates the compiled JEXL script against the document's context and, when hit-list
 * arithmetic is in use, records the matching terms on the document under HIT_TERM_FIELD.
 *
 * @param input tuple of (key, document, JEXL context); the document may be mutated with
 *              delayed-index content and hit-term attributes
 * @return true if the script evaluation matched, false otherwise
 */
@Override
public boolean apply(Tuple3<Key, Document, DatawaveJexlContext> input) {
    Object o = script.execute(input.third());
    if (log.isTraceEnabled()) {
        log.trace("Evaluation of " + query + " against " + input.third() + " returned " + o);
    }
    boolean matched = isMatched(o);
    // Add delayed info to document
    if (matched && input.third() instanceof DelayedNonEventIndexContext) {
        ((DelayedNonEventIndexContext) input.third()).populateDocument(input.second());
    }
    if (arithmetic instanceof HitListArithmetic) {
        HitListArithmetic hitListArithmetic = (HitListArithmetic) arithmetic;
        if (matched) {
            Document document = input.second();
            Attributes attributes = new Attributes(input.second().isToKeep());
            for (ValueTuple hitTuple : hitListArithmetic.getHitTuples()) {
                ColumnVisibility cv = null;
                String term = hitTuple.getFieldName() + ':' + hitTuple.getValue();
                if (hitTuple.getSource() != null) {
                    cv = hitTuple.getSource().getColumnVisibility();
                }
                // fall back to extracting column visibility from document
                if (cv == null) {
                    // get the visibility for the record with this hit; if no visibility is
                    // computed, there were no hits matching fields still in the document
                    cv = HitListArithmetic.getColumnVisibilityForHit(document, term);
                }
                if (cv != null) {
                    // removed: an unused local that read document.getTimestamp() here
                    Content content = new Content(term, document.getMetadata(), document.isToKeep());
                    content.setColumnVisibility(cv);
                    attributes.add(content);
                }
            }
            if (attributes.size() > 0) {
                document.put(HIT_TERM_FIELD, attributes);
            }
        }
        // always clear accumulated hit tuples so state does not leak into the next evaluation
        hitListArithmetic.clear();
    }
    return matched;
}
Also used : DelayedNonEventIndexContext(datawave.query.jexl.DelayedNonEventIndexContext) HitListArithmetic(datawave.query.jexl.HitListArithmetic) ValueTuple(datawave.query.attributes.ValueTuple) Content(datawave.query.attributes.Content) Attributes(datawave.query.attributes.Attributes) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Document(datawave.query.attributes.Document)

Example 2 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

From the class LimitFields, the method apply:

/**
 * Reduces the number of attribute values retained for each field configured in
 * {@code limitFieldsMap}, always keeping attributes that appear in the query's hit-term
 * map before filling any remaining quota with non-hit values. For each field that was
 * actually reduced, a companion {@code <FIELD>_ORIGINAL_COUNT} entry records the
 * pre-reduction cardinality. NOTE(review): the document inside the entry is mutated in
 * place and the same entry instance is returned.
 *
 * @param entry the key/document pair whose document will have limited fields reduced
 * @return the input entry, with its document mutated
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> entry) {
    // key is the limited field name with _ORIGINAL_COUNT appended,
    // value will be set to the original count of that field in the document
    Map<String, Integer> limitedFieldCounts = new HashMap<>();
    Document document = entry.getValue();
    Map<String, String> hitTermMap = this.getHitTermMap(document);
    // accumulates the attributes that survive the limit, keyed WITH grouping context
    Multimap<String, Attribute<? extends Comparable<?>>> reducedMap = LinkedListMultimap.create();
    // per-field (no grouping context) total attribute count, populated by manageHitsAndMisses
    Map<String, Integer> countForFieldMap = Maps.newHashMap();
    // maps from the key with NO grouping context to a multimap of
    // key WITH grouping context to attributes:
    // DIRECTION : [DIRECTION.1 : [over,under], DIRECTION.2 : [sideways,down]]
    LoadingCache<String, Multimap<String, Attribute<? extends Comparable<?>>>> hits = CacheBuilder.newBuilder().build(new CacheLoader<String, Multimap<String, Attribute<? extends Comparable<?>>>>() {

        public Multimap<String, Attribute<? extends Comparable<?>>> load(String key) {
            return LinkedListMultimap.create();
        }
    });
    // maps from the key with NO grouping context to a multimap of
    // key WITH grouping context to attributes:
    // DIRECTION : [DIRECTION.1 : [over,under], DIRECTION.2 : [sideways,down]]
    @SuppressWarnings("serial") LoadingCache<String, Multimap<String, Attribute<? extends Comparable<?>>>> misses = CacheBuilder.newBuilder().build(new CacheLoader<String, Multimap<String, Attribute<? extends Comparable<?>>>>() {

        public Multimap<String, Attribute<? extends Comparable<?>>> load(String key) {
            return LinkedListMultimap.create();
        }
    });
    // first pass: classify every attribute in the document as a hit or a miss
    for (Map.Entry<String, Attribute<? extends Comparable<?>>> de : document.entrySet()) {
        String keyWithGrouping = de.getKey();
        String keyNoGrouping = keyWithGrouping;
        // if we have grouping context on, remove the grouping context
        if (keyNoGrouping.indexOf('.') != -1) {
            keyNoGrouping = keyNoGrouping.substring(0, keyNoGrouping.indexOf('.'));
        }
        // limit value for _ANYFIELD_
        // NOTE(review): this mutates the shared limitFieldsMap while iterating the document,
        // so _ANYFIELD_ limits become sticky for fields seen in earlier documents
        if (this.limitFieldsMap.containsKey("_ANYFIELD_") && this.limitFieldsMap.containsKey(keyNoGrouping) == false) {
            this.limitFieldsMap.put(keyNoGrouping, this.limitFieldsMap.get("_ANYFIELD_"));
            log.trace("added " + keyNoGrouping + " - " + this.limitFieldsMap.get(keyNoGrouping) + " to the limitFieldsMap because of the _ANYFIELD_ entry");
        }
        if (this.limitFieldsMap.containsKey(keyNoGrouping)) {
            // look for the key without the grouping context
            if (log.isTraceEnabled())
                log.trace("limitFieldsMap contains " + keyNoGrouping);
            Attribute<?> attr = de.getValue();
            // used below if you un-comment to get all hits
            int limit = this.limitFieldsMap.get(keyNoGrouping);
            if (attr instanceof Attributes) {
                // multi-valued attribute: classify each contained value individually
                Attributes attrs = (Attributes) attr;
                Set<Attribute<? extends Comparable<?>>> attrSet = attrs.getAttributes();
                for (Attribute<? extends Comparable<?>> value : attrSet) {
                    manageHitsAndMisses(keyWithGrouping, keyNoGrouping, value, hitTermMap, hits, misses, countForFieldMap);
                }
            } else {
                manageHitsAndMisses(keyWithGrouping, keyNoGrouping, attr, hitTermMap, hits, misses, countForFieldMap);
            }
        }
    }
    // second pass: for each limited field, keep all hits first, then fill the
    // remaining quota (if any) with misses
    for (String keyNoGrouping : countForFieldMap.keySet()) {
        int limit = this.limitFieldsMap.get(keyNoGrouping);
        Multimap<String, Attribute<? extends Comparable<?>>> hitMap = hits.getUnchecked(keyNoGrouping);
        for (String keyWithGrouping : hitMap.keySet()) {
            for (Attribute<? extends Comparable<?>> value : hitMap.get(keyWithGrouping)) {
                // if(limit <= 0) break; // comment this line if you want to get ALL hits even if the limit is exceeded
                reducedMap.put(keyWithGrouping, value);
                limit--;
            }
        }
        // limit may already be <= 0 here if hits alone exceeded the quota; in that
        // case no misses are kept
        Multimap<String, Attribute<? extends Comparable<?>>> missMap = misses.getUnchecked(keyNoGrouping);
        for (String keyWithGrouping : missMap.keySet()) {
            for (Attribute<? extends Comparable<?>> value : missMap.get(keyWithGrouping)) {
                if (limit <= 0)
                    break;
                reducedMap.put(keyWithGrouping, value);
                limit--;
            }
        }
        if (log.isTraceEnabled()) {
            log.trace("reducedMap:" + reducedMap);
            log.trace("mapOfHits:" + hits.asMap());
            log.trace("mapOfMisses:" + misses.asMap());
        }
        // only generate an original count if a field was reduced
        if (countForFieldMap.get(keyNoGrouping) > this.limitFieldsMap.get(keyNoGrouping)) {
            limitedFieldCounts.put(keyNoGrouping + ORIGINAL_COUNT_SUFFIX, countForFieldMap.get(keyNoGrouping));
        }
    }
    // mutate the document with the changes collected in the above loop
    applyCounts(document, limitedFieldCounts);
    // remove every classified attribute, then re-add the survivors from reducedMap
    Map<String, Multimap<String, Attribute<? extends Comparable<?>>>> toRemove = Maps.newLinkedHashMap();
    toRemove.putAll(hits.asMap());
    toRemove.putAll(misses.asMap());
    makeReduction(document, toRemove, reducedMap);
    return entry;
}
Also used : HashMap(java.util.HashMap) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) LinkedListMultimap(com.google.common.collect.LinkedListMultimap) Multimap(com.google.common.collect.Multimap) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

From the class IndexOnlyKeyToDocumentData, the method next:

/**
 * Advances the underlying source and wraps the next key/value pair as a
 * (DocumentData, Document) entry.
 *
 * @return the next entry, or null if seekNext produced nothing
 * @throws DatawaveFatalQueryException if the underlying seek fails with an IOException
 * @throws NoSuchElementException if the iterator has been exhausted (sentinel returned)
 */
@Override
public Entry<DocumentData, Document> next() {
    final Entry<Key, Value> next;
    try {
        next = this.seekNext(false);
    } catch (IOException e) {
        QueryException qe = new QueryException(DatawaveErrorCode.SEEK_NEXT_ELEMENT_ERROR, e);
        throw new DatawaveFatalQueryException(qe);
    }
    final Entry<DocumentData, Document> entry;
    // Check the completion sentinel BEFORE the generic null test. With the sentinel
    // checked second (as previously written), a non-null sentinel would satisfy
    // "null != next" and be returned as a valid entry, making the exhaustion branch
    // unreachable. This ordering is equivalent when the sentinel is null.
    if (next == ITERATOR_COMPLETE_KEY) {
        QueryException qe = new QueryException(DatawaveErrorCode.FETCH_NEXT_ELEMENT_ERROR, MessageFormat.format("Fieldname: {0}, Range: {1}", this.fieldName, this.parent));
        throw (NoSuchElementException) (new NoSuchElementException().initCause(qe));
    } else if (null != next) {
        final List<Entry<Key, Value>> keyValues = new LinkedList<>();
        keyValues.add(next);
        Key docKey = getDocKey(next.getKey());
        final DocumentData documentData = new DocumentData(this.iteratorDocumentKey, Collections.singleton(docKey), keyValues, true);
        entry = Maps.immutableEntry(documentData, this.iteratorDocument);
    } else {
        entry = null;
    }
    return entry;
}
Also used : IOException(java.io.IOException) Document(datawave.query.attributes.Document) LinkedList(java.util.LinkedList) DocumentData(datawave.query.iterator.aggregation.DocumentData) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) Entry(java.util.Map.Entry) Value(org.apache.accumulo.core.data.Value) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) NoSuchElementException(java.util.NoSuchElementException)

Example 4 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

From the class DocumentDataIterator, the method findNextDocument:

/**
 * Advances the underlying source to the next event (document) key, skipping over the
 * field index ('fi'), raw document ('d'), and term frequency ('tf') column families
 * with optimized seeks rather than next()'ing through them. On finding an event key
 * that passes the data type filter, maps it to documentData and then seeks past the
 * remainder of that document. Leaves documentData null if the source is exhausted.
 */
protected void findNextDocument() {
    documentData = null;
    try {
        Text cf = new Text();
        /*
         * Given that we are already at a document key, this method will continue to advance the underlying source until it is either exhausted (hasTop()
         * returns false), the returned key is not in the totalRange, and the current top key shares the same row and column family as the source's next
         * key.
         */
        while (documentData == null && source.hasTop()) {
            Key k = source.getTopKey();
            if (log.isTraceEnabled())
                log.trace("Sought to " + k);
            k.getColumnFamily(cf);
            if (!isEventKey(k)) {
                if (cf.find("fi\0") == 0) {
                    if (log.isDebugEnabled()) {
                        // fixed: message previously had a stray trailing ')' unlike the 'd'/'tf' messages
                        log.debug("Seeking over 'fi'");
                    }
                    // Try to do an optimized jump over the field index
                    cf.set("fi\1");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()), columnFamilies, inclusive);
                } else if (cf.getLength() == 1 && cf.charAt(0) == 'd') {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'd'");
                    }
                    // Try to do an optimized jump over the raw documents
                    cf.set("d\0");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()), columnFamilies, inclusive);
                } else if (cf.getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f') {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'tf'");
                    }
                    // Try to do an optimized jump over the term frequencies
                    cf.set("tf\0");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()), columnFamilies, inclusive);
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug("Next()'ing over the current key");
                    }
                    source.next();
                }
            } else {
                Key pointer = source.getTopKey();
                if (dataTypeFilter.apply(pointer)) {
                    this.documentData = this.documentMapper.apply(Maps.immutableEntry(pointer, new Document()));
                }
                // now bounce to the next document as the documentMapper may have moved the source considerably
                Key nextDocKey = this.evaluationFilter != null ? this.evaluationFilter.getStopKey(pointer) : pointer.followingKey(PartialKey.ROW_COLFAM);
                if (totalRange.contains(nextDocKey)) {
                    Range nextCF = new Range(nextDocKey, true, totalRange.getEndKey(), totalRange.isEndKeyInclusive());
                    source.seek(nextCF, columnFamilies, inclusive);
                } else {
                    // skip to the end
                    Range nextCF = new Range(totalRange.getEndKey(), false, totalRange.getEndKey().followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME_DEL), false);
                    source.seek(nextCF, columnFamilies, inclusive);
                }
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not seek in findNextDocument", e);
    }
}
Also used : Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 5 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

From the class DocumentDeserializer, the method apply:

/**
 * Strips the serialization header from the entry's value and deserializes the
 * remaining bytes into a Document, pairing it with the original key.
 *
 * @param from the serialized key/value pair
 * @return an immutable entry of the original key and the deserialized document
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Value> from) {
    final byte[] rawBytes = from.getValue().get();
    final InputStream payload = DocumentSerialization.consumeHeader(rawBytes);
    final Document deserialized = deserialize(payload);
    return Maps.immutableEntry(from.getKey(), deserialized);
}
Also used : InputStream(java.io.InputStream) Document(datawave.query.attributes.Document)

Aggregations

Document (datawave.query.attributes.Document)97 Key (org.apache.accumulo.core.data.Key)76 Test (org.junit.Test)35 Value (org.apache.accumulo.core.data.Value)30 HashSet (java.util.HashSet)28 Range (org.apache.accumulo.core.data.Range)26 Attribute (datawave.query.attributes.Attribute)18 Map (java.util.Map)17 Attributes (datawave.query.attributes.Attributes)16 HashMap (java.util.HashMap)16 AbstractMap (java.util.AbstractMap)14 TypeAttribute (datawave.query.attributes.TypeAttribute)13 Entry (java.util.Map.Entry)13 PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute)12 Set (java.util.Set)12 Content (datawave.query.attributes.Content)11 TypeMetadata (datawave.query.util.TypeMetadata)10 QueryImpl (datawave.webservice.query.QueryImpl)10 DatawaveKey (datawave.query.data.parsers.DatawaveKey)9 DatawaveJexlContext (datawave.query.jexl.DatawaveJexlContext)9