
Example 6 with DatawaveKey

Use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The class AttributeToCardinality, method apply:

/*
     * (non-Javadoc)
     * 
     * @see com.google.common.base.Function#apply(java.lang.Object)
     */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Document prevDoc = input.getValue();
    Key key = input.getKey();
    // for cardinalities, only use the visibility metadata
    Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, prevDoc.getColumnVisibility(), -1);
    Document newDoc = new Document();
    @SuppressWarnings("unchecked")
    Map<String, Attribute<? extends Comparable<?>>> dictionary = (Map<String, Attribute<? extends Comparable<?>>>) prevDoc.getData();
    TreeMap<String, Attribute<? extends Comparable<?>>> newDictionary = Maps.newTreeMap();
    DatawaveKey parser = new DatawaveKey(key);
    for (Entry<String, Attribute<? extends Comparable<?>>> attr : dictionary.entrySet()) {
        if (!attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            Attribute<?> attribute = attr.getValue();
            if (attribute instanceof Attributes) {
                Attributes attrs = (Attributes) attribute;
                Attributes newAttrs = new Attributes(attrs.isToKeep());
                for (Attribute<?> attributeItem : attrs.getAttributes()) {
                    Cardinality card = null;
                    if (attributeItem instanceof Cardinality) {
                        card = (Cardinality) attributeItem;
                    } else {
                        FieldValueCardinality fvC = new FieldValueCardinality();
                        fvC.setContent(attributeItem.getData().toString());
                        fvC.setDoc(prevDoc);
                        card = new Cardinality(fvC, metadata, attrs.isToKeep());
                        if (log.isTraceEnabled())
                            log.trace("Adding from attributes " + attr.getKey() + " " + attributeItem.getData());
                    }
                    newAttrs.add(card);
                }
                newDictionary.put(attr.getKey(), newAttrs);
            } else {
                Cardinality card = null;
                if (attribute instanceof Cardinality) {
                    card = (Cardinality) attribute;
                } else {
                    FieldValueCardinality fvC = new FieldValueCardinality();
                    fvC.setContent(attribute.getData().toString());
                    fvC.setDoc(prevDoc);
                    card = new Cardinality(fvC, metadata, attribute.isToKeep());
                    if (log.isTraceEnabled())
                        log.trace("Adding " + parser.getUid() + " " + attr.getKey() + " " + attribute.getData() + " " + fvC.getEstimate().cardinality());
                }
                newDictionary.put(attr.getKey(), card);
            }
        }
    }
    newDoc.putAll(newDictionary.entrySet().iterator(), false);
    return Maps.immutableEntry(key, newDoc);
}
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Entry(java.util.Map.Entry) TreeMap(java.util.TreeMap) Map(java.util.Map) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key)
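
AttributeToCardinality is a Guava Function over Entry&lt;Key, Document&gt;, so it composes naturally with iterator pipelines. A minimal usage sketch, assuming a no-argument constructor and a surrounding pipeline (both illustrative, not taken from the example above):

import com.google.common.collect.Iterators;
import datawave.query.attributes.Document;
import org.apache.accumulo.core.data.Key;
import java.util.Iterator;
import java.util.Map.Entry;

// Hypothetical helper: lazily convert every attribute of each document to a
// Cardinality so downstream logic can merge the estimates.
static Iterator<Entry<Key, Document>> toCardinalities(Iterator<Entry<Key, Document>> documents) {
    return Iterators.transform(documents, new AttributeToCardinality());
}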

Example 7 with DatawaveKey

Use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The class CardinalityAggregator, method apply:

@Override
public Key apply(SortedKeyValueIterator<Key, Value> itr, Document doc, AttributeFactory attrs) throws IOException {
    Key key = itr.getTopKey();
    Text row = key.getRow();
    ByteSequence pointer = parsePointer(key.getColumnQualifierData());
    Key nextKey = key;
    while (nextKey != null && samePointer(row, pointer, nextKey)) {
        DatawaveKey topKey = new DatawaveKey(nextKey);
        String field = topKey.getFieldName();
        String value = topKey.getFieldValue();
        FieldValueCardinality fvC = null;
        byte[] currentValue = itr.getTopValue().get();
        try {
            if (currentValue.length > 0) {
                fvC = new FieldValueCardinality(HyperLogLogPlus.Builder.build(currentValue));
                if (log.isTraceEnabled()) {
                    log.trace("Set cardinality from FI value");
                }
            }
        } catch (Exception e) {
            if (log.isTraceEnabled()) {
                log.trace("Exception encountered " + e);
            }
        }
        if (null == fvC) {
            if (log.isTraceEnabled())
                log.trace("Building cardinality for " + topKey.getUid());
            fvC = new FieldValueCardinality();
            if (setDocIds)
                fvC.setDocId(topKey.getUid());
        }
        fvC.setContent(value);
        // for cardinalities, only use the visibility metadata
        Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, itr.getTopKey().getColumnVisibility(), -1);
        Cardinality card = new Cardinality(fvC, metadata, doc.isToKeep());
        // only keep fields that are index only (keep everything if no field set is configured)
        card.setToKeep(fieldsToKeep == null || fieldsToKeep.contains(JexlASTHelper.removeGroupingContext(field)));
        doc.put(field, card);
        key = nextKey;
        itr.next();
        nextKey = (itr.hasTop() ? itr.getTopKey() : null);
    }
    return TLD.buildParentKey(row, pointer, TLD.parseFieldAndValueFromFI(key.getColumnFamilyData(), key.getColumnQualifierData()), key.getColumnVisibility(), key.getTimestamp());
}
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) Text(org.apache.hadoop.io.Text) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) ByteSequence(org.apache.accumulo.core.data.ByteSequence) IOException(java.io.IOException)
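
The Value bytes handed to HyperLogLogPlus.Builder.build above are a serialized HyperLogLogPlus sketch (stream-lib). A sketch of how such a value could be produced; the precision parameters p=12, sp=20 are illustrative assumptions, not values taken from datawave:

import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
import java.io.IOException;

// Offer each field value to the sketch, then serialize it; the resulting bytes
// round-trip through HyperLogLogPlus.Builder.build(bytes) in the aggregator above.
static byte[] buildCardinalitySketch(Iterable<String> fieldValues) throws IOException {
    HyperLogLogPlus hll = new HyperLogLogPlus(12, 20); // assumed precision settings
    for (String value : fieldValues) {
        hll.offer(value);
    }
    return hll.getBytes();
}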

Example 8 with DatawaveKey

Use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The class TLDIndexBuildingVisitor, method buildTermFrequencyAggregator:

/**
 * Use fieldsToAggregate instead of indexOnlyFields because this enables TLDs to return non-event tokens as part of the user document.
 *
 * @param identifier the field name being considered for aggregation
 * @param filter the chainable filter that the root filter is added to
 * @param maxNextCount the maximum number of next calls before seeking
 * @return a TLDTermFrequencyAggregator for the identifier
 */
@Override
protected TermFrequencyAggregator buildTermFrequencyAggregator(String identifier, ChainableEventDataQueryFilter filter, int maxNextCount) {
    EventDataQueryFilter rootFilter = new EventDataQueryFilter() {

        @Override
        public void startNewDocument(Key documentKey) {
            // no-op
        }

        @Override
        public boolean apply(@Nullable Entry<Key, String> entry) {
            // accept all
            return true;
        }

        @Override
        public boolean peek(@Nullable Entry<Key, String> entry) {
            // accept all
            return true;
        }

        /**
         * Only keep the tf key if it isn't the root pointer, or if it is index only and contributes to document evaluation
         *
         * @param k the term frequency key under consideration
         * @return true if the key should be kept
         */
        @Override
        public boolean keep(Key k) {
            DatawaveKey key = new DatawaveKey(k);
            return (!TLDEventDataFilter.isRootPointer(k) || indexOnlyFields.contains(key.getFieldName())) && attrFilter.peek(new AbstractMap.SimpleEntry<>(k, null));
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            throw new UnsupportedOperationException();
        }
    };
    filter.addFilter(rootFilter);
    Set<String> toAggregate = fieldsToAggregate.contains(identifier) ? Collections.singleton(identifier) : Collections.emptySet();
    return new TLDTermFrequencyAggregator(toAggregate, filter, filter.getMaxNextCount());
}
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) Entry(java.util.Map.Entry) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) Nullable(javax.annotation.Nullable)
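
The keep(Key) override above hands raw term frequency keys to DatawaveKey. A small parsing sketch, assuming the standard DataWave tf layout (row = shard id, column family = "tf", column qualifier = datatype\0uid\0fieldValue\0FIELD, the same layout Example 10 builds below) and entirely made-up values:

import datawave.query.data.parsers.DatawaveKey;
import org.apache.accumulo.core.data.Key;
import org.apache.hadoop.io.Text;

static void parseTfKeyExample() {
    // Hypothetical tf key: shard 20240101_0, datatype "csv", uid "-abc.def",
    // field value "bar", field name "FOO".
    Key tfKey = new Key(new Text("20240101_0"), new Text("tf"),
            new Text("csv\0-abc.def\0bar\0FOO"));
    DatawaveKey parsed = new DatawaveKey(tfKey);
    // parsed.getFieldName() -> "FOO", parsed.getUid() -> "-abc.def";
    // keep() then tests isRootPointer(k) and whether "FOO" is index only.
}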

Example 9 with DatawaveKey

Use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The class FieldIndexKeyDataTypeFilter, method getSeekRange:

@Override
public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
    // early return if possible
    if (maxNextBeforeSeek == -1 || nextCount < maxNextBeforeSeek) {
        return null;
    }
    // parse the key to get the value and dataType
    DatawaveKey datawaveKey = new DatawaveKey(current);
    // test if this key should have been accepted
    if (sortedDataTypes.contains(datawaveKey.getDataType())) {
        return null;
    }
    // still here, find the next valid sorted data type and apply it for a new range
    String nextDataType = null;
    for (String dataType : sortedDataTypes) {
        if (dataType.compareTo(datawaveKey.getDataType()) > 0) {
            nextDataType = dataType;
            break;
        }
    }
    // build the new start key based on whether a dataType was selected
    Key startKey;
    boolean inclusiveStart;
    if (nextDataType == null) {
        // roll over the key
        // this will be somewhat blind since the next value is not known
        startKey = new Key(current.getRow(), current.getColumnFamily(), new Text(datawaveKey.getFieldValue() + Constants.NULL_BYTE_STRING + Constants.MAX_UNICODE_STRING));
        inclusiveStart = false;
    } else {
        // generate a new range with the current value and new dataType
        startKey = new Key(current.getRow(), current.getColumnFamily(), new Text(datawaveKey.getFieldValue() + Constants.NULL_BYTE_STRING + nextDataType));
        inclusiveStart = true;
    }
    if (startKey.compareTo(endKey) > 0) {
        // generate an empty range
        return new Range(startKey, false, startKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME), false);
    }
    return new Range(startKey, inclusiveStart, endKey, endKeyInclusive);
}
Also used : Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
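
Here DatawaveKey is parsing field index entries. A sketch of the layout getSeekRange relies on, assuming the standard DataWave fi format (column family = "fi"\0FIELD, column qualifier = fieldValue\0datatype\0uid) with made-up values:

import datawave.query.data.parsers.DatawaveKey;
import org.apache.accumulo.core.data.Key;
import org.apache.hadoop.io.Text;

static void seekRangeExample() {
    // Hypothetical fi key whose datatype "csv" is not in sortedDataTypes = {"json", "xml"}.
    Key current = new Key(new Text("20240101_0"), new Text("fi\0FOO"),
            new Text("bar\0csv\0-abc.def"));
    DatawaveKey parsed = new DatawaveKey(current);
    // "json" is the first sorted datatype greater than "csv", so getSeekRange would
    // start the new range at the same value with that datatype, start inclusive:
    Key startKey = new Key(current.getRow(), current.getColumnFamily(),
            new Text(parsed.getFieldValue() + "\0json"));
}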

Example 10 with DatawaveKey

Use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

The class TermFrequencyIndexIterator, method next:

@Override
public void next() throws IOException {
    // Reset the document and top key on every call; the field name and field value
    // won't change, but the column visibility can
    document = new Document();
    tk = null;
    // reusable buffers
    Text row = new Text(), cf = new Text(), cq = new Text();
    if (scanRange == null) {
        buildScanRangeLazily();
    }
    if (log.isTraceEnabled()) {
        log.trace(source.hasTop() + " nexting on " + scanRange);
    }
    while (source.hasTop() && tk == null) {
        Key top = source.getTopKey();
        row = top.getRow(row);
        top.getColumnFamily(cf);
        top.getColumnQualifier(cq);
        if (!cq.toString().endsWith(field)) {
            if (log.isTraceEnabled()) {
                log.trace(cq + " does not end with " + field);
            }
            source.next();
            continue;
        }
        DatawaveKey key = new DatawaveKey(top);
        Key nextTop = top;
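        // advance at most 256 entries toward the target field; if it still hasn't been reached, the code below seeks instead of stepping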
        for (int i = 0; i < 256 && source.hasTop() && key.getFieldName().compareTo(field) < 0; ++i) {
            source.next();
            nextTop = source.getTopKey();
            if (nextTop == null)
                break;
            key = new DatawaveKey(nextTop);
            if (log.isTraceEnabled()) {
                log.trace("Have key " + key + " < " + field);
            }
        }
        if (nextTop == null)
            continue;
        if (key.getFieldName().compareTo(field) < 0) {
            if (log.isTraceEnabled()) {
                log.trace("Have key " + key + " is less than " + field);
            }
            StringBuilder builder = new StringBuilder(key.getDataType()).append(Constants.NULL).append(key.getUid()).append(Constants.NULL).append(key.getFieldValue()).append(Constants.NULL).append(field);
            Key nextKey = new Key(row, cf, new Text(builder.toString()));
            Range newRange = new Range(nextKey, true, scanRange.getEndKey(), scanRange.isEndKeyInclusive());
            source.seek(newRange, seekColumnFamilies, true);
            continue;
        }
        // only inspect the values specified in the range since a broad row or uid range will potentially go cross document
        if (scanRange.isStartKeyInclusive() && key.getFieldValue().compareTo(startKeyParser.getFieldValue()) < 0) {
            source.next();
            continue;
        } else if (!scanRange.isStartKeyInclusive() && key.getFieldValue().compareTo(startKeyParser.getFieldValue()) <= 0) {
            source.next();
            continue;
        } else if (scanRange.isEndKeyInclusive() && key.getFieldValue().compareTo(stopKeyParser.getFieldValue()) > 0) {
            source.next();
            continue;
        } else if (!scanRange.isEndKeyInclusive() && key.getFieldValue().compareTo(stopKeyParser.getFieldValue()) >= 0) {
            source.next();
            continue;
        }
        if (this.scanRange.isStartKeyInclusive()) {
            if (!this.scanRange.isInfiniteStartKey() && top.compareTo(this.scanRange.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL) < 0) {
                if (log.isTraceEnabled()) {
                    log.trace("not inclusive " + top + " is before " + this.scanRange.getStartKey());
                }
                source.next();
                continue;
            }
        } else {
            if (!this.scanRange.isInfiniteStartKey() && top.compareTo(this.scanRange.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL) <= 0) {
                if (log.isTraceEnabled()) {
                    log.trace("inclusive " + top + " is before " + this.scanRange.getStartKey());
                }
                source.next();
                continue;
            }
        }
        // Aggregate the document. NOTE: This will advance the source iterator
        tk = buildDocument ? aggregation.apply(source, document, attributeFactory) : aggregation.apply(source, scanRange, seekColumnFamilies, includeColumnFamilies);
        if (log.isTraceEnabled()) {
            log.trace("Doc size: " + this.document.size());
            log.trace("Returning pointer " + tk);
        }
    }
}
Also used : Text(org.apache.hadoop.io.Text) Document(datawave.query.attributes.Document) Range(org.apache.accumulo.core.data.Range) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)
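
The skip-ahead in the middle of next() builds a new tf key rather than stepping entry by entry. A standalone sketch of that construction; the helper name, parameter names, and sample values are assumptions:

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

// When the current tf key's field sorts before the target field, jump straight to
// datatype \0 uid \0 value \0 targetField within the same row and column family.
static Range skipToField(Key top, String dataType, String uid, String value, String targetField, Key endKey, boolean endInclusive) {
    String cq = dataType + "\0" + uid + "\0" + value + "\0" + targetField;
    Key nextKey = new Key(top.getRow(), top.getColumnFamily(), new Text(cq));
    return new Range(nextKey, true, endKey, endInclusive);
}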

Aggregations

DatawaveKey (datawave.query.data.parsers.DatawaveKey) 15
Key (org.apache.accumulo.core.data.Key) 12
Document (datawave.query.attributes.Document) 5
EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter) 4
PartialKey (org.apache.accumulo.core.data.PartialKey) 4
Range (org.apache.accumulo.core.data.Range) 4
Text (org.apache.hadoop.io.Text) 4
Cardinality (datawave.query.attributes.Cardinality) 3
Entry (java.util.Map.Entry) 3
Value (org.apache.accumulo.core.data.Value) 3
Attribute (datawave.query.attributes.Attribute) 2
AttributeFactory (datawave.query.attributes.AttributeFactory) 2
Attributes (datawave.query.attributes.Attributes) 2
FieldValueCardinality (datawave.query.attributes.FieldValueCardinality) 2
ChainableEventDataQueryFilter (datawave.query.predicate.ChainableEventDataQueryFilter) 2
EventDataQueryFieldFilter (datawave.query.predicate.EventDataQueryFieldFilter) 2
TypeMetadata (datawave.query.util.TypeMetadata) 2
IOException (java.io.IOException) 2
ArrayList (java.util.ArrayList) 2
HashSet (java.util.HashSet) 2