Search in sources :

Example 1 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

the class AncestorChildExpansionIterator method nextChild.

/**
 * Update topKey and topValue to point to the next valid fi key, either directly from the underlying iterator or, if a child exists that was not on the
 * iterator, by replicating the fi for that child until no more children match. Afterwards move on to the next item on the iterator that is after the
 * current key, advancing both iterators until they have been exhausted. There should only ever be one pass made through either iterator.
 * <p>
 * State touched by this method: {@code initialized} is always set; on the first call with a non-empty iterator {@code iteratorKeyInfo}, {@code topKey}
 * and {@code topValue} are taken from the iterator's current top; on later calls {@code lastChildIndex} is advanced and {@code topKey} is replaced by
 * either a synthesized child key, the key returned by {@code advanceBaseIterator()}, or {@code null} when nothing further was found.
 */
private void nextChild() {
    // an attempt has been made to reach the topKey
    initialized = true;
    // decide if starting from topKey or the first item in the iterator if topKey has never been assigned
    if (topKey == null && iterator.hasTop()) {
        final Key iteratorTop = iterator.getTopKey();
        final DatawaveKey datawaveKey = new DatawaveKey(iteratorTop);
        // remember the iterator's key, its parsed form, and the partial key used later for the partOf test
        iteratorKeyInfo = new BaseIteratorInfo(iteratorTop, datawaveKey, getPartOfKey(iteratorTop, datawaveKey.getDataType(), datawaveKey.getUid(), datawaveKey.getFieldName(), datawaveKey.getFieldValue()));
        topKey = iteratorKeyInfo.getKey();
        topValue = iterator.getTopValue();
        return;
    }
    // if there is nothing in the iterator, bail out
    if (topKey == null) {
        return;
    }
    // parse out components to generate new keys; these come from the last key read off the base iterator,
    // not from a previously synthesized child key
    final DatawaveKey datawaveKey = iteratorKeyInfo.getDatawaveKey();
    final String uidHit = datawaveKey.getUid();
    final String field = datawaveKey.getFieldName();
    final String value = datawaveKey.getFieldValue();
    final String dataType = datawaveKey.getDataType();
    // find the next suitable top key, either from the children or the underlying iterator. The goal is to make only
    // a single pass through the children and the iterator unless there is a seek. This is possible because the children
    // are assumed to be lexicographically sorted
    Key next = null;
    // the loop stops as soon as a candidate key has been found or the children are exhausted
    for (int i = lastChildIndex + 1; next == null && i < children.size(); i++) {
        final String child = children.get(i);
        // Only evaluate children that come after the uid of the iterator.
        if (uidHit.compareTo(child) < 0) {
            // build comparable partial keys for the iterator hit and for this child, then test them with equality.partOf
            final Key iteratorTestKey = iteratorKeyInfo.getPartOfKey();
            final Key childTestKey = getPartOfKey(topKey, dataType, child, field, value);
            if (equality.partOf(childTestKey, iteratorTestKey)) {
                // generate an fi matching the previous key but with the new uid; topValue is intentionally left as-is here
                // (NOTE(review): presumably the replicated fi should carry the same value as the hit - confirm)
                next = new Key(topKey.getRow().toString(), topKey.getColumnFamily().toString(), value + Constants.NULL_BYTE_STRING + dataType + Constants.NULL_BYTE_STRING + child, topKey.getColumnVisibilityParsed(), topKey.getTimestamp());
            } else {
                // since the child came after the current topKey but was not a child, the next top will have to come off
                // of the iterator
                next = advanceBaseIterator();
            }
        }
        // no need to evaluate this child again since everything is sorted
        // NOTE(review): when advanceBaseIterator() exhausted the iterator it already set lastChildIndex to children.size(),
        // so this increment pushes it past the end while the local index i keeps walking the remaining children - confirm
        // that this is harmless
        lastChildIndex++;
    }
    // there should be a direct relationship between the children and parent, but do this as a safety check
    if (next == null && iterator.hasTop()) {
        next = advanceBaseIterator();
    }
    // assign the next Key to the topKey; this is null when neither the children nor the iterator produced a key
    topKey = next;
}
Also used : DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key)

Example 2 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

the class AncestorChildExpansionIterator method advanceBaseIterator.

/**
 * Attempt to advance the base iterator beyond the current topKey.
 * <p>
 * When a later key is found, {@code iteratorKeyInfo} and {@code topValue} are refreshed from the iterator. When the iterator is exhausted,
 * {@code lastChildIndex} is set to {@code children.size()} so subsequent calls can bail out early.
 *
 * @return the first item from the iterator that is beyond the current topKey, or null
 * @throws RuntimeException
 *             if an iterator fails to advance; the underlying {@link IOException} is attached as the cause
 */
private Key advanceBaseIterator() {
    Key next = null;
    // skip every entry on the iterator that is less than or equal to the current topKey
    try {
        while (iterator.hasTop() && iterator.getTopKey().compareTo(topKey) <= 0) {
            iterator.next();
        }
    } catch (IOException e) {
        // keep the original exception as the cause so the root failure and its stack trace are not lost
        throw new RuntimeException("Failed to advance base iterator", e);
    }
    // as long as we didn't exhaust the iterator looking for a key following the topKey grab it and set it to the top
    if (iterator.hasTop()) {
        // there was another value on the iterator, keep going
        final Key iteratorTop = iterator.getTopKey();
        final DatawaveKey datawaveKey = new DatawaveKey(iteratorTop);
        iteratorKeyInfo = new BaseIteratorInfo(iteratorTop, datawaveKey, getPartOfKey(iteratorTop, datawaveKey.getDataType(), datawaveKey.getUid(), datawaveKey.getFieldName(), datawaveKey.getFieldValue()));
        next = iteratorKeyInfo.getKey();
        topValue = iterator.getTopValue();
    } else {
        // if there is not a current top, there is no other top, so we have reached the end. Set the lastChildIndex
        // so that subsequent calls won't have to determine this a second time and bail out
        lastChildIndex = children.size();
    }
    return next;
}
Also used : IOException(java.io.IOException) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key)

Example 3 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

the class StatsHyperLogMapper method map.

/**
 * Handles a single input entry. Entries whose column family starts with the {@code "fi"} token (split on {@code NULL_CHAR}) are counted, parsed as a
 * {@link DatawaveKey}, and their field value is added to the {@code DataTypeInfo} registered for the key's data type. Whenever the field name differs
 * from the one currently being tracked, {@code flushFieldValues(context)} is called before tracking the new name. Progress is reported for every
 * entry, whether or not it was a field index entry.
 *
 * @param key
 *            the input key
 * @param value
 *            the input value (only used for trace logging here)
 * @param context
 *            the mapper context
 * @throws IOException
 *             declared by the mapper contract; may be raised while flushing
 * @throws InterruptedException
 *             declared by the mapper contract; may be raised while flushing
 */
@Override
protected void map(Key key, Value value, Context context) throws IOException, InterruptedException {
    if (log.isTraceEnabled()) {
        log.trace("map(" + key + ", " + value + ")");
    }

    // range should find all field index rows
    final String columnFamily = key.getColumnFamily().toString();
    final String[] familyTokens = StringUtils.split(columnFamily, NULL_CHAR);
    final boolean fieldIndexEntry = "fi".equals(familyTokens[0]);

    if (fieldIndexEntry) {
        // periodic progress message driven by the running input count
        this.total++;
        if (this.total % this.logInputInterval == 0) {
            log.info("input row count(" + this.total + ")");
        }

        final DatawaveKey parsedKey = new DatawaveKey(key);
        if (log.isTraceEnabled()) {
            log.trace("mapper input(" + parsedKey.toString() + ")");
        }

        // flush only when a previously tracked field name is being replaced by a different one
        final String incomingField = parsedKey.getFieldName();
        final boolean firstField = (this.currentFieldName == null);
        final boolean fieldChanged = !firstField && !this.currentFieldName.equals(incomingField);
        if (fieldChanged) {
            flushFieldValues(context);
        }
        if (firstField || fieldChanged) {
            this.currentFieldName = incomingField;
        }

        // add value to proper data type, creating the per-type holder on first sight
        final String type = parsedKey.getDataType();
        final DataTypeInfo info = this.dataTypeMapping.computeIfAbsent(type,
                        ignored -> new DataTypeInfo(parsedKey, this.sumUniqueCounts, this.normalPrecision, this.sparsePrecision));
        info.add(parsedKey.getFieldValue());
    }

    context.progress();

    if (log.isTraceEnabled()) {
        log.trace("Completed map(" + key + ", " + value + ")");
    }
}
Also used : DatawaveKey(datawave.query.data.parsers.DatawaveKey)

Example 4 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

the class TLDQueryIterator method getFIEvaluationFilter.

/**
 * Builds the filter applied to field index hits. Distinct from the evaluation filter: the FI filter is used to prevent FI hits on nonEventFields that
 * are not indexOnly fields.
 *
 * @return a chained filter made of the current evaluation filter followed by a TLD-specific filter whose {@code keep} rejects root-pointer keys for
 *         fields that are not index only
 */
protected EventDataQueryFilter getFIEvaluationFilter() {
    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();

    // primary filter on the current filter
    chain.addFilter(getEvaluationFilter());

    // prevent anything that is not an index only field from being kept at the tld level, otherwise allow all
    final EventDataQueryFilter indexOnlyAtRootFilter = new EventDataQueryFilter() {

        @Override
        public void startNewDocument(Key documentKey) {
            // nothing to reset per document
        }

        @Override
        public boolean apply(@Nullable Map.Entry<Key, String> entry) {
            return true;
        }

        @Override
        public boolean peek(@Nullable Map.Entry<Key, String> entry) {
            return true;
        }

        /**
         * Keep any FI that is index only and part of the TLD or is not part of the TLD
         *
         * @param k
         *            the field index key to test
         * @return true when the key is not a root pointer, or when it is a root pointer for an index only field
         */
        @Override
        public boolean keep(Key k) {
            final boolean rootPointer = TLDEventDataFilter.isRootPointer(k);
            final DatawaveKey parsed = new DatawaveKey(k);
            if (!rootPointer) {
                return true;
            }
            return getIndexOnlyFields().contains(parsed.getFieldName());
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Map.Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the same instance is handed back rather than a copy
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            return null;
        }
    };

    chain.addFilter(indexOnlyAtRootFilter);
    return chain;
}
Also used : ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) Nullable(javax.annotation.Nullable)

Example 5 with DatawaveKey

use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.

the class KeyProjection method apply.

/**
 * Parses the entry's key as a {@link DatawaveKey}, strips the grouping context from its field name, and delegates the decision to the projection.
 *
 * @param input
 *            the entry whose key is inspected
 * @return the result of applying the projection to the field name without its grouping context
 * @see com.google.common.base.Predicate#apply(java.lang.Object)
 */
@Override
public boolean apply(Entry<Key, String> input) {
    final String rawFieldName = new DatawaveKey(input.getKey()).getFieldName();
    return projection.apply(JexlASTHelper.removeGroupingContext(rawFieldName));
}
Also used : DatawaveKey(datawave.query.data.parsers.DatawaveKey)

Aggregations

DatawaveKey (datawave.query.data.parsers.DatawaveKey) 15, Key (org.apache.accumulo.core.data.Key) 12, Document (datawave.query.attributes.Document) 5, EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter) 4, PartialKey (org.apache.accumulo.core.data.PartialKey) 4, Range (org.apache.accumulo.core.data.Range) 4, Text (org.apache.hadoop.io.Text) 4, Cardinality (datawave.query.attributes.Cardinality) 3, Entry (java.util.Map.Entry) 3, Value (org.apache.accumulo.core.data.Value) 3, Attribute (datawave.query.attributes.Attribute) 2, AttributeFactory (datawave.query.attributes.AttributeFactory) 2, Attributes (datawave.query.attributes.Attributes) 2, FieldValueCardinality (datawave.query.attributes.FieldValueCardinality) 2, ChainableEventDataQueryFilter (datawave.query.predicate.ChainableEventDataQueryFilter) 2, EventDataQueryFieldFilter (datawave.query.predicate.EventDataQueryFieldFilter) 2, TypeMetadata (datawave.query.util.TypeMetadata) 2, IOException (java.io.IOException) 2, ArrayList (java.util.ArrayList) 2, HashSet (java.util.HashSet) 2