Search in sources :

Example 6 with DatawaveKey

Use of DatawaveKey in project datawave by NationalSecurityAgency.

The class AttributeToCardinality, method apply.

     * (non-Javadoc)
     * @see
/**
 * Builds a new document from the incoming one in which every field other than the document-key
 * field is represented by {@link Cardinality} attributes, and returns it paired with the input key.
 * <p>
 * NOTE(review): this excerpt appears to have lost its closing braces (and possibly some statements)
 * during extraction; the comments below describe only the statements that are visible.
 *
 * @param input entry whose value is the document to convert
 * @return an immutable entry of the input key and the newly built document
 */
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Document prevDoc = input.getValue();
    Key key = input.getKey();
    // for cardinalities, only use the visibility metadata
    Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, prevDoc.getColumnVisibility(), -1);
    Document newDoc = new Document();
    // The casts here and inside the loop assume getData() yields a field-name to Attribute map -- TODO confirm.
    Map<?, ?> dictionary = (Map<?, ?>) prevDoc.getData();
    // Converted fields are collected here and copied into newDoc after the loop.
    TreeMap<String, Attribute<? extends Comparable<?>>> newDictionary = Maps.newTreeMap();
    // In the visible lines the parsed key is only read for the trace message further down (getUid).
    DatawaveKey parser = new DatawaveKey(input.getKey());
    for (Entry<?, ?> attrE : dictionary.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        // The document-key field is not converted.
        if (!attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            Attribute<?> attribute = attr.getValue();
            if (attribute instanceof Attributes) {
                // Container of several attributes: each contained item is handled individually.
                Attributes attrs = (Attributes) attribute;
                Attributes newAttrs = new Attributes(attrs.isToKeep());
                for (Attribute<?> attributeItem : attrs.getAttributes()) {
                    Cardinality card = null;
                    // An item that already is a Cardinality is reused; otherwise a new one is constructed.
                    if (attributeItem instanceof Cardinality) {
                        card = (Cardinality) attributeItem;
                    } else {
                        FieldValueCardinality fvC = new FieldValueCardinality();
                        card = new Cardinality(fvC, metadata, attrs.isToKeep());
                        if (log.isTraceEnabled())
                            log.trace("Adding from attributes " + attr.getKey() + " " + attributeItem.getData());
                // NOTE(review): no visible statement adds card to newAttrs, nor populates fvC -- presumably
                // stripped from this excerpt; confirm against the full source.
                newDictionary.put(attr.getKey(), newAttrs);
            } else {
                // Single attribute: same reuse-or-construct handling as for the container items above.
                Cardinality card = null;
                if (attribute instanceof Cardinality) {
                    card = (Cardinality) attribute;
                } else {
                    FieldValueCardinality fvC = new FieldValueCardinality();
                    card = new Cardinality(fvC, metadata, attribute.isToKeep());
                    if (log.isTraceEnabled())
                        log.trace("Adding " + parser.getUid() + " " + attr.getKey() + " " + attribute.getData() + " " + fvC.getEstimate().cardinality());
                newDictionary.put(attr.getKey(), card);
    // Copy the converted fields into the new document and pair it with the original key.
    newDoc.putAll(newDictionary.entrySet().iterator(), false);
    return Maps.immutableEntry(key, newDoc);
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Entry(java.util.Map.Entry) TreeMap(java.util.TreeMap) Map(java.util.Map) DatawaveKey( FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) DatawaveKey( Key(

Example 7 with DatawaveKey

Use of DatawaveKey in project datawave by NationalSecurityAgency.

The class CardinalityAggregator, method apply.

/**
 * Processes keys from the iterator for as long as they match the starting key's row and pointer,
 * putting a {@link Cardinality} attribute into {@code doc} under each key's field name, and returns
 * a parent key built from the row, the pointer and the last value assigned to {@code key}.
 * <p>
 * NOTE(review): this excerpt appears to have lost closing braces and some statements during
 * extraction (for example, no call that advances the iterator is visible); the comments below
 * describe only the statements that are visible.
 *
 * @param itr iterator positioned on the first key to process
 * @param doc document that receives one cardinality attribute per processed key
 * @param attrs attribute factory; not referenced in the visible lines
 * @return the key produced by {@code TLD.buildParentKey}
 * @throws IOException declared by the signature; presumably raised by iterator calls -- confirm
 */
public Key apply(SortedKeyValueIterator<Key, Value> itr, Document doc, AttributeFactory attrs) throws IOException {
    Key key = itr.getTopKey();
    Text row = key.getRow();
    // The row and this pointer together decide (via samePointer) which keys belong to this call.
    ByteSequence pointer = parsePointer(key.getColumnQualifierData());
    Key nextKey = key;
    while (nextKey != null && samePointer(row, pointer, nextKey)) {
        DatawaveKey topKey = new DatawaveKey(nextKey);
        String field = topKey.getFieldName();
        String value = topKey.getFieldValue();
        FieldValueCardinality fvC = null;
        byte[] currentValue = itr.getTopValue().get();
        // First attempt: derive the cardinality from the stored value when it is non-empty.
        try {
            if (currentValue.length > 0) {
                // NOTE(review): the constructor arguments on the next line appear truncated in this excerpt.
                fvC = new FieldValueCardinality(;
                if (log.isTraceEnabled()) {
                    log.trace("Set cardinality from FI value");
        } catch (Exception e) {
            // In the visible lines a failure is only trace-logged; fvC then stays null.
            if (log.isTraceEnabled()) {
                log.trace("Exception encountered " + e);
        // Fallback: construct a new cardinality when none was obtained above.
        if (null == fvC) {
            if (log.isTraceEnabled())
                log.trace("Building cardinality for " + topKey.getUid());
            fvC = new FieldValueCardinality();
            // NOTE(review): the statement guarded by this condition is missing from the excerpt.
            if (setDocIds)
        // for cardinalities, only use the visibility metadata
        Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, itr.getTopKey().getColumnVisibility(), -1);
        Cardinality card = new Cardinality(fvC, metadata, doc.isToKeep());
        // only keep fields that are index only
        // A null fieldsToKeep keeps every field; otherwise the field name, with its grouping
        // context removed, must be contained in fieldsToKeep.
        card.setToKeep(fieldsToKeep == null || fieldsToKeep.contains(JexlASTHelper.removeGroupingContext(field)));
        doc.put(field, card);
        // Remember the key just handled, then read the iterator's current top key; null ends the loop.
        key = nextKey;;
        nextKey = (itr.hasTop() ? itr.getTopKey() : null);
    return TLD.buildParentKey(row, pointer, TLD.parseFieldAndValueFromFI(key.getColumnFamilyData(), key.getColumnQualifierData()), key.getColumnVisibility(), key.getTimestamp());
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) Text( DatawaveKey( FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) DatawaveKey( Key( ByteSequence( IOException(

Example 8 with DatawaveKey

Use of DatawaveKey in project datawave by NationalSecurityAgency.

The class TLDIndexBuildingVisitor, method buildTermFrequencyAggregator.

 * Use fieldsToAggregate instead of indexOnlyFields because this enables TLDs to return non-event tokens as part of the user document
 * @param filter
 * @param maxNextCount
 * @return
/**
 * Builds a {@code TLDTermFrequencyAggregator} for the given identifier. The identifier is passed as
 * the only field to aggregate when {@code fieldsToAggregate} contains it; otherwise an empty set is
 * passed.
 * <p>
 * NOTE(review): this excerpt appears to have lost closing braces (and possibly statements) during
 * extraction. In the visible lines {@code rootFilter} is defined but never referenced afterwards,
 * and the {@code maxNextCount} parameter is not read ({@code filter.getMaxNextCount()} is passed
 * instead) -- confirm against the full source.
 *
 * @param identifier field name to aggregate
 * @param filter filter handed to the aggregator
 * @param maxNextCount not read in the visible lines
 * @return the new aggregator
 */
protected TermFrequencyAggregator buildTermFrequencyAggregator(String identifier, ChainableEventDataQueryFilter filter, int maxNextCount) {
    // Anonymous filter: apply/peek accept everything, keep() holds the selection logic, and the
    // range/key helper methods throw UnsupportedOperationException.
    EventDataQueryFilter rootFilter = new EventDataQueryFilter() {

        public void startNewDocument(Key documentKey) {
        // no-op

        public boolean apply(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;

        public boolean peek(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;

         * Only keep the tf key if it isn't the root pointer or if it is index only and contributes to document evaluation
         * @param k
         * @return
        public boolean keep(Key k) {
            DatawaveKey key = new DatawaveKey(k);
            // Kept when (not a root pointer OR the field is index-only) AND attrFilter.peek accepts the key.
            return (!TLDEventDataFilter.isRootPointer(k) || indexOnlyFields.contains(key.getFieldName())) && attrFilter.peek(new AbstractMap.SimpleEntry(k, null));

        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();

        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();

        public Range getKeyRange(Entry<Key, Document> from) {
            throw new UnsupportedOperationException();

        public EventDataQueryFilter clone() {
            // Returns this same instance rather than a copy.
            return this;

        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();

        public int getMaxNextCount() {
            return -1;

        public Key transform(Key toTransform) {
            throw new UnsupportedOperationException();
    // Aggregate the identifier only when it is one of the configured fields to aggregate.
    Set<String> toAggregate = fieldsToAggregate.contains(identifier) ? Collections.singleton(identifier) : Collections.emptySet();
    return new TLDTermFrequencyAggregator(toAggregate, filter, filter.getMaxNextCount());
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) Entry(java.util.Map.Entry) DatawaveKey( DatawaveKey( Key( PartialKey( Nullable(javax.annotation.Nullable)

Example 9 with DatawaveKey

Use of DatawaveKey in project datawave by NationalSecurityAgency.

The class FieldIndexKeyDataTypeFilter, method getSeekRange.

/**
 * Computes the range to seek to when the current key's data type is not one of the accepted data
 * types, or returns {@code null} when no seek should be performed.
 * <p>
 * NOTE(review): this excerpt appears to have lost its closing braces (and possibly statements)
 * during extraction; the comments below describe only the statements that are visible.
 *
 * @param current key the caller is currently positioned on
 * @param endKey end key of the range being scanned
 * @param endKeyInclusive whether {@code endKey} is inclusive
 * @return {@code null} if seeking is disabled, the next-count threshold has not been reached, or
 *         the current data type is accepted; otherwise the range to seek to
 */
public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
    // early return if possible
    // (maxNextBeforeSeek of -1, or fewer than maxNextBeforeSeek nexts so far, means no seek)
    if (maxNextBeforeSeek == -1 || nextCount < maxNextBeforeSeek) {
        return null;
    // parse the key to get the value and dataType
    DatawaveKey datawaveKey = new DatawaveKey(current);
    // test if this key should have been accepted
    if (sortedDataTypes.contains(datawaveKey.getDataType())) {
        return null;
    // still here, find the next valid sorted data type and apply it for a new range
    String nextDataType = null;
    for (String dataType : sortedDataTypes) {
        if (dataType.compareTo(datawaveKey.getDataType()) > 0) {
            nextDataType = dataType;
            // NOTE(review): no break is visible after this assignment; without one the last greater
            // data type in iteration order is selected rather than the first -- confirm against the
            // full source.
    // ensure a dataType was selected
    Key startKey;
    boolean inclusiveStart;
    if (nextDataType == null) {
        // roll over the key
        // this will be somewhat blind since the next value is not known
        // The start key is the current field value followed by the null byte and
        // Constants.MAX_UNICODE_STRING, and it is excluded from the range.
        startKey = new Key(current.getRow(), current.getColumnFamily(), new Text(datawaveKey.getFieldValue() + Constants.NULL_BYTE_STRING + Constants.MAX_UNICODE_STRING));
        inclusiveStart = false;
    } else {
        // generate a new range with the current value and new dataType
        startKey = new Key(current.getRow(), current.getColumnFamily(), new Text(datawaveKey.getFieldValue() + Constants.NULL_BYTE_STRING + nextDataType));
        inclusiveStart = true;
    // The computed start sorts after the end key, so there is nothing left to scan in the original range.
    if (startKey.compareTo(endKey) > 0) {
        // generate an empty range
        return new Range(startKey, false, startKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME), false);
    return new Range(startKey, inclusiveStart, endKey, endKeyInclusive);
Also used : Text( Range( DatawaveKey( DatawaveKey( Key( PartialKey(

Example 10 with DatawaveKey

Use of DatawaveKey in project datawave by NationalSecurityAgency.

The class TermFrequencyIndexIterator, method next.

/**
 * Resets the current document and top key, then inspects keys from the source while it has a top
 * key and no top key has been produced, handing off to the aggregation to produce one.
 * <p>
 * NOTE(review): this excerpt is heavily truncated by extraction: closing braces, the bodies of
 * several conditionals, and at least part of one call (see the line ending in
 * {@code seekColumnFamilies, true);}) are missing. The comments below describe only the statements
 * that are visible.
 *
 * @throws IOException declared by the signature; presumably raised by source calls -- confirm
 */
public void next() throws IOException {
    // We need to null this every time even though our fieldname and fieldvalue won't
    // change, we have the potential for the column visibility to change
    document = new Document();
    tk = null;
    // reusable buffers
    Text row = new Text(), cf = new Text(), cq = new Text();
    // NOTE(review): the body of this null check is missing from the excerpt.
    if (scanRange == null) {
    if (log.isTraceEnabled()) {
        log.trace(source.hasTop() + " nexting on " + scanRange);
    while (source.hasTop() && tk == null) {
        Key top = source.getTopKey();
        row = top.getRow(row);
        // NOTE(review): cf and cq are never populated in the visible lines -- presumably the
        // corresponding reads from top were stripped; confirm.
        if (!cq.toString().endsWith(field)) {
            if (log.isTraceEnabled()) {
                log.trace(cq + " does not end with " + field);
        DatawaveKey key = new DatawaveKey(top);
        Key nextTop = top;
        // At most 256 iterations while the parsed field name sorts before the target field.
        // NOTE(review): no call advancing the source is visible inside this loop.
        for (int i = 0; i < 256 && source.hasTop() && key.getFieldName().compareTo(field) < 0; ++i) {
            nextTop = source.getTopKey();
            if (nextTop == null)
            key = new DatawaveKey(nextTop);
            if (log.isTraceEnabled()) {
                log.trace("Have key " + key + " < " + field);
        if (nextTop == null)
        // Still before the target field: build a key for the same data type, uid and value at the
        // target field and a range from it to the end of the scan range.
        if (key.getFieldName().compareTo(field) < 0) {
            if (log.isTraceEnabled()) {
                log.trace("Have key " + key + " is less than " + field);
            StringBuilder builder = new StringBuilder(key.getDataType()).append(Constants.NULL).append(key.getUid()).append(Constants.NULL).append(key.getFieldValue()).append(Constants.NULL).append(field);
            Key nextKey = new Key(row, cf, new Text(builder.toString()));
            Range newRange = new Range(nextKey, true, scanRange.getEndKey(), scanRange.isEndKeyInclusive());
            // NOTE(review): the start of the next statement is truncated; presumably a seek to newRange -- confirm.
  , seekColumnFamilies, true);
        // only inspect the values specified in the range since a broad row or uid range will potentially go cross document
        // The four conditions compare the key's field value with the start/stop key values, honoring
        // the inclusive flags. NOTE(review): the branch bodies are missing from the excerpt.
        if (scanRange.isStartKeyInclusive() && key.getFieldValue().compareTo(startKeyParser.getFieldValue()) < 0) {
        } else if (!scanRange.isStartKeyInclusive() && key.getFieldValue().compareTo(startKeyParser.getFieldValue()) <= 0) {
        } else if (scanRange.isEndKeyInclusive() && key.getFieldValue().compareTo(stopKeyParser.getFieldValue()) > 0) {
        } else if (!scanRange.isEndKeyInclusive() && key.getFieldValue().compareTo(stopKeyParser.getFieldValue()) >= 0) {
        // Detect a top key that sorts before the range start (compared on row, column family and
        // column qualifier); only trace logging is visible for these cases.
        if (this.scanRange.isStartKeyInclusive()) {
            if (!this.scanRange.isInfiniteStartKey() && top.compareTo(this.scanRange.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL) < 0) {
                if (log.isTraceEnabled()) {
                    log.trace("not inclusive " + top + " is before " + this.scanRange.getStartKey());
        } else {
            if (!this.scanRange.isInfiniteStartKey() && top.compareTo(this.scanRange.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL) <= 0) {
                if (log.isTraceEnabled()) {
                    log.trace("inclusive " + top + " is before " + this.scanRange.getStartKey());
        // Aggregate the document. NOTE: This will advance the source iterator
        // buildDocument selects the overload that fills in document; the other overload takes the range and column families.
        tk = buildDocument ? aggregation.apply(source, document, attributeFactory) : aggregation.apply(source, scanRange, seekColumnFamilies, includeColumnFamilies);
        if (log.isTraceEnabled()) {
            log.trace("Doc size: " + this.document.size());
            log.trace("Returning pointer " + tk);
Also used : Text( Document(datawave.query.attributes.Document) Range( DatawaveKey( DatawaveKey( Key( PartialKey(


DatawaveKey ( Key ( Document (datawave.query.attributes.Document)5 EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter)4 PartialKey ( Range ( Text ( Cardinality (datawave.query.attributes.Cardinality)3 Entry (java.util.Map.Entry)3 Value ( Attribute (datawave.query.attributes.Attribute)2 AttributeFactory (datawave.query.attributes.AttributeFactory)2 Attributes (datawave.query.attributes.Attributes)2 FieldValueCardinality (datawave.query.attributes.FieldValueCardinality)2 ChainableEventDataQueryFilter (datawave.query.predicate.ChainableEventDataQueryFilter)2 EventDataQueryFieldFilter (datawave.query.predicate.EventDataQueryFieldFilter)2 TypeMetadata (datawave.query.util.TypeMetadata)2 IOException ( ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2