use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class AttributeToCardinality method apply.
/**
 * Rebuilds the incoming document so that every field other than {@code Document.DOCKEY_FIELD_NAME} is represented by a {@link Cardinality}.
 * <p>
 * Attributes that already are a {@link Cardinality} are reused unchanged. Any other attribute is wrapped in a new {@link FieldValueCardinality} whose content
 * is the string form of the attribute data and whose doc is the incoming document. Multi-valued fields ({@link Attributes}) are converted member by member
 * into a new {@link Attributes} created with the original's to-keep flag.
 *
 * @param input
 *            the document key paired with the document to convert
 * @return an immutable entry holding the original key and a new document populated with the converted fields
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    final Document sourceDoc = input.getValue();
    final Key docKey = input.getKey();
    // for cardinalities, only use the visibility metadata
    final Key visibilityOnly = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, sourceDoc.getColumnVisibility(), -1);
    final Document convertedDoc = new Document();
    final Map<?, ?> sourceFields = (Map<?, ?>) sourceDoc.getData();
    final TreeMap<String, Attribute<? extends Comparable<?>>> convertedFields = Maps.newTreeMap();
    // parsed up front; only read when trace logging a single-valued field
    final DatawaveKey parsedKey = new DatawaveKey(input.getKey());

    for (Entry<?, ?> rawField : sourceFields.entrySet()) {
        // unchecked: the document data is treated as a map of field name to attribute
        Entry<String, Attribute<?>> field = (Entry<String, Attribute<?>>) rawField;
        final String fieldName = field.getKey();
        if (fieldName.equals(Document.DOCKEY_FIELD_NAME)) {
            // the dockey field is not carried over into the new document
            continue;
        }

        final Attribute<?> fieldValue = field.getValue();
        if (fieldValue instanceof Attributes) {
            // multi-valued field: convert each member and keep them grouped
            final Attributes group = (Attributes) fieldValue;
            final Attributes convertedGroup = new Attributes(group.isToKeep());
            for (Attribute<?> member : group.getAttributes()) {
                final Cardinality memberCardinality;
                if (member instanceof Cardinality) {
                    memberCardinality = (Cardinality) member;
                } else {
                    FieldValueCardinality estimate = new FieldValueCardinality();
                    estimate.setContent(member.getData().toString());
                    estimate.setDoc(sourceDoc);
                    memberCardinality = new Cardinality(estimate, visibilityOnly, group.isToKeep());
                    if (log.isTraceEnabled()) {
                        log.trace("Adding from attributes " + fieldName + " " + member.getData());
                    }
                }
                convertedGroup.add(memberCardinality);
            }
            convertedFields.put(fieldName, convertedGroup);
            continue;
        }

        // single-valued field
        final Cardinality cardinality;
        if (fieldValue instanceof Cardinality) {
            cardinality = (Cardinality) fieldValue;
        } else {
            FieldValueCardinality estimate = new FieldValueCardinality();
            estimate.setContent(fieldValue.getData().toString());
            estimate.setDoc(sourceDoc);
            cardinality = new Cardinality(estimate, visibilityOnly, fieldValue.isToKeep());
            if (log.isTraceEnabled()) {
                log.trace("Adding " + parsedKey.getUid() + " " + fieldName + " " + fieldValue.getData() + " " + estimate.getEstimate().cardinality());
            }
        }
        convertedFields.put(fieldName, cardinality);
    }

    convertedDoc.putAll(convertedFields.entrySet().iterator(), false);
    return Maps.immutableEntry(docKey, convertedDoc);
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class CardinalityAggregator method apply.
/**
 * Consumes every consecutive key from the iterator that shares the row and pointer of the current top key, adding one {@link Cardinality} per key to the
 * document under the key's field name.
 * <p>
 * When the key's value is non-empty it is read as a serialized {@link HyperLogLogPlus}; if the value is empty or cannot be read, a fresh
 * {@link FieldValueCardinality} is created instead (optionally tagged with the key's uid when {@code setDocIds} is set).
 *
 * @param itr
 *            the source iterator, positioned on the first key to aggregate; it is advanced past every aggregated key
 * @param doc
 *            the document receiving the cardinality attributes
 * @param attrs
 *            not read by this implementation
 * @return the parent key built from the last key that was aggregated
 * @throws IOException
 *             if advancing the iterator fails
 */
@Override
public Key apply(SortedKeyValueIterator<Key, Value> itr, Document doc, AttributeFactory attrs) throws IOException {
    Key lastAggregated = itr.getTopKey();
    final Text row = lastAggregated.getRow();
    final ByteSequence pointer = parsePointer(lastAggregated.getColumnQualifierData());

    for (Key current = lastAggregated; current != null && samePointer(row, pointer, current); current = (itr.hasTop() ? itr.getTopKey() : null)) {
        final DatawaveKey parsed = new DatawaveKey(current);
        final String fieldName = parsed.getFieldName();
        final String fieldValue = parsed.getFieldValue();
        final byte[] serialized = itr.getTopValue().get();

        FieldValueCardinality estimate = null;
        try {
            if (serialized.length > 0) {
                estimate = new FieldValueCardinality(HyperLogLogPlus.Builder.build(serialized));
                if (log.isTraceEnabled()) {
                    log.trace("Set cardinality from FI value");
                }
            }
        } catch (Exception e) {
            // best effort: an unreadable value falls through to a fresh estimate below
            if (log.isTraceEnabled()) {
                log.trace("Exception encountered " + e);
            }
        }

        if (estimate == null) {
            if (log.isTraceEnabled()) {
                log.trace("Building cardinality for " + parsed.getUid());
            }
            estimate = new FieldValueCardinality();
            if (setDocIds) {
                estimate.setDocId(parsed.getUid());
            }
        }
        estimate.setContent(fieldValue);

        // for cardinalities, only use the visibility metadata
        final Key visibilityOnly = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, itr.getTopKey().getColumnVisibility(), -1);
        final Cardinality cardinality = new Cardinality(estimate, visibilityOnly, doc.isToKeep());

        // only keep fields that are index only
        final boolean keep = fieldsToKeep == null || fieldsToKeep.contains(JexlASTHelper.removeGroupingContext(fieldName));
        cardinality.setToKeep(keep);
        doc.put(fieldName, cardinality);

        // remember the key just handled, then move the source forward
        lastAggregated = current;
        itr.next();
    }

    return TLD.buildParentKey(row, pointer, TLD.parseFieldAndValueFromFI(lastAggregated.getColumnFamilyData(), lastAggregated.getColumnQualifierData()),
                    lastAggregated.getColumnVisibility(), lastAggregated.getTimestamp());
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class TLDIndexBuildingVisitor method buildTermFrequencyAggregator.
/**
 * Builds the term frequency aggregator used for a TLD query.
 * <p>
 * Use fieldsToAggregate instead of indexOnlyFields because this enables TLDs to return non-event tokens as part of the user document.
 * <p>
 * A root filter is appended to the supplied {@code filter} (the argument is mutated). That root filter accepts everything in {@code apply}/{@code peek}
 * and only restricts which keys are kept; its seek/range/transform operations are unsupported.
 *
 * @param identifier
 *            the field identifier; it is aggregated only when it is contained in {@code fieldsToAggregate}
 * @param filter
 *            the chainable filter handed to the aggregator; the root filter is added to it
 * @param maxNextCount
 *            not read by this implementation; the aggregator receives {@code filter.getMaxNextCount()} instead
 * @return a {@link TLDTermFrequencyAggregator} over either the single identifier or no fields
 */
@Override
protected TermFrequencyAggregator buildTermFrequencyAggregator(String identifier, ChainableEventDataQueryFilter filter, int maxNextCount) {
    EventDataQueryFilter rootFilter = new EventDataQueryFilter() {
        @Override
        public void startNewDocument(Key documentKey) {
            // no-op
        }

        @Override
        public boolean apply(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        @Override
        public boolean peek(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        /**
         * Only keep the tf key if it isn't the root pointer or if it is index only and contributes to document evaluation
         *
         * @param k
         *            the term frequency key to test
         * @return true if the key should be kept
         */
        @Override
        public boolean keep(Key k) {
            // the key only needs to be parsed when it is a root pointer, so parse it lazily on that branch
            boolean candidate = !TLDEventDataFilter.isRootPointer(k) || indexOnlyFields.contains(new DatawaveKey(k).getFieldName());
            // typed entry instead of a raw SimpleEntry; the value is intentionally null
            return candidate && attrFilter.peek(new AbstractMap.SimpleEntry<Key, String>(k, null));
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the filter holds no state of its own, so the same instance is handed back
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            throw new UnsupportedOperationException();
        }
    };
    filter.addFilter(rootFilter);
    Set<String> toAggregate = fieldsToAggregate.contains(identifier) ? Collections.singleton(identifier) : Collections.emptySet();
    return new TLDTermFrequencyAggregator(toAggregate, filter, filter.getMaxNextCount());
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class FieldIndexKeyDataTypeFilter method getSeekRange.
/**
 * Suggests a range to seek to once enough consecutive keys have been rejected.
 * <p>
 * The new start key reuses the row and column family of {@code current} and a column qualifier made of the current field value, a null byte, and either the
 * next configured data type that sorts after the current one or, when there is none, the max unicode string (to roll past the current value).
 *
 * @param current
 *            the key that was just examined
 * @param endKey
 *            the end of the range being scanned; {@code null} is treated as an unbounded end
 * @param endKeyInclusive
 *            whether {@code endKey} is inclusive
 * @return {@code null} when seeking is disabled ({@code maxNextBeforeSeek == -1}), the next count has not reached the threshold, or the current data type is
 *         one of the accepted types; otherwise the range to seek to, which is empty when the computed start key falls after {@code endKey}
 */
@Override
public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
    // early return if possible
    if (maxNextBeforeSeek == -1 || nextCount < maxNextBeforeSeek) {
        return null;
    }

    // parse the key to get the value and dataType
    DatawaveKey datawaveKey = new DatawaveKey(current);
    final String currentDataType = datawaveKey.getDataType();

    // test if this key should have been accepted
    if (sortedDataTypes.contains(currentDataType)) {
        return null;
    }

    // still here, find the next valid sorted data type and apply it for a new range
    String nextDataType = null;
    for (String dataType : sortedDataTypes) {
        if (dataType.compareTo(currentDataType) > 0) {
            nextDataType = dataType;
            break;
        }
    }

    // both start key variants share the "<value>\0" prefix
    final String valuePrefix = datawaveKey.getFieldValue() + Constants.NULL_BYTE_STRING;

    // ensure a dataType was selected
    Key startKey;
    boolean inclusiveStart;
    if (nextDataType == null) {
        // roll over the key
        // this will be somewhat blind since the next value is not known
        startKey = new Key(current.getRow(), current.getColumnFamily(), new Text(valuePrefix + Constants.MAX_UNICODE_STRING));
        inclusiveStart = false;
    } else {
        // generate a new range with the current value and new dataType
        startKey = new Key(current.getRow(), current.getColumnFamily(), new Text(valuePrefix + nextDataType));
        inclusiveStart = true;
    }

    // a null endKey means the range is unbounded, so the start key can never be past it
    if (endKey != null && startKey.compareTo(endKey) > 0) {
        // generate an empty range
        return new Range(startKey, false, startKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME), false);
    }

    return new Range(startKey, inclusiveStart, endKey, endKeyInclusive);
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIterator method next.
/**
 * Advances the source until a key for the target field that lies inside the scan range is found, then aggregates it.
 * <p>
 * On return {@code tk} holds the aggregated key, or {@code null} if the source was exhausted first. {@code document} is replaced with a new instance on
 * every call.
 *
 * @throws IOException
 *             if reading from or seeking the source fails
 */
@Override
public void next() throws IOException {
    // We need to null this every time even though our fieldname and fieldvalue won't
    // change, we have the potential for the column visibility to change
    document = new Document();
    tk = null;
    // reusable buffers
    Text row = new Text(), cf = new Text(), cq = new Text();
    if (scanRange == null) {
        buildScanRangeLazily();
    }
    if (log.isTraceEnabled()) {
        log.trace(source.hasTop() + " nexting on " + scanRange);
    }
    while (source.hasTop() && tk == null) {
        Key top = source.getTopKey();
        row = top.getRow(row);
        top.getColumnFamily(cf);
        top.getColumnQualifier(cq);
        if (!cq.toString().endsWith(field)) {
            if (log.isTraceEnabled()) {
                log.trace(cq + " does not end with " + field);
            }
            source.next();
            continue;
        }

        DatawaveKey key = new DatawaveKey(top);
        Key nextTop = top;
        // step forward (at most 256 times) while the field name still sorts before the target field
        for (int i = 0; i < 256 && source.hasTop() && key.getFieldName().compareTo(field) < 0; ++i) {
            source.next();
            // only ask for the top key when one exists; an exhausted source is represented as null here
            nextTop = source.hasTop() ? source.getTopKey() : null;
            if (nextTop == null) {
                break;
            }
            key = new DatawaveKey(nextTop);
            if (log.isTraceEnabled()) {
                log.trace("Have key " + key + " < " + field);
            }
        }
        if (nextTop == null) {
            continue;
        }

        // NOTE(review): if the loop above advanced, 'key' describes nextTop while 'top', 'row' and 'cf' still
        // describe the key seen at the start of this iteration - confirm that this is intended

        if (key.getFieldName().compareTo(field) < 0) {
            // stepping was not enough, seek directly to the target field
            if (log.isTraceEnabled()) {
                log.trace("Have key " + key + " is less than " + field);
            }
            StringBuilder builder = new StringBuilder(key.getDataType()).append(Constants.NULL).append(key.getUid()).append(Constants.NULL)
                            .append(key.getFieldValue()).append(Constants.NULL).append(field);
            Key nextKey = new Key(row, cf, new Text(builder.toString()));
            Range newRange = new Range(nextKey, true, scanRange.getEndKey(), scanRange.isEndKeyInclusive());
            source.seek(newRange, seekColumnFamilies, true);
            continue;
        }

        // only inspect the values specified in the range since a broad row or uid range will potentially go cross document
        int startValueCmp = key.getFieldValue().compareTo(startKeyParser.getFieldValue());
        if (scanRange.isStartKeyInclusive() ? startValueCmp < 0 : startValueCmp <= 0) {
            source.next();
            continue;
        }
        int stopValueCmp = key.getFieldValue().compareTo(stopKeyParser.getFieldValue());
        if (scanRange.isEndKeyInclusive() ? stopValueCmp > 0 : stopValueCmp >= 0) {
            source.next();
            continue;
        }

        // skip keys that fall before the start of the scan range
        if (!this.scanRange.isInfiniteStartKey()) {
            boolean startInclusive = this.scanRange.isStartKeyInclusive();
            int startKeyCmp = top.compareTo(this.scanRange.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL);
            if (startInclusive ? startKeyCmp < 0 : startKeyCmp <= 0) {
                if (log.isTraceEnabled()) {
                    // the label reflects the actual inclusivity of the start key
                    log.trace((startInclusive ? "inclusive " : "not inclusive ") + top + " is before " + this.scanRange.getStartKey());
                }
                source.next();
                continue;
            }
        }

        // Aggregate the document. NOTE: This will advance the source iterator
        tk = buildDocument ? aggregation.apply(source, document, attributeFactory)
                        : aggregation.apply(source, scanRange, seekColumnFamilies, includeColumnFamilies);
        if (log.isTraceEnabled()) {
            log.trace("Doc size: " + this.document.size());
            log.trace("Returning pointer " + tk);
        }
    }
}
Aggregations