use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class AncestorChildExpansionIterator method nextChild.
/**
 * Move topKey to the next field index key to return. The key either comes straight off the underlying iterator (in which case topValue is refreshed as
 * well) or is synthesized for a child uid that follows the iterator's uid and passes the equality check against the iterator's key. Children are walked
 * from lastChildIndex forward and never revisited, which relies on them being lexicographically sorted. When neither source yields a key, topKey is set to
 * null.
 */
private void nextChild() {
    // record that positioning on a top key has been attempted
    initialized = true;

    if (topKey == null) {
        // nothing has been assigned yet: adopt the iterator's current top if there is one, otherwise there is nothing to do
        if (iterator.hasTop()) {
            final Key baseTop = iterator.getTopKey();
            final DatawaveKey parsedTop = new DatawaveKey(baseTop);
            iteratorKeyInfo = new BaseIteratorInfo(baseTop, parsedTop,
                            getPartOfKey(baseTop, parsedTop.getDataType(), parsedTop.getUid(), parsedTop.getFieldName(), parsedTop.getFieldValue()));
            topKey = iteratorKeyInfo.getKey();
            topValue = iterator.getTopValue();
        }
        return;
    }

    // components of the last key taken from the base iterator; these are reused to build keys for children
    final DatawaveKey current = iteratorKeyInfo.getDatawaveKey();
    final String currentUid = current.getUid();
    final String fieldName = current.getFieldName();
    final String fieldValue = current.getFieldValue();
    final String type = current.getDataType();

    // walk the remaining children until one of them produces the next key; each child is consumed exactly once
    Key candidate = null;
    int index = lastChildIndex + 1;
    while (candidate == null && index < children.size()) {
        final String childUid = children.get(index);
        // only children sorting after the iterator's uid are of interest
        if (currentUid.compareTo(childUid) < 0) {
            final Key basePart = iteratorKeyInfo.getPartOfKey();
            final Key childPart = getPartOfKey(topKey, type, childUid, fieldName, fieldValue);
            if (equality.partOf(childPart, basePart)) {
                // replicate the current fi key, swapping in the child's uid
                final String row = topKey.getRow().toString();
                final String columnFamily = topKey.getColumnFamily().toString();
                final String columnQualifier = fieldValue + Constants.NULL_BYTE_STRING + type + Constants.NULL_BYTE_STRING + childUid;
                candidate = new Key(row, columnFamily, columnQualifier, topKey.getColumnVisibilityParsed(), topKey.getTimestamp());
            } else {
                // the child sorts after the current key but failed the equality check, so the next key must come from the base iterator
                candidate = advanceBaseIterator();
            }
        }
        // this child never needs to be looked at again
        lastChildIndex++;
        index++;
    }

    // safety net: no child produced a key, but the base iterator may still have something beyond the current top
    if (candidate == null && iterator.hasTop()) {
        candidate = advanceBaseIterator();
    }

    topKey = candidate;
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class AncestorChildExpansionIterator method advanceBaseIterator.
/**
 * Attempt to advance the base iterator beyond the current topKey. On success iteratorKeyInfo and topValue are updated to reflect the iterator's new top; if
 * the iterator is exhausted, lastChildIndex is set to children.size() so later calls skip the child scan.
 *
 * @return the first key from the iterator that sorts strictly after the current topKey, or null if the iterator is exhausted
 * @throws RuntimeException
 *             if the iterator fails to advance; the underlying IOException is attached as the cause
 */
private Key advanceBaseIterator() {
    Key next = null;
    // skip every key on the iterator that is less than or equal to the current topKey
    try {
        while (iterator.hasTop() && iterator.getTopKey().compareTo(topKey) <= 0) {
            iterator.next();
        }
    } catch (IOException e) {
        // keep the original exception as the cause so the underlying I/O failure is not lost
        throw new RuntimeException("Failed to advance base iterator", e);
    }
    // as long as we didn't exhaust the iterator looking for a key following the topKey grab it and set it to the top
    if (iterator.hasTop()) {
        // there was another value on the iterator, keep going
        final Key iteratorTop = iterator.getTopKey();
        final DatawaveKey datawaveKey = new DatawaveKey(iteratorTop);
        iteratorKeyInfo = new BaseIteratorInfo(iteratorTop, datawaveKey,
                        getPartOfKey(iteratorTop, datawaveKey.getDataType(), datawaveKey.getUid(), datawaveKey.getFieldName(), datawaveKey.getFieldValue()));
        next = iteratorKeyInfo.getKey();
        topValue = iterator.getTopValue();
    } else {
        // if there is not a current top, there is no other top, so we have reached the end. Set the lastChildIndex
        // so that subsequent calls won't have to determine this a second time and bail out
        lastChildIndex = children.size();
    }
    return next;
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class StatsHyperLogMapper method map.
/**
 * Process one input entry. Entries whose column family starts with "fi" are counted, parsed as a DatawaveKey, and their field value is added to the
 * DataTypeInfo tracked for the key's data type. When the field name differs from the one currently being accumulated, the accumulated values are flushed
 * first. Progress is reported to the context for every entry, whether or not it was a field index entry.
 *
 * @param key
 *            the input key
 * @param value
 *            the input value (only used for trace logging here)
 * @param context
 *            the mapper context
 * @throws IOException
 *             declared by the overridden method
 * @throws InterruptedException
 *             declared by the overridden method
 */
@Override
protected void map(Key key, Value value, Context context) throws IOException, InterruptedException {
    if (log.isTraceEnabled()) {
        log.trace("map(" + key + ", " + value + ")");
    }

    // range should find all field index rows
    final String columnFamily = key.getColumnFamily().toString();
    final String[] cfParts = StringUtils.split(columnFamily, NULL_CHAR);
    if ("fi".equals(cfParts[0])) {
        // periodic heartbeat on the number of field index rows seen so far
        this.total++;
        if (0 == this.total % this.logInputInterval) {
            log.info("input row count(" + this.total + ")");
        }

        final DatawaveKey dwKey = new DatawaveKey(key);
        if (log.isTraceEnabled()) {
            log.trace("mapper input(" + dwKey.toString() + ")");
        }

        // flush the accumulated values whenever the field name changes
        final String incomingField = dwKey.getFieldName();
        if (null == this.currentFieldName) {
            this.currentFieldName = incomingField;
        } else if (!this.currentFieldName.equals(incomingField)) {
            flushFieldValues(context);
            this.currentFieldName = incomingField;
        }

        // add value to proper data type, creating the per-type accumulator on first sight
        final String type = dwKey.getDataType();
        final DataTypeInfo accumulator = this.dataTypeMapping.computeIfAbsent(type,
                        unused -> new DataTypeInfo(dwKey, this.sumUniqueCounts, this.normalPrecision, this.sparsePrecision));
        accumulator.add(dwKey.getFieldValue());
    }

    context.progress();

    if (log.isTraceEnabled()) {
        log.trace("Completed map(" + key + ", " + value + ")");
    }
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class TLDQueryIterator method getFIEvaluationFilter.
/**
 * Distinct from getEvaluationFilter as the FI filter is used to prevent FI hits on nonEventFields that are not indexOnly fields.
 *
 * @return a chained filter made of the filter from getEvaluationFilter() followed by a filter whose keep(Key) rejects root-pointer keys for fields that are
 *         not index only
 */
protected EventDataQueryFilter getFIEvaluationFilter() {
    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();

    // primary filter on the current filter
    chain.addFilter(getEvaluationFilter());

    // prevent anything that is not an index only field from being kept at the tld level, otherwise allow all
    final EventDataQueryFilter indexOnlyAtRootFilter = new EventDataQueryFilter() {

        /**
         * Keep any FI that is index only and part of the TLD or is not part of the TLD
         *
         * @param k
         *            the key to test
         * @return true when the key is not a root pointer, or when it is a root pointer for an index only field
         */
        @Override
        public boolean keep(Key k) {
            final boolean rootPointer = TLDEventDataFilter.isRootPointer(k);
            final DatawaveKey parsed = new DatawaveKey(k);
            if (!rootPointer) {
                return true;
            }
            return getIndexOnlyFields().contains(parsed.getFieldName());
        }

        @Override
        public boolean apply(@Nullable Map.Entry<Key, String> var1) {
            // accepts every entry
            return true;
        }

        @Override
        public boolean peek(@Nullable Map.Entry<Key, String> var1) {
            // accepts every entry
            return true;
        }

        @Override
        public void startNewDocument(Key documentKey) {
            // no-op
        }

        @Override
        public EventDataQueryFilter clone() {
            // returns this same instance rather than a copy
            return this;
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            return null;
        }

        // the range and key derivation operations below are not supported by this filter

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Map.Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }
    };
    chain.addFilter(indexOnlyAtRootFilter);

    return chain;
}
use of datawave.query.data.parsers.DatawaveKey in project datawave by NationalSecurityAgency.
the class KeyProjection method apply.
/*
 * Parses the entry's key as a DatawaveKey, strips the grouping context from its field name, and returns the projection's verdict on the resulting name.
 *
 * (non-Javadoc)
 *
 * @see com.google.common.base.Predicate#apply(java.lang.Object)
 */
@Override
public boolean apply(Entry<Key, String> input) {
    final String rawFieldName = new DatawaveKey(input.getKey()).getFieldName();
    final String ungroupedFieldName = JexlASTHelper.removeGroupingContext(rawFieldName);
    return projection.apply(ungroupedFieldName);
}
Aggregations