Use of datawave.query.util.Tuple3 in the datawave project by NationalSecurityAgency.
From the class JexlEvaluationTest, method testContentPhraseFunction.
/**
 * Evaluates a query containing a content:phrase function against a document holding the three phrase tokens and checks that the evaluation succeeds
 * and that the joined phrase shows up under the document's HIT_TERM attributes.
 */
@Test
public void testContentPhraseFunction() {
    final String query = "FOO == 'bar' && TOKFIELD == 'big' && TOKFIELD == 'red' && TOKFIELD == 'dog' && content:phrase(termOffsetMap, 'big', 'red', 'dog')";
    final String[] tokens = {"big", "red", "dog"};

    // one term frequency list per token, built with the values 1, 2, 3 in token order
    final Map<String, TermFrequencyList> termOffsets = new HashMap<>();
    for (int i = 0; i < tokens.length; i++) {
        termOffsets.put(tokens[i], buildTfList("TOKFIELD", i + 1));
    }

    final DatawaveJexlContext jexlContext = new DatawaveJexlContext();
    jexlContext.set(Constants.TERM_OFFSET_MAP_JEXL_VARIABLE_NAME, termOffsets);

    // the document carries FOO plus one TOKFIELD value per token
    final Key docKey = new Key("shard", "datatype\0uid");
    final Document document = new Document();
    document.put("FOO", new Content("bar", docKey, true));
    for (final String token : tokens) {
        document.put("TOKFIELD", new Content(token, docKey, true));
    }
    document.visit(Arrays.asList("FOO", "TOKFIELD"), jexlContext);

    final JexlEvaluation evaluation = new JexlEvaluation(query, new HitListArithmetic());
    final Tuple3<Key, Document, DatawaveJexlContext> input = new Tuple3<>(docKey, document, jexlContext);
    assertTrue(evaluation.apply(input));

    // assert that "big red dog" came back in the hit terms
    final Attributes hitTerms = (Attributes) document.get("HIT_TERM");
    boolean phraseSeen = false;
    for (final Attribute<?> hitTerm : hitTerms.getAttributes()) {
        phraseSeen |= hitTerm.getData().equals("TOKFIELD:big red dog");
    }
    assertEquals(5, hitTerms.size());
    assertTrue(phraseSeen);
}
Use of datawave.query.util.Tuple3 in the datawave project by NationalSecurityAgency.
From the class IndexOnlyFunctionIterator, method newLazyFetchingIterator.
/**
 * Build an iterator that fetches index-only values for a single field lazily and incrementally.
 * <p>
 * Any exception raised while the fetch is being wired together is logged, and an empty iterator is handed back instead of propagating the failure.
 *
 * @param fieldName
 *            The field to fetch
 * @param <E>
 *            The element type expected by the caller; the cast to it is unchecked
 * @return An iterator that will allow for lazy, incremental fetching of index-only values, or an empty iterator if setup failed
 */
@SuppressWarnings("unchecked")
public <E> Iterator<E> newLazyFetchingIterator(final String fieldName) {
    try {
        // Specialized, index-only KeyToDocumentData drives the fetch
        final IndexOnlyKeyToDocumentData docData = new IndexOnlyKeyToDocumentData(this.parentRange, fieldName, this.indexOnlySeeker, false);
        final Iterator<Entry<Key, Document>> fetched = this.initializeFetch(fieldName, docData);

        // Entry -> Tuple2 -> Tuple3 (with an empty term frequency map) -> Tuple3 (with a jexl context)
        final Iterator<Tuple2<Key, Document>> asTuples = Iterators.transform(fetched, new EntryToTuple<>());
        final EmptyTermFrequencyFunction noTermFrequencies = new EmptyTermFrequencyFunction();
        final Iterator<Tuple3<Key, Document, Map<String, Object>>> withTfMap = TraceIterators.transform(asTuples, noTermFrequencies,
                        "Term Frequency Lookup");
        final Iterator<Tuple3<Key, Document, DatawaveJexlContext>> withJexlContext = Iterators.transform(withTfMap,
                        new SingleValueContextCreator(fieldName));

        // Hand back the lazy-fetching iterator
        return (Iterator<E>) Iterators.transform(withJexlContext, new TupleToGenericFunction(fieldName));
    } catch (final Exception e) {
        LOG.error("Could not perform function on index-only field '" + fieldName + "' for range " + this.parentRange, e);
    }
    // Setup failed: fall back to an iterator with nothing in it
    final Collection<E> nothing = Collections.emptySet();
    return nothing.iterator();
}
Use of datawave.query.util.Tuple3 in the datawave project by NationalSecurityAgency.
From the class QueryIterator, method getEvaluation.
/**
 * Wrap the candidate documents in an iterator that keeps only those which pass the JEXL evaluation.
 * <p>
 * When evaluation is disabled the incoming iterator is returned untouched. Otherwise each entry is converted to a tuple, paired with a term
 * frequency map (looked up only when term frequencies are required, empty otherwise), given a jexl context by an {@link IndexOnlyContextCreator},
 * filtered through the jexl evaluation function, and finally converted back to an entry.
 *
 * @param documentSource
 *            the nested query source; its query is parsed for variable names when present, otherwise this iterator's own query is used
 * @param sourceDeepCopy
 *            the source handed to the context creator; a further deep copy of it is taken for the term frequency function
 * @param documents
 *            the candidate documents
 * @param compositeMetadata
 *            passed through to the context creator
 * @param typeMetadataForEval
 *            passed through to the context creator
 * @param columnFamilies
 *            passed through to the context creator
 * @param inclusive
 *            passed through to the context creator
 * @return an iterator over the matching documents, or {@code documents} itself when evaluation is disabled
 * @throws IllegalStateException
 *             if building the context creator fails with one of the caught configuration or instantiation exceptions
 */
protected Iterator<Entry<Key, Document>> getEvaluation(NestedQueryIterator<Key> documentSource, SortedKeyValueIterator<Key, Value> sourceDeepCopy,
                Iterator<Entry<Key, Document>> documents, CompositeMetadata compositeMetadata, TypeMetadata typeMetadataForEval,
                Collection<ByteSequence> columnFamilies, boolean inclusive) {
    // Nothing to filter when evaluation has been switched off
    if (this.disableEvaluation) {
        if (log.isTraceEnabled()) {
            log.trace("Evaluation is disabled, not instantiating Jexl evaluation logic");
        }
        return documents;
    }

    // Filter the Documents by testing them against the JEXL query
    final JexlEvaluation jexlEvaluationFunction = getJexlEvaluation(documentSource);

    // Prefer the query carried by the document source; fall back to our own
    final boolean sourceHasQuery = null != documentSource && null != documentSource.getQuery();
    final Collection<String> variables;
    if (sourceHasQuery) {
        variables = VariableNameVisitor.parseQuery(jexlEvaluationFunction.parse(documentSource.getQuery()));
    } else {
        variables = VariableNameVisitor.parseQuery(jexlEvaluationFunction.parse(query));
    }

    final Iterator<Tuple2<Key, Document>> keyedDocuments = Iterators.transform(documents, new EntryToTuple<>());

    final Iterator<Tuple3<Key, Document, Map<String, Object>>> itrWithContext;
    if (!this.isTermFrequenciesRequired()) {
        itrWithContext = Iterators.transform(keyedDocuments, new EmptyContext<>());
    } else {
        // The TFFunction can only prune non index-only fields
        final Set<String> tfIndexOnlyFields = Sets.intersection(getTermFrequencyFields(), getIndexOnlyFields());
        // Note that the tf function gets its own additional deep copy of the source
        final Function<Tuple2<Key, Document>, Tuple3<Key, Document, Map<String, Object>>> tfFunction = TFFactory.getFunction(getScript(documentSource),
                        getContentExpansionFields(), getTermFrequencyFields(), this.getTypeMetadata(), super.equality, getEvaluationFilter(),
                        sourceDeepCopy.deepCopy(myEnvironment), tfIndexOnlyFields);
        itrWithContext = TraceIterators.transform(keyedDocuments, tfFunction, "Term Frequency Lookup");
    }

    try {
        final IteratorBuildingVisitor iteratorBuildingVisitor = createIteratorBuildingVisitor(getDocumentRange(documentSource), false, this.sortedUIDs);
        final Multimap<String, JexlNode> delayedNonEventFieldMap = DelayedNonEventSubTreeVisitor.getDelayedNonEventFieldMap(iteratorBuildingVisitor,
                        script, getNonEventFields());

        // @formatter:off
        final IndexOnlyContextCreator contextCreator = new IndexOnlyContextCreatorBuilder()
                        .setSource(sourceDeepCopy)
                        .setRange(getDocumentRange(documentSource))
                        .setTypeMetadata(typeMetadataForEval)
                        .setCompositeMetadata(compositeMetadata)
                        .setOptions(this)
                        .setVariables(variables)
                        .setIteratorBuildingVisitor(iteratorBuildingVisitor)
                        .setDelayedNonEventFieldMap(delayedNonEventFieldMap)
                        .setEquality(equality)
                        .setColumnFamilies(columnFamilies)
                        .setInclusive(inclusive)
                        .setComparatorFactory(this)
                        .build();
        // @formatter:on

        if (exceededOrEvaluationCache != null) {
            contextCreator.addAdditionalEntries(exceededOrEvaluationCache);
        }

        final Iterator<Tuple3<Key, Document, DatawaveJexlContext>> withJexlContext = Iterators.transform(itrWithContext, contextCreator);
        final Iterator<Tuple3<Key, Document, DatawaveJexlContext>> matchedDocuments = statelessFilter(withJexlContext, jexlEvaluationFunction);

        if (log.isTraceEnabled()) {
            log.trace("arithmetic:" + arithmetic + " range:" + getDocumentRange(documentSource) + ", thread:" + Thread.currentThread());
        }

        return Iterators.transform(matchedDocuments, new TupleToEntry<>());
    } catch (InstantiationException | MalformedURLException | IllegalAccessException | ConfigException e) {
        throw new IllegalStateException("Could not perform delayed index only evaluation", e);
    }
}
Use of datawave.query.util.Tuple3 in the datawave project by NationalSecurityAgency.
From the class IndexOnlyContextCreator, method newDatawaveJexlContext.
/**
 * Create the jexl context for a document tuple, layering optional wrappers over the context produced by the parent class.
 * <p>
 * The parent's context is wrapped in an {@link IndexOnlyJexlContext} when index-only context creation is enabled, and the result is wrapped again in
 * a {@link DelayedNonEventIndexContext} when there are delayed non-event nodes to process.
 *
 * @param from
 *            the key, document and term frequency map to build a context for
 * @return the parent's context, wrapped as described above
 */
@Override
protected DatawaveJexlContext newDatawaveJexlContext(final Tuple3<Key, Document, Map<String, Object>> from) {
    DatawaveJexlContext context = super.newDatawaveJexlContext(from);

    if (this.createIndexOnlyJexlContext) {
        final Key documentKey = from.first();
        final IndexOnlyFunctionIterator<Tuple3<Key, Document, DatawaveJexlContext>> functionIterator = new IndexOnlyFunctionIterator<>(
                        this.documentSpecificSource, this, documentKey);
        context = new IndexOnlyJexlContext<>(context, functionIterator);
    }

    // see if there are any delayed nodes that need to be processed
    final boolean hasDelayedNodes = delayedNonEventFieldMap != null && !delayedNonEventFieldMap.isEmpty();
    if (!hasDelayedNodes) {
        return context;
    }

    // The range runs from the document key to the end of the document. For some query logics this may be too large a range, but it will be
    // narrowed with equality later.
    final Key rangeStart = new Key(from.first().getRow(), from.first().getColumnFamily());
    final Key rangeEnd = new Key(rangeStart.getRow().toString(), rangeStart.getColumnFamily() + Constants.MAX_UNICODE_STRING);
    final Range documentRange = new Range(rangeStart, true, rangeEnd, false);

    return new DelayedNonEventIndexContext(context, iteratorBuildingVisitor, delayedNonEventFieldMap, documentRange, columnFamilies, inclusive,
                    equality);
}
Use of datawave.query.util.Tuple3 in the datawave project by NationalSecurityAgency.
From the class KeyToDocumentData, method applyDescendantCounts.
/**
 * Run the descendant count function for a document and append any keys it generates to the document attributes.
 * <p>
 * Each generated key is re-created with the supplied visibility and timestamp where those are given. When no visibility is given, the generated
 * key's own column visibility is used. When the timestamp is not positive it is taken from the generated key being processed, and once a positive
 * value has been picked up it is reused for the remaining keys. Entries are appended with an empty {@link Value}.
 *
 * @param function
 *            the function that counts descendants; nothing is done when {@code null}
 * @param range
 *            passed to the function
 * @param key
 *            passed to the function; nothing is done when {@code null}
 * @param documentAttributes
 *            passed to the function, and appended to when it is non-null and non-empty and keys were generated
 * @param visibility
 *            visibility to apply to every generated key, or {@code null}/empty to keep each key's own
 * @param timestamp
 *            timestamp to apply to every generated key, or a non-positive value to take it from the generated keys
 * @return the first-generation descendant count, or 0 when the function was not run or reported no descendants
 */
private static int applyDescendantCounts(final DescendantCountFunction function, final Range range, final Key key,
                final List<Entry<Key, Value>> documentAttributes, final String visibility, long timestamp) {
    if ((null == function) || (null == key)) {
        return 0;
    }

    // Count the descendants, generating keys based on query options and document attributes
    final Tuple3<Range, Key, List<Entry<Key, Value>>> input = new Tuple3<>(range, key, documentAttributes);
    final DescendantCount count = function.apply(input);

    // No descendants means no more work
    if ((null == count) || !count.hasDescendants()) {
        return 0;
    }

    final int firstGenerationCount = count.getFirstGenerationCount();

    // Generated keys are only added to an existing, populated attribute list
    final List<Key> generatedKeys = count.getKeys();
    if ((null == documentAttributes) || documentAttributes.isEmpty() || generatedKeys.isEmpty()) {
        return firstGenerationCount;
    }

    // A single Text holds the visibility: fixed when one was supplied, refilled per key otherwise
    final boolean visibilitySupplied = (null != visibility) && !visibility.isEmpty();
    final Text appliedVis = visibilitySupplied ? new Text(visibility) : new Text();

    for (final Key generated : generatedKeys) {
        if (!visibilitySupplied) {
            generated.getColumnVisibility(appliedVis);
        }
        if (timestamp <= 0) {
            timestamp = generated.getTimestamp();
        }
        final Key rewritten = new Key(generated.getRow(), generated.getColumnFamily(), generated.getColumnQualifier(), appliedVis, timestamp);
        documentAttributes.add(Maps.immutableEntry(rewritten, new Value()));
    }

    return firstGenerationCount;
}
Aggregations