use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class QueryLogicTestHarness method assertLogicResults.
// =============================================
// assert methods
/**
 * Determines if the correct results were obtained for a query.
 * <p>
 * Every entry produced by iterating {@code logic} is deserialized into a document, validated, and reduced to a
 * single result id by the configured parser. The set of extracted ids must equal {@code expected}. Entries whose
 * key is a final-document key, and documents that contain no countable attributes, are skipped.
 *
 * @param logic
 *            query logic whose iteration yields the key/value response data
 * @param expected
 *            list of key values expected within response data
 * @param checkers
 *            list of additional validation methods, each applied to every non-empty document
 */
public void assertLogicResults(BaseQueryLogic<Map.Entry<Key, Value>> logic, Collection<String> expected, List<DocumentChecker> checkers) {
    final Set<String> actualResults = new HashSet<>();

    if (log.isDebugEnabled()) {
        log.debug(" ====== expected id(s) ======");
        for (String e : expected) {
            log.debug("id(" + e + ")");
        }
    }

    for (Map.Entry<Key, Value> entry : logic) {
        if (FinalDocumentTrackingIterator.isFinalDocumentKey(entry.getKey())) {
            continue;
        }

        final Document document = this.deserializer.apply(entry).getValue();

        // check all of the types to ensure that all are keepers as defined in the
        // AttributeFactory class
        int count = 0;
        for (Attribute<? extends Comparable<?>> attribute : document.getAttributes()) {
            if (attribute instanceof TimingMetadata) {
                // timing metadata does not count towards the document's content
                continue;
            }
            if (attribute instanceof Attributes) {
                final Attributes attrs = (Attributes) attribute;
                final Collection<Class<?>> types = new HashSet<>();
                for (Attribute<? extends Comparable<?>> attr : attrs.getAttributes()) {
                    count++;
                    if (attr instanceof TypeAttribute) {
                        final Type<? extends Comparable<?>> type = ((TypeAttribute<?>) attr).getType();
                        if (Objects.nonNull(type)) {
                            types.add(type.getClass());
                        }
                    }
                }
                Assert.assertEquals(AttributeFactory.getKeepers(types), types);
            } else {
                count++;
            }
        }

        // ignore empty documents (possible when only passing FinalDocument back)
        if (count == 0) {
            continue;
        }

        // parse the document
        final String extractedResult = this.parser.parse(entry.getKey(), document);
        if (log.isDebugEnabled()) {
            // guarded: rendering the whole document as a string is wasted work when debug is off
            log.debug("result(" + extractedResult + ") key(" + entry.getKey() + ") document(" + document + ")");
        }

        // verify expected results; Set.add returns false when the id was already seen
        Assert.assertNotNull("extracted result", extractedResult);
        Assert.assertTrue("duplicate result(" + extractedResult + ") key(" + entry.getKey() + ")", actualResults.add(extractedResult));

        // perform any custom assert checks on document
        for (final DocumentChecker check : checkers) {
            check.assertValid(document);
        }
    }

    log.info("total records found(" + actualResults.size() + ") expected(" + expected.size() + ")");

    // Report both directions of the difference regardless of the relative sizes: two sets of equal size can
    // still hold different ids, and that case would otherwise fail below without any diagnostic output.
    final Set<String> notFound = new HashSet<>(expected);
    notFound.removeAll(actualResults);
    for (final String m : notFound) {
        log.error("missing result(" + m + ")");
    }

    final Set<String> extra = new HashSet<>(actualResults);
    extra.removeAll(expected);
    for (final String r : extra) {
        log.error("unexpected result(" + r + ")");
    }

    // ensure that the complete expected result set exists
    Assert.assertEquals("results do not match expected", expected.size(), actualResults.size());
    Assert.assertTrue("expected and actual values do not match", expected.containsAll(actualResults));
    Assert.assertTrue("expected and actual values do not match", actualResults.containsAll(expected));
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class CompositeFunctionsTest method runTestQuery.
/**
 * Runs a query through the supplied logic and asserts that the UUIDs of the returned documents are exactly the
 * expected ones.
 *
 * @param expected
 *            UUID values the query is expected to return
 * @param querystr
 *            the query string to run
 * @param startDate
 *            begin date applied to the query settings
 * @param endDate
 *            end date applied to the query settings
 * @param extraParms
 *            additional query parameters applied to the query settings
 * @param connector
 *            connector passed to the logic when it is initialized
 * @param logic
 *            the query logic to initialize, set up and iterate
 * @throws Exception
 *             if initializing or setting up the query fails
 */
protected void runTestQuery(List<String> expected, String querystr, Date startDate, Date endDate, Map<String, String> extraParms, Connector connector, ShardQueryLogic logic) throws Exception {
    log.debug("runTestQuery");
    log.trace("Creating QueryImpl");
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(querystr);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());

    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());

    logic.setMaxEvaluationPipelines(1);
    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);

    // hashed copy of the expected ids for constant-time membership checks
    final Set<String> expectedSet = new HashSet<>(expected);
    final Set<String> resultSet = new HashSet<>();

    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.debug(entry.getKey() + " => " + d);

        // the id is looked up under "UUID" first and under "UUID.0" as a fallback
        Attribute<?> attr = d.get("UUID");
        if (attr == null) {
            attr = d.get("UUID.0");
        }
        Assert.assertNotNull("Result Document did not contain a 'UUID'", attr);

        // Only a TypeAttribute survives the cast below, so that is all the assertion may admit; any other
        // attribute type now fails with this message instead of a ClassCastException.
        Assert.assertTrue("Expected result to be an instance of DatwawaveTypeAttribute, was: " + attr.getClass().getName(), attr instanceof TypeAttribute);
        TypeAttribute<?> uuidAttr = (TypeAttribute<?>) attr;

        String uuid = uuidAttr.getType().getDelegate().toString();
        Assert.assertTrue("Received unexpected UUID: " + uuid, expectedSet.contains(uuid));
        resultSet.add(uuid);
    }

    if (expected.size() > resultSet.size()) {
        Set<String> missing = new HashSet<>(expectedSet);
        missing.removeAll(resultSet);
        for (String s : missing) {
            log.warn("Missing: " + s);
        }
    }

    if (!expected.containsAll(resultSet)) {
        log.error("Expected results " + expected + " differ form actual results " + resultSet);
    }
    Assert.assertTrue("Expected results " + expected + " differ form actual results " + resultSet, expected.containsAll(resultSet));
    Assert.assertEquals("Unexpected number of records", expected.size(), resultSet.size());
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class HitsAreAlwaysIncludedCommonalityTokenTest method runTestQuery.
/**
 * Runs a query and checks the hit terms and returned fields of every document against {@code goodResults}.
 * <p>
 * For each returned document, every {@code Content} hit term must be contained in {@code goodResults}. Each
 * {@code field:value} pair found in the document dictionary is then removed from {@code goodResults}, which must
 * be empty once the document has been processed. Note that this mutates the caller's collection.
 *
 * @param connector
 *            connector passed to the logic when it is initialized
 * @param queryString
 *            the query string to run
 * @param startDate
 *            begin date applied to the query settings
 * @param endDate
 *            end date applied to the query settings
 * @param extraParms
 *            additional query parameters applied to the query settings
 * @param goodResults
 *            expected hit terms / {@code field:value} strings; entries are removed as they are found
 * @throws Exception
 *             if initializing or setting up the query fails
 */
protected void runTestQuery(Connector connector, String queryString, Date startDate, Date endDate, Map<String, String> extraParms, Collection<String> goodResults) throws Exception {
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(queryString);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());

    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());

    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);

    Set<Document> docs = new HashSet<>();
    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.trace(entry.getKey() + " => " + d);
        docs.add(d);

        // every Content hit term, whether single or multi-valued, must be one of the expected values
        Attribute<?> hitAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitAttribute instanceof Attributes) {
            for (Attribute<?> attr : ((Attributes) hitAttribute).getAttributes()) {
                if (attr instanceof Content) {
                    String hit = ((Content) attr).getContent();
                    Assert.assertTrue("unexpected hit term: " + hit, goodResults.contains(hit));
                }
            }
        } else if (hitAttribute instanceof Content) {
            String hit = ((Content) hitAttribute).getContent();
            Assert.assertTrue("unexpected hit term: " + hit, goodResults.contains(hit));
        }

        // remove from goodResults as we find the expected return fields
        log.debug("goodResults: " + goodResults);
        Map<String, Attribute<? extends Comparable<?>>> dictionary = d.getDictionary();
        log.debug("dictionary:" + dictionary);
        for (Entry<String, Attribute<? extends Comparable<?>>> dictionaryEntry : dictionary.entrySet()) {
            String field = dictionaryEntry.getKey();
            Attribute<? extends Comparable<?>> attribute = dictionaryEntry.getValue();
            if (attribute instanceof Attributes) {
                for (Attribute<?> attr : ((Attributes) attribute).getAttributes()) {
                    removeExpected(goodResults, field + ":" + attr);
                }
            } else {
                removeExpected(goodResults, field + ":" + attribute);
            }
        }

        // NOTE(review): this check runs once per document, so the first document returned has to account for
        // every expected entry - confirm that is intended when a query returns more than one document
        Assert.assertTrue(goodResults + " was not empty", goodResults.isEmpty());
    }
    Assert.assertFalse("No docs were returned!", docs.isEmpty());
}

/**
 * Removes {@code toFind} from {@code goodResults} and logs at debug level whether it was present.
 */
private void removeExpected(Collection<String> goodResults, String toFind) {
    if (goodResults.remove(toFind)) {
        log.debug("removed " + toFind);
    } else {
        log.debug("Did not remove " + toFind);
    }
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class IvaratorInterruptTest method runTestQuery.
/**
 * Runs a query through this test's query logic and asserts that the UUIDs of the returned documents are exactly
 * the expected ones.
 *
 * @param expected
 *            UUID values the query is expected to return
 * @param querystr
 *            the query string to run
 * @param startDate
 *            begin date applied to the query settings
 * @param endDate
 *            end date applied to the query settings
 * @param extraParms
 *            additional query parameters applied to the query settings
 * @throws Exception
 *             if initializing or setting up the query fails
 */
protected void runTestQuery(List<String> expected, String querystr, Date startDate, Date endDate, Map<String, String> extraParms) throws Exception {
    log.debug("runTestQuery");
    log.trace("Creating QueryImpl");
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(querystr);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());

    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());

    logic.setMaxEvaluationPipelines(1);
    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);

    // hashed copy of the expected ids for constant-time membership checks
    final Set<String> expectedSet = new HashSet<>(expected);
    final Set<String> resultSet = new HashSet<>();

    for (Map.Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.debug(entry.getKey() + " => " + d);

        // the id is looked up under "UUID" first and under "UUID.0" as a fallback
        Attribute<?> attr = d.get("UUID");
        if (attr == null) {
            attr = d.get("UUID.0");
        }
        Assert.assertNotNull("Result Document did not contain a 'UUID'", attr);

        // Only a TypeAttribute survives the cast below, so that is all the assertion may admit; any other
        // attribute type now fails with this message instead of a ClassCastException.
        Assert.assertTrue("Expected result to be an instance of DatwawaveTypeAttribute, was: " + attr.getClass().getName(), attr instanceof TypeAttribute);
        TypeAttribute<?> uuidAttr = (TypeAttribute<?>) attr;

        String uuid = uuidAttr.getType().getDelegate().toString();
        Assert.assertTrue("Received unexpected UUID: " + uuid, expectedSet.contains(uuid));
        resultSet.add(uuid);
    }

    if (expected.size() > resultSet.size()) {
        Set<String> missing = new HashSet<>(expectedSet);
        missing.removeAll(resultSet);
        for (String s : missing) {
            log.warn("Missing: " + s);
        }
    }

    if (!expected.containsAll(resultSet)) {
        log.error("Expected results " + expected + " differ form actual results " + resultSet);
    }
    Assert.assertTrue("Expected results " + expected + " differ form actual results " + resultSet, expected.containsAll(resultSet));
    Assert.assertEquals("Unexpected number of records", expected.size(), resultSet.size());
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class QueryIterator method seek.
/**
 * Positions this iterator on the given range and builds the chain of iterators that produces the serialized
 * documents for it.
 * <p>
 * In order, the method: records the call in the active query log; decides whether the grouping context must be
 * switched on for this query; strips a result count from the start key's column family when one is present;
 * chooses the key source (empty, document specific, or a full document iterator); creates and starts the
 * evaluation pipeline; layers the optional unique and grouping transforms, the stats recording filter, the
 * serializer for the configured return type, and the result counting / key adjudicating / final document
 * tracking wrappers on top of it; and finally calls {@code prepareKeyValue}.
 * <p>
 * Any exception thrown while doing so is passed to {@code handleException}. The cleanup in the {@code finally}
 * block runs on every path, including the early return.
 *
 * @param range
 *            the range to seek to
 * @param columnFamilies
 *            column families forwarded to the document iterator and to the pipeline
 * @param inclusive
 *            forwarded together with {@code columnFamilies}
 * @throws IOException
 *             declared by the signature; presumably raised via {@code handleException} - confirm
 */
@Override
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
// preserve the original range for use with the Final Document tracking iterator because it is placed after the ResultCountingIterator
// so the FinalDocumentTracking iterator needs the start key with the count already appended
originalRange = range;
getActiveQueryLog().get(getQueryId()).beginCall(this.originalRange, ActiveQuery.CallType.SEEK);
Span span = Trace.start("QueryIterator.seek");
// switch the grouping context on when the query text contains one of these markers and it is not already on;
// groupingContextAddedByMe records whether this call was the one that enabled it
if (!this.isIncludeGroupingContext() && (this.query.contains("grouping:") || this.query.contains("matchesInGroup") || this.query.contains("MatchesInGroup") || this.query.contains("atomValuesMatch"))) {
this.setIncludeGroupingContext(true);
this.groupingContextAddedByMe = true;
} else {
this.groupingContextAddedByMe = false;
}
try {
if (log.isDebugEnabled()) {
log.debug("Seek range: " + range + " " + query);
}
this.range = range;
// determine whether this is a teardown/rebuild range
long resultCount = 0;
if (!range.isStartKeyInclusive()) {
// see if we can fail fast. If we were rebuilt with the FinalDocument key, then we are already completely done
if (collectTimingDetails && FinalDocumentTrackingIterator.isFinalDocumentKey(range.getStartKey())) {
this.seekKeySource = new EmptyTreeIterable();
this.serializedDocuments = EmptyIterator.emptyIterator();
prepareKeyValue(span);
// the finally block below still runs after this early return
return;
}
// see if we have a count in the cf
Key startKey = range.getStartKey();
String[] parts = StringUtils.split(startKey.getColumnFamily().toString(), '\0');
// exactly three null-separated parts means the first part is an encoded result count
if (parts.length == 3) {
resultCount = NumericalEncoder.decode(parts[0]).longValue();
// remove the count from the range
startKey = new Key(startKey.getRow(), new Text(parts[1] + '\0' + parts[2]), startKey.getColumnQualifier(), startKey.getColumnVisibility(), startKey.getTimestamp());
this.range = range = new Range(startKey, range.isStartKeyInclusive(), range.getEndKey(), range.isEndKeyInclusive());
}
}
// determine whether this is a document specific range
Range documentRange = isDocumentSpecificRange(range) ? range : null;
// a document specific range with a non-inclusive start key gets an empty key source
// (presumably because that document was already returned and this scan is done - confirm)
if (documentRange != null && !documentRange.isStartKeyInclusive()) {
if (log.isTraceEnabled()) {
log.trace("Received non-inclusive event specific range: " + documentRange);
}
if (gatherTimingDetails()) {
this.seekKeySource = new EvaluationTrackingNestedIterator(QuerySpan.Stage.EmptyTree, trackingSpan, new EmptyTreeIterable(), myEnvironment);
} else {
this.seekKeySource = new EmptyTreeIterable();
}
} else // if the Range is for a single document and the query doesn't reference any index-only or tokenized fields
if (documentRange != null && (!this.isContainsIndexOnlyTerms() && this.getTermFrequencyFields().isEmpty() && !super.mustUseFieldIndex)) {
if (log.isTraceEnabled()) {
log.trace("Received event specific range: " + documentRange);
}
// We can take a shortcut directly to the event
Map.Entry<Key, Document> documentKey = Maps.immutableEntry(super.getDocumentKey.apply(documentRange), new Document());
if (log.isTraceEnabled()) {
log.trace("Transformed document key: " + documentKey);
}
if (gatherTimingDetails()) {
this.seekKeySource = new EvaluationTrackingNestedIterator(QuerySpan.Stage.DocumentSpecificTree, trackingSpan, new DocumentSpecificNestedIterator(documentKey), myEnvironment);
} else {
this.seekKeySource = new DocumentSpecificNestedIterator(documentKey);
}
} else {
// every other case builds the document iterator for the range
this.seekKeySource = buildDocumentIterator(documentRange, range, columnFamilies, inclusive);
}
// Create the pipeline iterator for document aggregation and
// evaluation within a thread pool
PipelineIterator pipelineIter = PipelineFactory.createIterator(this.seekKeySource, getMaxEvaluationPipelines(), getMaxPipelineCachedResults(), getSerialPipelineRequest(), querySpanCollector, trackingSpan, this, sourceForDeepCopies.deepCopy(myEnvironment), myEnvironment, yield, yieldThresholdMs, columnFamilies, inclusive);
pipelineIter.setCollectTimingDetails(collectTimingDetails);
// TODO pipelineIter.setStatsdHostAndPort(statsdHostAndPort);
pipelineIter.startPipeline();
// gather Key,Document Entries from the pipelines
Iterator<Entry<Key, Document>> pipelineDocuments = pipelineIter;
if (log.isTraceEnabled()) {
// pass-through filter used only to trace each entry as it leaves the pipeline
pipelineDocuments = Iterators.filter(pipelineDocuments, keyDocumentEntry -> {
log.trace("after pipeline, keyDocumentEntry:" + keyDocumentEntry);
return true;
});
}
// now apply the unique transform if requested
UniqueTransform uniquify = getUniqueTransform();
if (uniquify != null) {
pipelineDocuments = Iterators.filter(pipelineDocuments, uniquify.getUniquePredicate());
}
// apply the grouping transform if requested and if the batch size is greater than zero
// if the batch size is 0, then grouping is computed only on the web server
GroupingTransform groupify = getGroupingTransform();
if (groupify != null && this.groupFieldsBatchSize > 0) {
// NOTE(review): the call below goes through the groupingTransform field rather than the local groupify
// that was just null-checked - presumably getGroupingTransform() populates that field; confirm
pipelineDocuments = groupingTransform.getGroupingIterator(pipelineDocuments, this.groupFieldsBatchSize, this.yield);
if (log.isTraceEnabled()) {
pipelineDocuments = Iterators.filter(pipelineDocuments, keyDocumentEntry -> {
log.trace("after grouping, keyDocumentEntry:" + keyDocumentEntry);
return true;
});
}
}
pipelineDocuments = Iterators.filter(pipelineDocuments, keyDocumentEntry -> {
// last chance before the documents are serialized
getActiveQueryLog().get(getQueryId()).recordStats(keyDocumentEntry.getValue(), querySpanCollector.getCombinedQuerySpan(null));
// Always return true since we just want to record data in the ActiveQueryLog
return true;
});
// pick the serializer that matches the configured return type; an unrecognized type is rejected
if (this.getReturnType() == ReturnType.kryo) {
// Serialize the Document using Kryo
this.serializedDocuments = Iterators.transform(pipelineDocuments, new KryoDocumentSerializer(isReducedResponse(), isCompressResults()));
} else if (this.getReturnType() == ReturnType.writable) {
// Use the Writable interface to serialize the Document
this.serializedDocuments = Iterators.transform(pipelineDocuments, new WritableDocumentSerializer(isReducedResponse()));
} else if (this.getReturnType() == ReturnType.tostring) {
// Just return a toString() representation of the document
this.serializedDocuments = Iterators.transform(pipelineDocuments, new ToStringDocumentSerializer(isReducedResponse()));
} else {
throw new IllegalArgumentException("Unknown return type of: " + this.getReturnType());
}
if (log.isTraceEnabled()) {
// NOTE(review): a Kryo deserializer is used for tracing whatever the configured return type is - confirm
KryoDocumentDeserializer dser = new KryoDocumentDeserializer();
this.serializedDocuments = Iterators.filter(this.serializedDocuments, keyValueEntry -> {
log.trace("after serializing, keyValueEntry:" + dser.apply(keyValueEntry));
return true;
});
}
// Cannot do this on document specific ranges as the count would place the keys outside the initial range
if (!sortedUIDs && documentRange == null) {
this.serializedDocuments = new ResultCountingIterator(serializedDocuments, resultCount, yield);
} else if (this.sortedUIDs) {
// we have sorted UIDs, so we can mask out the cq
this.serializedDocuments = new KeyAdjudicator<>(serializedDocuments, yield);
}
// only add the final document tracking iterator which sends stats back to the client if collectTimingDetails is true
if (collectTimingDetails) {
// if there is no document to return, then add an empty document
// to store the timing metadata
this.serializedDocuments = new FinalDocumentTrackingIterator(querySpanCollector, trackingSpan, originalRange, this.serializedDocuments, this.getReturnType(), this.isReducedResponse(), this.isCompressResults(), this.yield);
}
if (log.isTraceEnabled()) {
// NOTE(review): the guard checks the trace level but the message below is logged at debug level
KryoDocumentDeserializer dser = new KryoDocumentDeserializer();
this.serializedDocuments = Iterators.filter(this.serializedDocuments, keyValueEntry -> {
log.debug("finally, considering:" + dser.apply(keyValueEntry));
return true;
});
}
// Determine if we have items to return
prepareKeyValue(span);
} catch (Exception e) {
handleException(e);
} finally {
// hand the tracking span to the collector when timing details are being gathered
if (gatherTimingDetails() && trackingSpan != null && querySpanCollector != null) {
querySpanCollector.addQuerySpan(trackingSpan);
}
if (null != span) {
span.stop();
}
QueryStatsDClient client = getStatsdClient();
if (client != null) {
client.flush();
}
// close out the call that was opened in the active query log at the top of this method
getActiveQueryLog().get(getQueryId()).endCall(this.originalRange, ActiveQuery.CallType.SEEK);
if (this.key == null && this.value == null) {
// no entries to return
getActiveQueryLog().remove(getQueryId(), this.originalRange);
}
}
}
Aggregations