use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.
the class DynamicFacetIterator method getDocumentIterator.
/**
 * Builds the document iterator for a faceted query over the given range.
 * <p>
 * The index iterator is created and sought first. Each entry of {@code fieldIndexResults.tree} is then converted
 * to a {@code DocumentData}/{@code Document} pair, either through a {@code KeyToDocumentData} lookup or, when the
 * configuration has field limits and no faceted fields are configured, through a stub that carries no keys and
 * no attributes. The pairs are run through {@code Aggregation}; for the {@code SHARD_COUNT} and {@code DAY_COUNT}
 * configuration types they are additionally evaluated and passed through {@code DocumentCountCardinality}.
 *
 * @param range
 *            the range handed to the seek calls
 * @param columnFamilies
 *            the column families handed to the seek calls and to the evaluation step
 * @param inclusive
 *            passed through unchanged together with {@code columnFamilies}
 * @return an iterator over key/document entries
 * @throws IOException
 *             propagated from the calls made while building the iterator
 * @throws ConfigException
 *             propagated from the calls made while building the iterator
 * @throws InstantiationException
 *             propagated from the calls made while building the iterator
 * @throws IllegalAccessException
 *             propagated from the calls made while building the iterator
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public Iterator<Entry<Key, Document>> getDocumentIterator(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException, ConfigException, InstantiationException, IllegalAccessException {
    // Results are driven from the field index, so create and seek the index (boolean logic) iterator first
    createAndSeekIndexIterator(range, columnFamilies, inclusive);

    // TODO consider using the new EventDataQueryExpressionFilter
    // A whitelist filter is only built when faceted fields are configured
    EventDataQueryFieldFilter facetFilter = null;
    if (!configuration.getFacetedFields().isEmpty()) {
        facetFilter = new EventDataQueryFieldFilter();
        facetFilter.initializeWhitelist(configuration.getFacetedFields());
    }

    final Function<Entry<Key, Document>, Entry<DocumentData, Document>> toDocumentData;
    if (configuration.hasFieldLimits() && facetFilter == null) {
        if (log.isTraceEnabled()) {
            log.trace("Skipping document lookup, because we don't need it");
        }
        // Stand-in conversion: wraps the index key in a DocumentData with an empty key set and no attributes
        toDocumentData = new Function<Entry<Key, Document>, Entry<DocumentData, Document>>() {
            @Override
            @Nullable
            public Entry<DocumentData, Document> apply(@Nullable Entry<Key, Document> input) {
                Set<Key> noDocKeys = Sets.newHashSet();
                List<Entry<Key, Value>> noAttributes = Lists.newArrayList();
                return Maps.immutableEntry(new DocumentData(input.getKey(), noDocKeys, noAttributes, true), input.getValue());
            }
        };
    } else {
        // Document lookup against a deep copy of the source, restricted by the facet filter when one exists
        toDocumentData = new KeyToDocumentData(source.deepCopy(myEnvironment), super.equality, facetFilter, this.includeHierarchyFields, this.includeHierarchyFields);
    }

    AccumuloTreeIterable<Key, DocumentData> indexHits = new AccumuloTreeIterable<>(fieldIndexResults.tree, toDocumentData);
    indexHits.seek(range, columnFamilies, inclusive);

    TypeMetadata typeMetadata = this.getTypeMetadata();
    Iterator<Entry<Key, Document>> documents = Iterators.transform(indexHits.iterator(),
                    new Aggregation(this.getTimeFilter(), typeMetadata, compositeMetadata, this.isIncludeGroupingContext(), this.includeRecordId, false, null));

    switch (configuration.getType()) {
        case SHARD_COUNT:
        case DAY_COUNT:
            // Count-style facets: evaluate against a fresh deep copy of the source, then apply the
            // count/cardinality transform for the configured type
            SortedKeyValueIterator<Key, Value> evaluationSource = source.deepCopy(myEnvironment);
            documents = getEvaluation(evaluationSource, documents, compositeMetadata, typeMetadata, columnFamilies, inclusive);
            documents = Iterators.transform(documents, new DocumentCountCardinality(configuration.getType(), !merge));
            break;
        default:
            break;
    }
    return documents;
}
use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.
the class TermFrequencyAggregatorTest method apply_buildDocNotKeep.
/**
 * With FIELD1 on the filter's blacklist and only FIELD2 in the keep set, applying the aggregator to a FIELD1
 * term frequency key must return no result key, add nothing to the document, and leave the iterator on the
 * NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocNotKeep() throws IOException {
    // source data: one TF key for uid 123.345.456 followed by one for uid 124.345.456
    TreeMap<Key, Value> tfEntries = Maps.newTreeMap();
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    tfEntries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(tfEntries);
    itr.seek(new Range(), null, true);

    // keep only FIELD2, and blacklist FIELD1 in the filter
    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD2");
    Set<String> blacklist = new HashSet<>();
    blacklist.add("FIELD1");
    EventDataQueryFieldFilter fieldFilter = new EventDataQueryFieldFilter();
    fieldFilter.initializeBlacklist(blacklist);
    aggregator = new TermFrequencyAggregator(keepFields, fieldFilter, -1);

    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
    Key result = aggregator.apply(itr, doc, attributeFactory);

    // test result key
    assertTrue(result == null);
    // test that the doc is empty
    assertTrue(doc.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(itr.getTopKey().equals(expectedTop));
}
use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.
the class TermFrequencyAggregatorTest method apply_buildDocKeepFilteredOut.
/**
 * With a filter built from the query {@code FIELD2 == 'VALUE1'} and only FIELD2 in the keep set, applying the
 * aggregator to a FIELD1 term frequency key must return no result key, add nothing to the document, and leave
 * the iterator on the NEXT_DOC_FIELD key.
 *
 * @throws IOException
 *             declared for the iterator seek and aggregator calls
 * @throws ParseException
 *             if the JEXL query string cannot be parsed
 */
@Test
public void apply_buildDocKeepFilteredOut() throws IOException, ParseException {
    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());

    // source data: one TF key for uid 123.345.456 followed by one for uid 124.345.456
    TreeMap<Key, Value> treeMap = Maps.newTreeMap();
    treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);

    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD2");
    // Collections.emptySet() instead of the raw-typed Collections.EMPTY_SET: same empty immutable set, but
    // type-safe and consistent with the other tests that build this filter
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD2 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TermFrequencyAggregator(keepFields, filter, -1);

    Key result = aggregator.apply(itr, doc, attributeFactory);

    // test result key
    assertTrue(result == null);
    // test that the doc is empty
    assertTrue(doc.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10)));
}
use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.
the class TermFrequencyAggregatorTest method apply_buildDocKeep.
/**
 * With a filter built from the query {@code FIELD1 == 'VALUE1'} and FIELD1 in the keep set, applying the
 * aggregator to a FIELD1 term frequency key must return a key describing that entry, populate the document
 * with RECORD_ID and FIELD1, and leave the iterator on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocKeep() throws IOException, ParseException {
    // source data: one TF key for uid 123.345.456 followed by one for uid 124.345.456
    TreeMap<Key, Value> tfEntries = Maps.newTreeMap();
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    tfEntries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(tfEntries);
    itr.seek(new Range(), null, true);

    // FIELD1 is both kept and matched by the filter's query
    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD1");
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD1 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TermFrequencyAggregator(keepFields, filter, -1);

    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
    Key result = aggregator.apply(itr, doc, attributeFactory);

    // test result key
    assertTrue(result != null);
    DatawaveKey resultKey = new DatawaveKey(result);
    assertTrue(resultKey.getDataType().equals("dataType1"));
    assertTrue(resultKey.getUid().equals("123.345.456"));
    assertTrue(resultKey.getFieldName().equals("FIELD1"));
    assertTrue(resultKey.getFieldValue().equals("VALUE1"));

    // test that the doc holds exactly two entries: RECORD_ID and FIELD1
    assertTrue(doc.size() == 2);
    assertTrue(doc.get("RECORD_ID").getData().equals("123/dataType1/123.345.456"));
    assertTrue(doc.get("FIELD1").getData().toString().equals("VALUE1"));

    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(itr.getTopKey().equals(expectedTop));
}
use of datawave.query.predicate.EventDataQueryFieldFilter in project datawave by NationalSecurityAgency.
the class TLDTermFrequencyAggregatorTest method apply_buildDocOnlyKeepToKeep.
/**
 * With a filter built from the query {@code FIELD2 == 'VALUE1'} and only FIELD2 in the keep set, applying the
 * TLD aggregator to FIELD1 term frequency keys must return no result key, add nothing to the document, and
 * leave the iterator on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocOnlyKeepToKeep() throws IOException, ParseException {
    // source data: FIELD1 keys for uid 123.345.456 and for 123.345.456.1 (presumably a child of the same
    // top-level document), followed by a key for uid 124.345.456
    TreeMap<Key, Value> tfEntries = Maps.newTreeMap();
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    tfEntries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456.1", 10), new Value());
    tfEntries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(tfEntries);
    itr.seek(new Range(), null, true);

    // neither the keep set nor the filter's query mentions FIELD1
    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD2");
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD2 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TLDTermFrequencyAggregator(keepFields, filter, -1);

    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
    Key result = aggregator.apply(itr, doc, attributeFactory);

    // test result key
    assertTrue(result == null);
    // test that the doc is empty
    assertTrue(doc.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(itr.getTopKey().equals(expectedTop));
}
Aggregations