Use of datawave.query.attributes.Attribute in the datawave project by NationalSecurityAgency.
Class IteratorBuildingVisitorTest, method eval.
/**
 * Builds an iterator tree for {@code query} with an {@link IteratorBuildingVisitor}, seeks it over
 * {@code docRange} and asserts on the resulting hit and, optionally, on the document that was built.
 *
 * @param query the parsed JEXL script handed to the visitor
 * @param docRange the range set on the visitor and used to seek the resulting iterator
 * @param docKeyHit the key whose row and column family the single hit must match, or {@code null} when no hit is expected
 * @param source the key/value entries wrapped in the visitor's source factory
 * @param buildDoc {@code true} when the document is expected to be populated, {@code false} when it must be empty
 * @param docKeys expected values per field name; only read when {@code buildDoc} is {@code true}, and never modified
 * @param termFrequencyFields passed to {@code visitor.setTermFrequencyFields}
 * @param aggregationFields passed to {@code visitor.setFieldsToAggregate}
 * @param indexOnlyFields passed to {@code visitor.setIndexOnlyFields}
 * @throws IOException propagated from the iterator operations invoked here
 */
private void eval(ASTJexlScript query, Range docRange, Key docKeyHit, List<Map.Entry<Key, Value>> source, boolean buildDoc, Map<String, List<String>> docKeys, Set<String> termFrequencyFields, Set<String> aggregationFields, Set<String> indexOnlyFields) throws IOException {
    IteratorBuildingVisitor visitor = new IteratorBuildingVisitor();
    TypeMetadata typeMetadata = new TypeMetadata();
    Iterator<Map.Entry<Key, Value>> iterator = source.iterator();
    IteratorEnvironment env = new BaseIteratorEnvironment();
    visitor.setSource(new SourceFactory(iterator), env);

    // configure the visitor for use
    visitor.setTermFrequencyFields(termFrequencyFields);
    visitor.setFieldsToAggregate(aggregationFields);
    visitor.setIndexOnlyFields(indexOnlyFields);
    visitor.setRange(docRange);
    visitor.setTimeFilter(TimeFilter.alwaysTrue());
    visitor.setLimitLookup(true);
    visitor.setTypeMetadata(typeMetadata);

    query.jjtAccept(visitor, null);
    NestedIterator result = visitor.root();
    Assert.assertNotNull(result);

    SeekableNestedIterator seekableNestedIterator = new SeekableNestedIterator(result, env);
    seekableNestedIterator.seek(docRange, null, true);
    seekableNestedIterator.initialize();

    // a miss: nothing more to check
    if (docKeyHit == null) {
        Assert.assertFalse(seekableNestedIterator.hasNext());
        return;
    }

    // a hit: row and column family must match the expected key
    Assert.assertTrue(seekableNestedIterator.hasNext());
    Key next = (Key) seekableNestedIterator.next();
    Assert.assertNotNull(next);
    Assert.assertEquals(docKeyHit.getRow().toString(), next.getRow().toString());
    Assert.assertEquals(docKeyHit.getColumnFamily().toString(), next.getColumnFamily().toString());

    // asserts for document build
    Document d = seekableNestedIterator.document();
    Assert.assertNotNull(d);
    if (buildDoc) {
        // +1 is for RECORD_ID field
        Assert.assertEquals(docKeys.size() + 1, d.getDictionary().size());

        // verify hits for each specified field
        for (Map.Entry<String, List<String>> expectedEntry : docKeys.entrySet()) {
            List<String> expected = expectedEntry.getValue();
            Object dictData = d.getDictionary().get(expectedEntry.getKey()).getData();
            if (expected.size() == 1) {
                // verify the only doc; equals is invoked on the document data, as the data type decides equality
                Assert.assertTrue(dictData.equals(expected.get(0)));
            } else {
                // the data should be a set, verify it matches expected
                Assert.assertNotNull(dictData);
                Assert.assertTrue(dictData instanceof Set);
                Set<?> dictSet = (Set<?>) dictData;
                Assert.assertEquals(expected.size(), dictSet.size());

                // tick values off a private copy so the caller's expectation lists are left untouched
                List<String> remaining = new java.util.ArrayList<>(expected);
                for (Object element : dictSet) {
                    Attribute<?> attribute = (Attribute<?>) element;
                    Assert.assertTrue(remaining.remove(attribute.getData()));
                }

                // verify that every expected value was matched
                Assert.assertTrue(remaining.isEmpty());
            }
        }
    } else {
        // doc should be empty
        Assert.assertEquals(0, d.getDictionary().size());
    }

    // there should be no other hits
    Assert.assertFalse(seekableNestedIterator.hasNext());
}
Use of datawave.query.attributes.Attribute in the datawave project by NationalSecurityAgency.
Class DocumentTransformerSupport, method extractMetrics.
/**
 * Folds the counts carried by a document's timing metadata into the enclosing class's counters.
 * <p>
 * Does nothing when the dictionary entry under {@code LogTiming.TIMING_METADATA} is absent or is not a
 * {@link TimingMetadata}. Otherwise the source/next/seek/yield counts are added to the matching counters,
 * one of the range counters is bumped depending on which stage timer is present, and the details are
 * logged at trace level (or at info level when {@code logTimingDetails} is set and trace is disabled).
 *
 * @param document the document whose dictionary is inspected
 * @param documentKey the key reported in the log message
 * @throws EmptyObjectException when the timing metadata is the only entry in the document's dictionary
 */
protected void extractMetrics(Document document, Key documentKey) {
    final Map<String, Attribute<? extends Comparable<?>>> attributesByField = document.getDictionary();
    final Attribute<? extends Comparable<?>> candidate = attributesByField.get(LogTiming.TIMING_METADATA);

    // instanceof is false for null, so a missing entry is rejected here as well
    if (!(candidate instanceof TimingMetadata)) {
        return;
    }
    final TimingMetadata timing = (TimingMetadata) candidate;

    final long sources = timing.getSourceCount();
    final long nexts = timing.getNextCount();
    final long seeks = timing.getSeekCount();
    final long yields = timing.getYieldCount();
    final String host = timing.getHost();

    sourceCount += sources;
    nextCount += nexts;
    seekCount += seeks;
    yieldCount += yields;

    // a document-specific tree takes precedence over a field index tree when both timers are present
    final Map<String, Long> stageTimers = timing.getStageTimers();
    if (stageTimers.containsKey(QuerySpan.Stage.DocumentSpecificTree.toString())) {
        docRanges++;
    } else if (stageTimers.containsKey(QuerySpan.Stage.FieldIndexTree.toString())) {
        fiRanges++;
    }

    final boolean traceEnabled = log.isTraceEnabled();
    if (logTimingDetails || traceEnabled) {
        final String message = "retrieved document from host:" + host
                        + " at key:" + documentKey.toStringNoTime()
                        + " stageTimers:" + stageTimers
                        + " sourceCount:" + sources
                        + " nextCount:" + nexts
                        + " seekCount:" + seeks
                        + " yieldCount:" + yields;
        if (traceEnabled) {
            log.trace(message);
        } else {
            log.info(message);
        }
    }

    if (attributesByField.size() == 1) {
        // this document contained only timing metadata
        throw new EmptyObjectException();
    }
}
Use of datawave.query.attributes.Attribute in the datawave project by NationalSecurityAgency.
Class GroupingTransform, method getListKeyCounts.
/**
 * Adds the grouping-field value combinations found in one document to {@code countingMap}.
 * <p>
 * The document key is recorded in {@code keys}. Dictionary entries whose name starts with {@code COUNT}
 * are read as per-context counts. Then, for each position up to the longest value list, one attribute per
 * expanded group field is collected; a complete collection is added to {@code countingMap} as many times
 * as the {@code COUNT.<groupingContext>} value says (once when there is no such value) and its column
 * visibility is stored in {@code fieldVisibilities}.
 *
 * @param entry the document key and document to process
 */
private void getListKeyCounts(Entry<Key, Document> entry) {
    // both alternatives are spelled out in full: "t" : "web" + "server" binds as "t" : ("web" + "server")
    log.trace("{} get list key counts for: {}", flatten ? "tserver" : "webserver", entry);
    keys.add(entry.getKey());
    Set<String> expandedGroupFieldsList = new LinkedHashSet<>();

    // if the incoming Documents have been aggregated on the tserver, they will have a COUNT field.
    // use the value in the COUNT field as a loop max when the fields are put into the countingMap
    // During the flush operation, a new COUNT field will be created based on the number of unique
    // field sets in the countingMap
    Map<String, Attribute<? extends Comparable<?>>> dictionary = entry.getValue().getDictionary();
    Map<String, Integer> countKeyMap = new HashMap<>();
    for (Entry<String, Attribute<? extends Comparable<?>>> dictionaryEntry : dictionary.entrySet()) {
        String countKey = dictionaryEntry.getKey();
        if (countKey.startsWith("COUNT")) {
            TypeAttribute<?> countTypeAttribute = (TypeAttribute<?>) dictionaryEntry.getValue();
            int count = ((BigDecimal) countTypeAttribute.getType().getDelegate()).intValue();
            countKeyMap.put(countKey, count);
        }
    }

    Multimap<String, String> fieldToFieldWithContextMap = this.getFieldToFieldWithGroupingContextMap(entry.getValue(), expandedGroupFieldsList);
    log.trace("got a new fieldToFieldWithContextMap: {}", fieldToFieldWithContextMap);
    int longest = this.longestValueList(fieldToFieldWithContextMap);
    for (int i = 0; i < longest; i++) {
        Collection<GroupingTypeAttribute<?>> fieldCollection = new HashSet<>();
        String currentGroupingContext = "";
        for (String fieldListItem : expandedGroupFieldsList) {
            log.trace("fieldListItem: {}", fieldListItem);
            Collection<String> gtNames = fieldToFieldWithContextMap.get(fieldListItem);
            if (gtNames == null || gtNames.isEmpty()) {
                log.trace("gtNames: {}", gtNames);
                log.trace("fieldToFieldWithContextMap: {} did not contain: {}", fieldToFieldWithContextMap, fieldListItem);
            } else {
                String gtName = gtNames.iterator().next();
                int idx = gtName.indexOf('.');
                if (idx != -1) {
                    currentGroupingContext = gtName.substring(idx + 1);
                }
                // consume the name so the next pass picks up the following value; names without context are kept
                if (!fieldListItem.equals(gtName)) {
                    fieldToFieldWithContextMap.remove(fieldListItem, gtName);
                }
                log.trace("fieldToFieldWithContextMap now: {}", fieldToFieldWithContextMap);
                log.trace("gtName: {}", gtName);
                fieldCollection.add(fieldMap.get(gtName));
            }
        }
        if (fieldCollection.size() == expandedGroupFieldsList.size()) {
            // get the count out of the countKeyMap
            Integer count = countKeyMap.get("COUNT." + currentGroupingContext);
            if (count == null) {
                count = 1;
            }
            // see above comment about the COUNT field
            log.trace("{} adding {} of {} to counting map", flatten ? "tserver" : "webserver", count, fieldCollection);
            IntStream.range(0, count).forEach(j -> countingMap.add(fieldCollection));
            fieldVisibilities.put(fieldCollection, getColumnVisibility(entry));
            // only compute the visibility a second time when the line will actually be written
            if (log.isTraceEnabled()) {
                log.trace("put {} to {} into fieldVisibilities {}", fieldCollection, getColumnVisibility(entry), fieldVisibilities);
            }
        } else {
            log.trace("fieldList.size() != this.expandedGroupFieldsList.size()");
            log.trace("fieldList: {}", fieldCollection);
            log.trace("expandedGroupFieldsList: {}", expandedGroupFieldsList);
        }
    }
    log.trace("countingMap: {}", countingMap);
}
Use of datawave.query.attributes.Attribute in the datawave project by NationalSecurityAgency.
Class TLDFieldIndexAggregatorTest, method apply_testAggregateFilter.
/**
 * Applies a {@code TLDFieldIndexAggregator} configured with aggregated field {@code FOO} and a mocked
 * filter to seven field index keys, then checks which fields end up in the document and which are
 * flagged to keep.
 *
 * @throws IOException propagated from the iterator and aggregator calls
 */
@Test
public void apply_testAggregateFilter() throws IOException {
    EventDataQueryFilter mockFilter = EasyMock.createMock(EventDataQueryFilter.class);
    AttributeFactory factory = new AttributeFactory(new TypeMetadata());

    Set<String> aggregatedFields = new HashSet<>();
    aggregatedFields.add("FOO");
    aggregator = new TLDFieldIndexAggregator(aggregatedFields, mockFilter, -1);

    TreeMap<Key, Value> treeMap = new TreeMap<>();
    treeMap.put(getFi("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    treeMap.put(getFi("123", "FIELD1", "VALUE2", "dataType1", "123.345.456.1", 10), new Value());
    treeMap.put(getFi("123", "FIELD1", "VALUE3", "dataType1", "123.345.456.2", 10), new Value());
    treeMap.put(getFi("123", "FIELD1", "VALUE4", "dataType1", "123.345.456.3", 10), new Value());
    // FOO included in the filter
    treeMap.put(getFi("123", "FOO", "bar", "dataType1", "123.345.456.3", 10), new Value());
    // FOO2 not included in the filter
    treeMap.put(getFi("123", "FOO2", "bar", "dataType1", "123.345.456.3", 10), new Value());
    // key outside the range which should not be aggregated
    treeMap.put(getFi("123", "XENO", "zap", "dataType1", "234.345.456", 10), new Value());

    // record the keep(Key) expectation, then switch the mock to replay mode
    EasyMock.expect(mockFilter.keep(EasyMock.isA(Key.class))).andReturn(true);
    EasyMock.replay(mockFilter);

    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);

    Document doc = new Document();
    aggregator.apply(itr, doc, factory);
    EasyMock.verify(mockFilter);

    // list of FIELD1 values to expect: VALUE1 through VALUE4
    List<String> expectedFieldValues = new ArrayList<>();
    for (int i = 1; i <= 4; i++) {
        expectedFieldValues.add("VALUE" + i);
    }

    // FIELD1 as a whole is kept, but none of its individual values are
    assertTrue(doc.get("FIELD1").isToKeep());
    Set<Attribute> attributes = ((Set<Attribute>) doc.get("FIELD1").getData());
    assertTrue(attributes.size() == 4);
    for (Attribute attr : attributes) {
        assertFalse(attr.isToKeep());
        assertTrue(expectedFieldValues.remove(attr.getData().toString()));
    }
    assertTrue(expectedFieldValues.isEmpty());

    // FOO kept
    assertTrue(doc.get("FOO").isToKeep());
    // FOO2 not kept
    assertFalse(doc.get("FOO2").isToKeep());
    // out of document range not included
    assertTrue(doc.get("XENO") == null);
}
Use of datawave.query.attributes.Attribute in the datawave project by NationalSecurityAgency.
Class GroupFields, method apply.
/**
 * Counts the values of every document field whose name, with any grouping context removed, is in
 * {@code groupFieldsSet}, then hands the accumulated counts to {@code applyCounts}.
 * <p>
 * The grouping context is everything from the first {@code '.'} in the field name onward. Counts are
 * accumulated in {@code groupFieldsMap} under the full field name (grouping context included); an
 * {@code Attributes} value contributes its size, any other attribute contributes one.
 *
 * @param entry the document key and document to process
 * @return the same {@code entry} instance that was passed in
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> entry) {
    Document document = entry.getValue();
    // checked once so trace messages (which stringify whole attributes) are only built when they will be written
    final boolean traceEnabled = log.isTraceEnabled();
    for (Map.Entry<String, Attribute<? extends Comparable<?>>> de : document.entrySet()) {
        String keyWithGrouping = de.getKey();
        if (traceEnabled) {
            log.trace("keyWithGrouping is:" + keyWithGrouping);
        }
        String keyNoGrouping = keyWithGrouping;
        // if we have grouping context on, remove the grouping context
        int contextStart = keyWithGrouping.indexOf('.');
        if (contextStart != -1) {
            keyNoGrouping = keyWithGrouping.substring(0, contextStart);
            if (traceEnabled) {
                log.trace("keyNoGrouping is:" + keyNoGrouping);
            }
        }
        // look for the key without the grouping context
        if (!this.groupFieldsSet.contains(keyNoGrouping)) {
            continue;
        }
        if (traceEnabled) {
            log.trace("groupFieldsSet contains " + keyNoGrouping + " so grouping with " + keyWithGrouping);
        }
        Attribute<?> attr = de.getValue();
        int delta = 1;
        if (attr instanceof Attributes) {
            delta = ((Attributes) attr).size();
        }
        if (traceEnabled) {
            log.trace("delta for " + attr + " is " + delta);
        }
        // increment the count
        Integer previous = this.groupFieldsMap.get(keyWithGrouping);
        int count = previous == null ? 0 : previous;
        this.groupFieldsMap.put(keyWithGrouping, count + delta);
    }
    // mutate the document with the changes collected in the above loop
    applyCounts(document, groupFieldsMap);
    return entry;
}
Aggregations