use of datawave.query.attributes.Attributes in project datawave by NationalSecurityAgency.
the class FacetedGrouping method apply.
/**
 * Regroups the multi-valued fields of the incoming document into cardinality buckets.
 * <p>
 * For every dictionary entry other than {@code Document.DOCKEY_FIELD_NAME} whose value is an
 * {@code Attributes}:
 * <ol>
 * <li>the ranges cached for that field in {@code cachedAttributeRanges} are fetched, and passed through
 * {@code adjustAttributeGrouping} when the cached count plus the incoming count exceeds
 * {@code config.getMaximumFacetGroupCount()};</li>
 * <li>one empty bucket is created per cached range;</li>
 * <li>each incoming {@code Cardinality} is merged into the first bucket for which
 * {@code bucket.getContent().isWithin(card.getContent())} holds, or is appended as a bucket of its own;</li>
 * <li>every resulting bucket is recorded back into {@code cachedAttributeRanges} and the field is replaced
 * on the document.</li>
 * </ol>
 * Entries whose value is not an {@code Attributes} are left untouched.
 *
 * @param input
 *            the key/document pair to regroup; the document is modified in place
 * @return an immutable entry holding the input key and the modified input document
 * @throws ClassCastException
 *             if a member of an {@code Attributes} value is not a {@code Cardinality}
 * @throws RuntimeException
 *             wrapping any {@code CardinalityMergeException} raised while merging
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Key topKey = input.getKey();
    Document currentDoc = input.getValue();
    // list of document attributes to update.
    TreeMultimap<String, Attribute<?>> newDocumentAttributes = TreeMultimap.create();

    Map<?, ?> currentAttr = currentDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        if (attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }
        if (!(attr.getValue() instanceof Attributes)) {
            // ignore none Attributes attributes
            if (log.isTraceEnabled()) {
                log.trace(attr.getKey() + " is " + attr.getValue().getClass());
            }
            continue;
        }

        Attributes newAttrs = new Attributes(attr.getValue().isToKeep());
        Set<Attribute<? extends Comparable<?>>> attributes = ((Attributes) attr.getValue()).getAttributes();
        if (log.isTraceEnabled()) {
            log.trace(attr.getKey() + " is attributes, size is " + attributes.size());
        }

        Collection<FieldValueCardinality> cardList = cachedAttributeRanges.get(attr.getKey());
        // we already know that we will exceed the list size
        if (cardList.size() + attributes.size() > config.getMaximumFacetGroupCount()) {
            if (log.isTraceEnabled()) {
                log.trace("cardinality exceeds maximum facet count");
            }
            cardList = adjustAttributeGrouping(cardList, attributes);
        }

        // Seed one empty bucket per cached range.
        List<Cardinality> newCardList = Lists.newArrayList();
        for (FieldValueCardinality fvcBucket : cardList) {
            FieldValueCardinality fvc = new FieldValueCardinality();
            fvc.setContent(fvcBucket.getFloorValue());
            fvc.setCeiling(fvcBucket.getCeilingValue());
            // for cardinalities, only use the visibility metadata
            // (previously this key was built but the attribute's full metadata was passed instead)
            Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, attr.getValue().getColumnVisibility(), -1);
            newCardList.add(new Cardinality(fvc, metadata, newAttrs.isToKeep()));
        }

        // Fold each incoming cardinality into the first matching bucket. Unmatched cardinalities are
        // appended to the same list, so later cardinalities can also merge into them.
        for (Attribute<? extends Comparable<?>> candidate : attributes) {
            Cardinality card = (Cardinality) candidate;
            boolean foundBucket = false;
            for (Cardinality bucket : newCardList) {
                if (bucket.getContent().isWithin(card.getContent())) {
                    try {
                        bucket.getContent().merge(card.getContent());
                        foundBucket = true;
                    } catch (CardinalityMergeException e) {
                        throw new RuntimeException(e);
                    }
                    break;
                }
            }
            if (!foundBucket) {
                newCardList.add(card);
            }
        }

        for (Cardinality cardBucket : newCardList) {
            newAttrs.add(cardBucket);
            cachedAttributeRanges.put(attr.getKey(), cardBucket.getContent());
        }
        newDocumentAttributes.put(attr.getKey(), newAttrs);
    }

    if (log.isTraceEnabled()) {
        log.trace("entries" + newDocumentAttributes.entries());
    }
    // Replacement is deferred until here so the dictionary is not modified while it is being iterated.
    for (Entry<String, Attribute<?>> newAttr : newDocumentAttributes.entries()) {
        currentDoc.replace(newAttr.getKey(), newAttr.getValue(), false, false);
    }
    if (log.isTraceEnabled()) {
        log.trace("currentDoc" + currentDoc);
    }
    return Maps.immutableEntry(topKey, currentDoc);
}
use of datawave.query.attributes.Attributes in project datawave by NationalSecurityAgency.
the class CardinalitySummation method apply.
/**
 * Folds the attributes of the incoming document into the running {@code newDocumentAttributes} field and
 * returns a summary document built from everything accumulated so far.
 * <p>
 * For every dictionary entry other than {@code Document.DOCKEY_FIELD_NAME}, each attribute is compared
 * (via {@code equals}) against the attributes already accumulated for that field. On a match the two
 * cardinalities are combined through {@code merge(...)}; otherwise the attribute is added to the
 * accumulator.
 * <p>
 * Side effects: updates the {@code newDocumentAttributes}, {@code referenceDocument} and
 * {@code referenceKey} fields.
 *
 * @param input
 *            the key/document pair to summarise
 * @return an immutable entry of the reduced key (row and column family of the input key, with
 *         {@code MAX_UNICODE} as the column qualifier) and the newly built reference document
 * @throws RuntimeException
 *             if a member of an {@code Attributes} value is not a {@code Cardinality} while other
 *             attributes are already accumulated for that field
 * @throws ClassCastException
 *             if a single-valued attribute matches an accumulated one but either is not a
 *             {@code Cardinality}
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@SuppressWarnings("unchecked")
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Key topKey = input.getKey();
    // reduce the key to the document key pieces only and a max cq in order to ensure the top key
    // sorts after the pieces it is summarizing.
    topKey = new Key(topKey.getRow(), topKey.getColumnFamily(), MAX_UNICODE);
    DatawaveKey parser = new DatawaveKey(topKey);

    Document currentDoc = input.getValue();
    Map<?, ?> currentAttr = currentDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        if (attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }

        if (attr.getValue() instanceof Attributes) {
            Attributes attrs = (Attributes) attr.getValue();
            // View of what has been accumulated for this field; it is re-read on every iteration below,
            // so it is expected to reflect the puts made inside the loop.
            NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
            for (Attribute<?> myAttribute : attrs.getAttributes()) {
                if (log.isTraceEnabled()) {
                    log.trace("Attributes for " + attr.getKey() + " " + attributes.iterator().hasNext());
                }

                if (attributes.isEmpty()) {
                    if (log.isTraceEnabled()) {
                        log.trace("adding attributes " + attr.getKey() + " " + myAttribute.getData());
                    }
                    newDocumentAttributes.put(attr.getKey(), myAttribute);
                    continue;
                }

                // Only cardinalities can be merged with what is already accumulated.
                if (!(myAttribute instanceof Cardinality)) {
                    throw new RuntimeException("Have " + myAttribute.getClass());
                }
                Cardinality otherCard = (Cardinality) myAttribute;

                boolean foundAmongOthers = false;
                for (Attribute<?> thoseAttributes : attributes) {
                    if (otherCard.equals(thoseAttributes)) {
                        Cardinality card = (Cardinality) thoseAttributes;
                        merge(card, otherCard, parser, merge);
                        if (log.isTraceEnabled()) {
                            log.trace("Offering to " + attr.getKey() + " value " + card.getContent().getFloorValue() + " " + card.getContent().getCeilingValue());
                        }
                        foundAmongOthers = true;
                        break;
                    }
                }

                if (!foundAmongOthers) {
                    if (log.isTraceEnabled()) {
                        log.trace("put attributes " + attr.getKey() + " " + myAttribute.getData());
                    }
                    // The scan over the accumulated set has finished, so inserting here cannot
                    // disturb an active iteration.
                    newDocumentAttributes.put(attr.getKey(), myAttribute);
                }
            }
        } else {
            if (log.isTraceEnabled()) {
                log.trace("Testing " + attr.getKey() + " " + attr.getValue().getData());
            }
            NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
            boolean found = false;
            for (Attribute<?> thoseAttributes : attributes) {
                if (thoseAttributes.equals(attr.getValue())) {
                    if (log.isTraceEnabled()) {
                        log.trace("found for " + attr.getKey() + " " + thoseAttributes.getData());
                    }
                    Cardinality card = (Cardinality) thoseAttributes;
                    Cardinality otherCard = (Cardinality) attr.getValue();
                    merge(card, otherCard, parser, merge);
                    found = true;
                    break;
                }
            }
            if (!found) {
                if (log.isTraceEnabled()) {
                    log.trace("Don't have " + attr.getKey() + " " + attr.getValue().getData());
                }
                newDocumentAttributes.put(attr.getKey(), attr.getValue());
            }
        }
    }

    referenceDocument = new Document();
    if (log.isTraceEnabled()) {
        log.trace("entries" + newDocumentAttributes.entries());
    }
    referenceDocument.putAll(newDocumentAttributes.entries().iterator(), false);
    if (log.isTraceEnabled()) {
        log.trace("currentDoc" + referenceDocument);
    }
    referenceKey = topKey;
    return Maps.immutableEntry(topKey, referenceDocument);
}
use of datawave.query.attributes.Attributes in project datawave by NationalSecurityAgency.
the class MinimumEstimation method apply.
/**
 * Builds a new document containing only the cardinalities whose estimate reaches {@code minimumCount}.
 * <p>
 * Every dictionary entry other than {@code Document.DOCKEY_FIELD_NAME} is inspected. For an
 * {@code Attributes} value each member is tested individually; any other value is treated as a single
 * {@code Cardinality}. A cardinality is kept when
 * {@code getContent().getEstimate().cardinality() >= minimumCount}.
 *
 * @param input
 *            the key/document pair to filter; the input document itself is not modified
 * @return an immutable entry of the input key and a freshly created document holding the kept
 *         cardinalities
 * @throws RuntimeException
 *             if a member of an {@code Attributes} value is not a {@code Cardinality}
 * @throws ClassCastException
 *             if a single-valued attribute is not a {@code Cardinality}
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Key topKey = input.getKey();
    Document inputDoc = input.getValue();
    TreeMultimap<String, Attribute<? extends Comparable<?>>> newDocumentAttributes = TreeMultimap.create();

    Map<?, ?> currentAttr = inputDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        if (attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }

        if (!(attr.getValue() instanceof Attributes)) {
            Cardinality card = (Cardinality) attr.getValue();
            if (card.getContent().getEstimate().cardinality() >= minimumCount) {
                newDocumentAttributes.put(attr.getKey(), card);
            }
            continue;
        }

        Attributes attrs = (Attributes) attr.getValue();
        // Only consulted by the trace statement below.
        NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
        for (Attribute<?> myAttribute : attrs.getAttributes()) {
            if (log.isTraceEnabled()) {
                log.trace("Attributes for " + attr.getKey() + " " + attributes.iterator().hasNext());
            }
            if (!(myAttribute instanceof Cardinality)) {
                throw new RuntimeException("Have " + myAttribute.getClass());
            }
            Cardinality card = (Cardinality) myAttribute;
            if (card.getContent().getEstimate().cardinality() >= minimumCount) {
                newDocumentAttributes.put(attr.getKey(), myAttribute);
            }
        }
    }

    Document currentDoc = new Document();
    if (log.isTraceEnabled()) {
        log.trace("entries" + newDocumentAttributes.entries());
    }
    currentDoc.putAll(newDocumentAttributes.entries().iterator(), false);
    if (log.isTraceEnabled()) {
        log.trace("currentDoc" + currentDoc);
    }
    return Maps.immutableEntry(topKey, currentDoc);
}
use of datawave.query.attributes.Attributes in project datawave by NationalSecurityAgency.
the class IfThisTestFailsThenHitTermsAreBroken method runTestQuery.
/**
 * Runs {@code querystr} through {@code logic} and verifies both the returned UUIDs and their hit terms.
 * <p>
 * Each result document must carry a {@code UUID.0} attribute whose value is contained in
 * {@code expected}. Every hit term found under {@code JexlEvaluation.HIT_TERM_FIELD} is removed from
 * {@code expectedHitTerms} for that UUID; a hit term that cannot be removed fails the test. After all
 * results are consumed, the set of returned UUIDs must match {@code expected} in content and size, and
 * {@code expectedHitTerms} must be empty.
 *
 * @param expected
 *            the UUIDs the query is expected to return
 * @param querystr
 *            the query to run
 * @param startDate
 *            begin date applied to the query settings
 * @param endDate
 *            end date applied to the query settings
 * @param extraParms
 *            parameters applied to the query settings
 * @param expectedHitTerms
 *            expected hit terms keyed by UUID; entries are removed from this multimap as they are matched
 * @throws Exception
 *             if initialising or running the query fails
 */
protected void runTestQuery(List<String> expected, String querystr, Date startDate, Date endDate, Map<String, String> extraParms, Multimap<String, String> expectedHitTerms) throws Exception {
    log.debug("runTestQuery");
    log.trace("Creating QueryImpl");
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(querystr);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());

    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());

    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);

    Set<String> resultSet = new HashSet<>();
    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.debug(entry.getKey() + " => " + d);

        Attribute<?> attr = d.get("UUID.0");
        Assert.assertNotNull("Result Document did not contain a 'UUID'", attr);
        Assert.assertTrue("Expected result to be an instance of DatwawaveTypeAttribute, was: " + attr.getClass().getName(), attr instanceof TypeAttribute || attr instanceof PreNormalizedAttribute);
        // NOTE(review): the assertion above also admits PreNormalizedAttribute, but the cast below
        // requires a TypeAttribute - confirm a PreNormalizedAttribute cannot reach this point.
        TypeAttribute<?> uuidAttr = (TypeAttribute<?>) attr;
        String uuid = uuidAttr.getType().getDelegate().toString();
        Assert.assertTrue("Received unexpected UUID: " + uuid, expected.contains(uuid));

        Attribute<?> hitTermAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitTermAttribute instanceof Attributes) {
            Attributes hitTerms = (Attributes) hitTermAttribute;
            for (Attribute<?> hitTerm : hitTerms.getAttributes()) {
                log.debug("hitTerm:" + hitTerm);
                String hitString = hitTerm.getData().toString();
                log.debug("as string:" + hitString);
                log.debug("expectedHitTerms:" + expectedHitTerms);
                Assert.assertNotEquals(hitTerm.getTimestamp(), Long.MAX_VALUE);
                // make sure this hitString is in the map, and remove it
                boolean removed = expectedHitTerms.get(uuid).remove(hitString);
                if (!removed) {
                    log.debug("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                    Assert.fail("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                } else {
                    log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                }
            }
        } else if (hitTermAttribute != null) {
            // A single hit term rather than a collection of them.
            log.debug("hitTerm:" + hitTermAttribute);
            String hitString = hitTermAttribute.getData().toString();
            log.debug("as string:" + hitString);
            log.debug("expectedHitTerms:" + expectedHitTerms);
            boolean removed = expectedHitTerms.get(uuid).remove(hitString);
            if (!removed) {
                log.debug("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms);
                Assert.fail("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms);
            } else {
                log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerm:" + hitTermAttribute);
            }
        }
        resultSet.add(uuid);
    }

    if (expected.size() > resultSet.size()) {
        Set<String> missing = new HashSet<>(expected);
        missing.removeAll(resultSet);
        for (String s : missing) {
            log.warn("Missing: " + s);
        }
    }
    if (!expected.containsAll(resultSet)) {
        log.error("Expected results " + expected + " differ form actual results " + resultSet);
    }
    Assert.assertTrue("Expected results " + expected + " differ form actual results " + resultSet, expected.containsAll(resultSet));
    Assert.assertEquals("Unexpected number of records", expected.size(), resultSet.size());

    // every matched hit term was removed above, so anything left over was expected but never returned
    log.debug("expectedHitTerms:" + expectedHitTerms);
    Assert.assertTrue("Expected hit terms were not all returned: " + expectedHitTerms, expectedHitTerms.isEmpty());
}
use of datawave.query.attributes.Attributes in project datawave by NationalSecurityAgency.
the class UseOccurrenceToCountInJexlContextTest method runTestQuery.
/**
 * Runs {@code querystr} through {@code logic} against the given connector and verifies both the
 * returned UUIDs and their hit terms.
 * <p>
 * Each result document must carry a {@code UUID.0} attribute whose value is contained in
 * {@code expected}. Every hit term found under {@code JexlEvaluation.HIT_TERM_FIELD} is removed from
 * {@code expectedHitTerms} for that UUID; a hit term that cannot be removed fails the test. After all
 * results are consumed, the set of returned UUIDs must match {@code expected} in content and size, and
 * {@code expectedHitTerms} must be empty.
 *
 * @param expected
 *            the UUIDs the query is expected to return
 * @param querystr
 *            the query to run
 * @param startDate
 *            begin date applied to the query settings
 * @param endDate
 *            end date applied to the query settings
 * @param extraParms
 *            parameters applied to the query settings (see the review note in the body)
 * @param expectedHitTerms
 *            expected hit terms keyed by UUID; entries are removed from this multimap as they are matched
 * @param connector
 *            the connector handed to {@code logic.initialize}
 * @throws Exception
 *             if initialising or running the query fails
 */
protected void runTestQuery(List<String> expected, String querystr, Date startDate, Date endDate, Map<String, String> extraParms, Multimap<String, String> expectedHitTerms, Connector connector) throws Exception {
    log.debug("runTestQuery");
    log.trace("Creating QueryImpl");
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(querystr);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());
    // NOTE(review): if setParameters replaces rather than merges, this call discards extraParms set
    // above and only "hit.list" survives - confirm that is intended before relying on extraParms.
    settings.setParameters(Collections.singletonMap("hit.list", "true"));

    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());

    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);

    Set<String> resultSet = new HashSet<>();
    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.debug(entry.getKey() + " => " + d);

        Attribute<?> attr = d.get("UUID.0");
        Assert.assertNotNull("Result Document did not contain a 'UUID'", attr);
        Assert.assertTrue("Expected result to be an instance of DatwawaveTypeAttribute, was: " + attr.getClass().getName(), attr instanceof TypeAttribute || attr instanceof PreNormalizedAttribute);
        // NOTE(review): the assertion above also admits PreNormalizedAttribute, but the cast below
        // requires a TypeAttribute - confirm a PreNormalizedAttribute cannot reach this point.
        TypeAttribute<?> uuidAttr = (TypeAttribute<?>) attr;
        String uuid = uuidAttr.getType().getDelegate().toString();
        Assert.assertTrue("Received unexpected UUID: " + uuid, expected.contains(uuid));

        Attribute<?> hitTermAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitTermAttribute instanceof Attributes) {
            Attributes hitTerms = (Attributes) hitTermAttribute;
            for (Attribute<?> hitTerm : hitTerms.getAttributes()) {
                log.debug("hitTerm:" + hitTerm);
                String hitString = hitTerm.getData().toString();
                log.debug("as string:" + hitString);
                log.debug("expectedHitTerms:" + expectedHitTerms);
                // make sure this hitString is in the map, and remove it
                boolean removed = expectedHitTerms.get(uuid).remove(hitString);
                if (!removed) {
                    log.debug("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                    Assert.fail("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                } else {
                    log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerms:" + hitTerms);
                }
            }
        } else if (hitTermAttribute != null) {
            // A single hit term rather than a collection of them.
            log.debug("hitTerm:" + hitTermAttribute);
            String hitString = hitTermAttribute.getData().toString();
            log.debug("as string:" + hitString);
            log.debug("expectedHitTerms:" + expectedHitTerms);
            boolean removed = expectedHitTerms.get(uuid).remove(hitString);
            if (!removed) {
                log.debug("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms);
                Assert.fail("failed to find hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms);
            } else {
                log.debug("removed hitString:" + hitString + " for uuid:" + uuid + " in expectedHitTerms:" + expectedHitTerms + " from hitTerm:" + hitTermAttribute);
            }
        }
        resultSet.add(uuid);
    }

    if (expected.size() > resultSet.size()) {
        Set<String> missing = new HashSet<>(expected);
        missing.removeAll(resultSet);
        for (String s : missing) {
            log.warn("Missing: " + s);
        }
    }
    if (!expected.containsAll(resultSet)) {
        log.error("Expected results " + expected + " differ form actual results " + resultSet);
    }
    Assert.assertTrue("Expected results " + expected + " differ form actual results " + resultSet, expected.containsAll(resultSet));
    Assert.assertEquals("Unexpected number of records", expected.size(), resultSet.size());

    // every matched hit term was removed above, so anything left over was expected but never returned
    log.debug("expectedHitTerms:" + expectedHitTerms);
    Assert.assertTrue("Expected hit terms were not all returned: " + expectedHitTerms, expectedHitTerms.isEmpty());
}
Aggregations