use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.
the class HitsAreAlwaysIncludedCommonalityTokenTest method runTestQuery.
/**
 * Runs {@code queryString} through the query logic and verifies every returned document against {@code goodResults}.
 * <p>
 * For each document, every {@link Content} hit term must be contained in {@code goodResults}. Afterwards each {@code field:value} pair found in the
 * document's dictionary is removed from {@code goodResults}, and {@code goodResults} must be empty once a document has been processed. At least one
 * document must be returned.
 *
 * @param connector
 *            connector handed to the query logic when it is initialized
 * @param queryString
 *            the query to run
 * @param startDate
 *            begin date of the query
 * @param endDate
 *            end date of the query
 * @param extraParms
 *            additional query parameters
 * @param goodResults
 *            the expected {@code field:value} strings; entries are removed from this collection as they are found
 * @throws Exception
 *             propagated from initializing or running the query
 */
protected void runTestQuery(Connector connector, String queryString, Date startDate, Date endDate, Map<String,String> extraParms,
                Collection<String> goodResults) throws Exception {
    QueryImpl query = new QueryImpl();
    query.setBeginDate(startDate);
    query.setEndDate(endDate);
    query.setPagesize(Integer.MAX_VALUE);
    query.setQueryAuthorizations(auths.serialize());
    query.setQuery(queryString);
    query.setParameters(extraParms);
    query.setId(UUID.randomUUID());

    log.debug("query: " + query.getQuery());
    log.debug("logic: " + query.getQueryLogicName());

    GenericQueryConfiguration configuration = logic.initialize(connector, query, authSet);
    logic.setupQuery(configuration);

    Set<Document> returnedDocs = new HashSet<>();
    for (Entry<Key,Value> result : logic) {
        Document doc = deserializer.apply(result).getValue();
        log.trace(result.getKey() + " => " + doc);
        returnedDocs.add(doc);

        // every Content hit term has to be one of the expected results
        Attribute<?> hitTerms = doc.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitTerms instanceof Attributes) {
            for (Attribute<?> hitTerm : ((Attributes) hitTerms).getAttributes()) {
                if (hitTerm instanceof Content) {
                    Assert.assertTrue(goodResults.contains(((Content) hitTerm).getContent()));
                }
            }
        } else if (hitTerms instanceof Content) {
            Assert.assertTrue(goodResults.contains(((Content) hitTerms).getContent()));
        }

        // remove from goodResults as we find the expected return fields
        log.debug("goodResults: " + goodResults);
        Map<String,Attribute<? extends Comparable<?>>> fields = doc.getDictionary();
        log.debug("dictionary:" + fields);
        for (Entry<String,Attribute<? extends Comparable<?>>> field : fields.entrySet()) {
            Attribute<? extends Comparable<?>> value = field.getValue();
            if (value instanceof Attributes) {
                // multi-valued field: each member is matched individually
                for (Attribute<?> member : ((Attributes) value).getAttributes()) {
                    String candidate = field.getKey() + ":" + member;
                    log.debug((goodResults.remove(candidate) ? "removed " : "Did not remove ") + candidate);
                }
            } else {
                String candidate = field.getKey() + ":" + value;
                log.debug((goodResults.remove(candidate) ? "removed " : "Did not remove ") + candidate);
            }
        }

        // checked per document: each returned document must account for all expected results
        Assert.assertTrue(goodResults + " was not empty", goodResults.isEmpty());
    }
    Assert.assertTrue("No docs were returned!", !returnedDocs.isEmpty());
}
use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.
the class QueryIteratorIT method eval.
/**
 * Deserialize and evaluate the document at the top of the iterator, expects 0 to 1 documents.
 * <p>
 * When a hit is expected, the top key's row and column family must equal those of {@code docKeyHit}, the number of document fields (not counting the
 * RECORD_ID field and, if configured, the hit term field) must equal the number of fields in {@code docKeys}, and every listed field must carry the
 * expected values. After a hit has been verified the iterator is advanced once.
 *
 * @param docKeyHit
 *            the expected hit Key, null if no hit expected
 * @param docKeys
 *            the expected values, keyed by field name; the lists are only read, never modified
 * @throws IOException
 *             propagated from the iterator or from deserializing the document
 */
protected void eval(Key docKeyHit, Map<String,List<String>> docKeys) throws IOException {
    // asserts for a hit or miss
    if (docKeyHit == null) {
        assertFalse(iterator.hasTop());
        return;
    }

    assertTrue("Expected hit, but got none", iterator.hasTop());
    Key next = iterator.getTopKey();
    assertNotNull(next);
    // expected value first, so that failure messages label the two sides correctly
    assertEquals(docKeyHit.getRow().toString(), next.getRow().toString());
    assertEquals(docKeyHit.getColumnFamily().toString(), next.getColumnFamily().toString());

    // asserts for document build
    Value topValue = iterator.getTopValue();
    assertNotNull(topValue);
    Map.Entry<Key,Document> deserializedValue = deserialize(topValue);
    assertNotNull(deserializedValue.getValue());
    Document d = deserializedValue.getValue();
    assertNotNull(d);

    // -1 is for RECORD_ID field and -1 for HIT_LIST if configured
    int baseSize = d.getDictionary().size() - 1;
    int docSize = isExpectHitTerm() ? baseSize - 1 : baseSize;
    assertEquals("Unexpected doc size: " + d.getDictionary().size() + "\nGot: " + docSize + "\n" + "expected: " + docKeys, docKeys.keySet().size(),
                    docSize);

    // validate the hitlist
    boolean hitTermPresent = d.getDictionary().get(JexlEvaluation.HIT_TERM_FIELD) != null;
    assertEquals("HIT_TERM presence expected: " + isExpectHitTerm() + " actual: " + hitTermPresent, isExpectHitTerm(), hitTermPresent);

    // verify hits for each specified field
    for (Map.Entry<String,List<String>> fieldExpectation : docKeys.entrySet()) {
        String field = fieldExpectation.getKey();
        List<String> expected = fieldExpectation.getValue();

        // fail with a message naming the field instead of a NullPointerException when it is missing
        Attribute<?> docAttr = d.getDictionary().get(field);
        assertNotNull("Expected field '" + field + "' was not found in the document", docAttr);

        if (expected.size() == 1) {
            // verify the only doc
            if (docAttr instanceof Attributes) {
                // Special handling of Content attributes, typically when TermFrequencies are looked up.
                // TFs append Content attributes which results in Attributes coming back instead of a single Attribute
                Set<?> datas = (Set<?>) docAttr.getData();
                Set<String> dataStrings = datas.stream().map(Object::toString).collect(Collectors.toSet());
                boolean stringsMatch = dataStrings.contains(expected.get(0));
                assertTrue(field + ": value: " + docAttr.getData() + " did not match expected value: " + expected.get(0), stringsMatch);
            } else {
                boolean stringsMatch = docAttr.getData().toString().equals(expected.get(0));
                assertTrue(field + ": value: " + docAttr.getData() + " did not match expected value: " + expected.get(0), stringsMatch);
            }
        } else {
            // the data should be a set, verify it matches expected
            Object dictData = docAttr.getData();
            assertNotNull(dictData);
            assertTrue("Expected " + expected.size() + " values for '" + field + "' found 1, '" + dictData.toString() + "'\nexpected: " + expected,
                            dictData instanceof Set);
            Set<?> dictSet = (Set<?>) dictData;
            assertEquals("Expected " + expected.size() + " values for '" + field + "' found " + dictSet.size() + "\nfound: " + dictSet.toString()
                            + "\nexpected: " + expected, expected.size(), dictSet.size());

            // work on a copy: the caller's list stays intact and fixed-size or immutable lists are supported
            List<String> remaining = new java.util.ArrayList<>(expected);
            for (Object element : dictSet) {
                String foundString = ((Attribute<?>) element).getData().toString();
                assertTrue("could not find " + foundString + " in results! Still had " + remaining, remaining.remove(foundString));
            }
            // verify that every expected value was matched
            assertEquals(0, remaining.size());
        }
    }

    // move past the document that was just verified
    iterator.next();
}
use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.
the class DocumentProjection method trim.
/**
 * Builds a new Document containing copies of only those parts of {@code d} that the projection keeps.
 * <p>
 * A field accepted by the projection is copied as a whole, except that in blacklist mode nested Document and Attributes values are trimmed recursively.
 * A field rejected by the projection is dropped in blacklist mode; in whitelist mode its nested Document or Attributes value is still searched for
 * matching children. Nested results that end up empty are not added.
 *
 * @param d
 *            the document to trim
 * @return a new document holding the retained fields
 */
private Document trim(Document d) {
    Map<String,Attribute<? extends Comparable<?>>> fields = d.getDictionary();
    Document trimmed = new Document();

    for (Entry<String,Attribute<? extends Comparable<?>>> field : fields.entrySet()) {
        String name = field.getKey();
        Attribute<?> value = field.getValue();

        boolean accepted = projection.apply(name);
        boolean blacklisting = projection.isUseBlacklist();
        boolean nested = value instanceof Document || value instanceof Attributes;

        if (accepted && !(blacklisting && nested)) {
            // We just want to add this subtree
            trimmed.put(name, (Attribute<?>) value.copy(), this.includeGroupingContext, this.reducedResponse);
            continue;
        }

        if (!accepted && blacklisting) {
            // rejected by the blacklist, nothing below it is kept
            continue;
        }

        // Remaining cases: an accepted nested value that may still hold blacklisted children,
        // or a value that missed the whitelist but may hold a child that does match it
        if (value instanceof Document) {
            Document subDoc = trim((Document) value);
            if (subDoc.size() > 0) {
                trimmed.put(name, subDoc.copy(), this.includeGroupingContext, this.reducedResponse);
            }
        } else if (value instanceof Attributes) {
            // Since Document instances can be nested under attributes and vice-versa
            // all the way down, we need to pass along the field name so that when we
            // have come up with a nested document it can be evaluated by its own name
            Attributes subAttributes = trim((Attributes) value, name);
            if (subAttributes.size() > 0) {
                trimmed.put(name, subAttributes.copy(), this.includeGroupingContext, this.reducedResponse);
            }
        }
    }
    return trimmed;
}
use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.
the class AbstractVersionFilter method validate.
/**
 * Validate the UID based on the specified pattern. An invalid UID may mean different things in different contexts, such as
 * "No, this Key does not belong to the most current version." but should always prevent the unmodified input from being returned.
 * <p>
 * When the UID is found to be invalid and a document was supplied, every entry is removed from that document. Exceptions raised while validating are
 * logged and not rethrown; the result determined up to that point is returned.
 *
 * @param dataType
 *            the data type, passed through to the row-level validation
 * @param uid
 *            the UID to validate; an empty UID is reported as valid without further checks
 * @param pattern
 *            regular expression used to split the UID and, failing a two-part split, to match it as a whole
 * @param key
 *            the key whose row is used for validation
 * @param document
 *            the document to clear when the UID is invalid, may be null
 * @param isMultiMapping
 *            indicates whether multiple data types are mapped, which helps make pattern lookup a little more efficient
 * @return false if the row-level validation rejected the UID, true otherwise
 */
private boolean validate(final String dataType, final String uid, final String pattern, final Key key, final Document document, boolean isMultiMapping) {
    if (uid.isEmpty()) {
        return true;
    }

    final String[] pieces = uid.split(pattern);
    boolean valid = true;
    try {
        final Text row = key.getRow();
        if (pieces.length == 2) {
            // keep everything up to the trailing piece that followed the pattern match
            final int matchedLength = uid.length() - pieces[1].length();
            valid = this.validate(row, dataType, uid.substring(0, matchedLength), isMultiMapping);
        } else if (uid.matches(pattern)) {
            valid = this.validate(row, dataType, uid, isMultiMapping);
        }

        if (!valid && null != document) {
            // iterate over a snapshot so that entries can be removed from the document while looping
            final Set<Entry<String,Attribute<? extends Comparable<?>>>> snapshot = new HashSet<>(document.entrySet());
            for (final Entry<String,Attribute<? extends Comparable<?>>> field : snapshot) {
                document.removeAll(field.getKey());
            }
        }
    } catch (final Exception e) {
        LOG.error("Could not validate normalized version for " + key, e);
    }
    return valid;
}
use of datawave.query.attributes.Attribute in project datawave by NationalSecurityAgency.
the class AttributeToCardinality method apply.
/**
 * Produces a new document in which every field value of the input document, other than the {@code Document.DOCKEY_FIELD_NAME} field, is represented
 * as a {@link Cardinality}.
 * <p>
 * Values that already are cardinalities are reused. Any other value is wrapped in a new {@link FieldValueCardinality} built from the string form of
 * its data, using metadata that carries only the column visibility of the input document. Multi-valued fields are converted member by member.
 *
 * @param input
 *            the key and document to convert
 * @return an immutable entry pairing the input key with the converted document
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@Override
public Entry<Key,Document> apply(Entry<Key,Document> input) {
    Document source = input.getValue();
    Key docKey = input.getKey();
    // for cardinalities, only use the visibility metadata
    Key visibilityOnly = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, source.getColumnVisibility(), -1);
    Document result = new Document();
    Map<?,?> sourceFields = (Map<?,?>) source.getData();
    TreeMap<String,Attribute<? extends Comparable<?>>> converted = Maps.newTreeMap();
    DatawaveKey parsedKey = new DatawaveKey(input.getKey());

    for (Entry<?,?> rawField : sourceFields.entrySet()) {
        Entry<String,Attribute<?>> field = (Entry<String,Attribute<?>>) rawField;
        if (field.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }

        Attribute<?> value = field.getValue();
        if (value instanceof Attributes) {
            // multi-valued field: convert each member and keep them grouped
            Attributes members = (Attributes) value;
            Attributes convertedMembers = new Attributes(members.isToKeep());
            for (Attribute<?> member : members.getAttributes()) {
                if (member instanceof Cardinality) {
                    convertedMembers.add((Cardinality) member);
                    continue;
                }
                FieldValueCardinality memberCardinality = new FieldValueCardinality();
                memberCardinality.setContent(member.getData().toString());
                memberCardinality.setDoc(source);
                Cardinality wrapped = new Cardinality(memberCardinality, visibilityOnly, members.isToKeep());
                if (log.isTraceEnabled()) {
                    log.trace("Adding from attributes " + field.getKey() + " " + member.getData());
                }
                convertedMembers.add(wrapped);
            }
            converted.put(field.getKey(), convertedMembers);
        } else if (value instanceof Cardinality) {
            converted.put(field.getKey(), (Cardinality) value);
        } else {
            FieldValueCardinality valueCardinality = new FieldValueCardinality();
            valueCardinality.setContent(value.getData().toString());
            valueCardinality.setDoc(source);
            Cardinality wrapped = new Cardinality(valueCardinality, visibilityOnly, value.isToKeep());
            if (log.isTraceEnabled()) {
                log.trace("Adding " + parsedKey.getUid() + " " + field.getKey() + " " + value.getData() + " " + valueCardinality.getEstimate().cardinality());
            }
            converted.put(field.getKey(), wrapped);
        }
    }

    result.putAll(converted.entrySet().iterator(), false);
    return Maps.immutableEntry(docKey, result);
}
Aggregations