Use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.
The class JexlEvaluation, method apply.
@Override
public boolean apply(Tuple3<Key, Document, DatawaveJexlContext> input) {
    Object o = script.execute(input.third());
    if (log.isTraceEnabled()) {
        log.trace("Evaluation of " + query + " against " + input.third() + " returned " + o);
    }
    boolean matched = isMatched(o);
    // Add delayed info to the document
    if (matched && input.third() instanceof DelayedNonEventIndexContext) {
        ((DelayedNonEventIndexContext) input.third()).populateDocument(input.second());
    }
    if (arithmetic instanceof HitListArithmetic) {
        HitListArithmetic hitListArithmetic = (HitListArithmetic) arithmetic;
        if (matched) {
            Document document = input.second();
            Attributes attributes = new Attributes(input.second().isToKeep());
            for (ValueTuple hitTuple : hitListArithmetic.getHitTuples()) {
                ColumnVisibility cv = null;
                String term = hitTuple.getFieldName() + ':' + hitTuple.getValue();
                if (hitTuple.getSource() != null) {
                    cv = hitTuple.getSource().getColumnVisibility();
                }
                // fall back to extracting the column visibility from the document
                if (cv == null) {
                    // get the visibility for the record with this hit; if no visibility is computed,
                    // then there were no hits that match fields still in the document
                    cv = HitListArithmetic.getColumnVisibilityForHit(document, term);
                }
                if (cv != null) {
                    // unused, but calling getTimestamp() forces an update that makes the metadata valid
                    long timestamp = document.getTimestamp();
                    Content content = new Content(term, document.getMetadata(), document.isToKeep());
                    content.setColumnVisibility(cv);
                    attributes.add(content);
                }
            }
            if (attributes.size() > 0) {
                document.put(HIT_TERM_FIELD, attributes);
            }
        }
        hitListArithmetic.clear();
    }
    return matched;
}
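A hedged usage sketch of the method above: evaluating a single document against a query with hit-list tracking enabled, then reading the recorded hits back off the document. The JexlEvaluation and Tuple3 constructors and the no-arg HitListArithmetic are assumptions based on the signatures shown here; the query, key, document, and context variables are hypothetical.

// Sketch only: constructors and variable names are assumptions, not confirmed project API usage.
JexlEvaluation evaluation = new JexlEvaluation("FOO == 'bar'", new HitListArithmetic());
boolean matched = evaluation.apply(new Tuple3<>(documentKey, document, jexlContext));
if (matched) {
    // Hits are recorded as Content attributes whose values are FIELD:value strings, e.g. "FOO:bar".
    Attribute hits = document.get(JexlEvaluation.HIT_TERM_FIELD);
}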
Use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.
The class HitsAreAlwaysIncludedCommonalityTokenTest, method runTestQuery.
protected void runTestQuery(Connector connector, String queryString, Date startDate, Date endDate, Map<String, String> extraParms, Collection<String> goodResults) throws Exception {
    QueryImpl settings = new QueryImpl();
    settings.setBeginDate(startDate);
    settings.setEndDate(endDate);
    settings.setPagesize(Integer.MAX_VALUE);
    settings.setQueryAuthorizations(auths.serialize());
    settings.setQuery(queryString);
    settings.setParameters(extraParms);
    settings.setId(UUID.randomUUID());
    log.debug("query: " + settings.getQuery());
    log.debug("logic: " + settings.getQueryLogicName());
    GenericQueryConfiguration config = logic.initialize(connector, settings, authSet);
    logic.setupQuery(config);
    Set<Document> docs = new HashSet<>();
    for (Entry<Key, Value> entry : logic) {
        Document d = deserializer.apply(entry).getValue();
        log.trace(entry.getKey() + " => " + d);
        docs.add(d);
        Attribute hitAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD);
        if (hitAttribute instanceof Attributes) {
            Attributes attributes = (Attributes) hitAttribute;
            for (Attribute attr : attributes.getAttributes()) {
                if (attr instanceof Content) {
                    Content content = (Content) attr;
                    Assert.assertTrue(goodResults.contains(content.getContent()));
                }
            }
        } else if (hitAttribute instanceof Content) {
            Content content = (Content) hitAttribute;
            Assert.assertTrue(goodResults.contains(content.getContent()));
        }
        // remove from goodResults as we find the expected return fields
        log.debug("goodResults: " + goodResults);
        Map<String, Attribute<? extends Comparable<?>>> dictionary = d.getDictionary();
        log.debug("dictionary:" + dictionary);
        for (Entry<String, Attribute<? extends Comparable<?>>> dictionaryEntry : dictionary.entrySet()) {
            Attribute<? extends Comparable<?>> attribute = dictionaryEntry.getValue();
            if (attribute instanceof Attributes) {
                for (Attribute attr : ((Attributes) attribute).getAttributes()) {
                    String toFind = dictionaryEntry.getKey() + ":" + attr;
                    boolean found = goodResults.remove(toFind);
                    if (found)
                        log.debug("removed " + toFind);
                    else
                        log.debug("Did not remove " + toFind);
                }
            } else {
                String toFind = dictionaryEntry.getKey() + ":" + dictionaryEntry.getValue();
                boolean found = goodResults.remove(toFind);
                if (found)
                    log.debug("removed " + toFind);
                else
                    log.debug("Did not remove " + toFind);
            }
        }
        Assert.assertTrue(goodResults + " was not empty", goodResults.isEmpty());
    }
    Assert.assertTrue("No docs were returned!", !docs.isEmpty());
}
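A hedged sketch of how a test case typically drives this method: the expected HIT_TERM values go into goodResults as FIELD:value strings, matching the format written by JexlEvaluation above. The query, dates, parameter names, and expected values below are illustrative assumptions, not the project's actual test data.

// Hypothetical invocation; the "hit.list" / "include.grouping.context" parameter names and all values are assumptions.
SimpleDateFormat format = new SimpleDateFormat("yyyyMMdd");
Map<String, String> extraParms = new HashMap<>();
extraParms.put("hit.list", "true");
extraParms.put("include.grouping.context", "true");
Collection<String> goodResults = new HashSet<>(Arrays.asList("CANINE.PET.0:shepherd", "CAT.PET.0:tom"));
runTestQuery(connector, "CANINE == 'shepherd'", format.parse("20091231"), format.parse("20150101"), extraParms, goodResults);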
Use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.
The class ContentTransform, method apply.
@Nullable
@Override
public Map.Entry<Key, Document> apply(@Nullable Map.Entry<Key, Document> keyDocumentEntry) {
    if (keyDocumentEntry != null) {
        Document document = keyDocumentEntry.getValue();
        Key documentKey = DocumentTransformer.correctKey(keyDocumentEntry.getKey());
        String colf = documentKey.getColumnFamily().toString();
        int index = colf.indexOf("\0");
        String uid = colf.substring(index + 1);
        for (String contentFieldName : this.contentFieldNames) {
            if (document.containsKey(contentFieldName)) {
                Attribute<?> contentField = document.remove(contentFieldName);
                if (contentField.getData().toString().equalsIgnoreCase("true")) {
                    Content c = new Content(uid, contentField.getMetadata(), document.isToKeep());
                    document.put(contentFieldName, c, false, this.reducedResponse);
                }
            }
        }
    }
    return keyDocumentEntry;
}
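The uid substituted into the Content attribute above is taken from the document key's column family, which for event keys has the form datatype\0uid. A small sketch of that extraction with made-up values:

// Hypothetical column family value; only the substring logic mirrors the method above.
String colf = "csv" + "\0" + "-7dn1xg.ab12cd.-efg345";
String uid = colf.substring(colf.indexOf("\0") + 1); // -> "-7dn1xg.ab12cd.-efg345"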
Use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.
The class DocumentTransformerSupport, method convertMappedAttribute.
private Attribute<?> convertMappedAttribute(Attribute<?> attribute) {
    String attributeString = attribute.getData().toString();
    int idx = attributeString.indexOf(':');
    if (idx != -1) {
        String firstPart = attributeString.substring(0, idx);
        String secondPart = attributeString.substring(idx);
        // Apply the reverse mapping to make the field name human-readable again
        if (null != this.getQm()) {
            firstPart = this.getQm().aliasFieldNameReverseModel(firstPart);
        }
        attribute = new Content(firstPart + secondPart, attribute.getMetadata(), attribute.isToKeep());
    }
    return attribute;
}
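A sketch of the remapping performed above: everything before the first ':' is treated as a field name and run through the query model's reverse mapping, while the remainder (colon included) is kept as-is. The queryModel variable and the COLOR_INTERNAL/COLOR names are hypothetical:

// Hypothetical: assume the reverse model maps COLOR_INTERNAL back to COLOR.
String attributeString = "COLOR_INTERNAL:red";
int idx = attributeString.indexOf(':');
String fieldName = attributeString.substring(0, idx);        // "COLOR_INTERNAL"
String remainder = attributeString.substring(idx);           // ":red"
fieldName = queryModel.aliasFieldNameReverseModel(fieldName); // assumed to yield "COLOR"
Content remapped = new Content(fieldName + remainder, attribute.getMetadata(), attribute.isToKeep()); // "COLOR:red"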
Use of datawave.query.attributes.Content in project datawave by NationalSecurityAgency.
The class TermOffsetPopulator, method getContextMap.
/**
 * Build the term offset map for use in JexlEvaluation
 *
 * @param docKey
 *            key that maps to a document
 * @param keys
 *            set of keys that map to hits on tf fields
 * @param fields
 *            set of fields to remove from the search space
 * @return a map containing the term offset map under the JEXL term-offset variable name, or null if a term weight could not be deserialized
 */
public Map<String, Object> getContextMap(Key docKey, Set<Key> keys, Set<String> fields) {
    document = new Document();
    TermFrequencyIterator tfSource;
    // Do not prune if no fields exist or if the tf fields would prune to nothing. TODO skip tf entirely if this would prune to zero
    if (fields == null || fields.isEmpty() || fields.size() == termFrequencyFieldValues.keySet().size()) {
        tfSource = new TermFrequencyIterator(termFrequencyFieldValues, keys);
    } else {
        // There are fields to remove; reduce the search space and continue
        Multimap<String, String> tfFVs = HashMultimap.create(termFrequencyFieldValues);
        fields.forEach(tfFVs::removeAll);
        tfSource = new TermFrequencyIterator(tfFVs, keys);
        if (tfFVs.size() == 0) {
            log.error("Created a TFIter with no field values. Orig fields: " + termFrequencyFieldValues.keySet() + " fields to remove: " + fields);
        }
    }
    Range range = getRange(keys);
    try {
        tfSource.init(source, null, null);
        tfSource.seek(range, null, false);
    } catch (IOException e) {
        log.error("Seek to the range failed: " + range, e);
    }
    // set the document context on the filter
    if (evaluationFilter != null) {
        evaluationFilter.startNewDocument(docKey);
    }
    Map<String, TermFrequencyList> termOffsetMap = Maps.newHashMap();
    while (tfSource.hasTop()) {
        Key key = tfSource.getTopKey();
        FieldValue fv = FieldValue.getFieldValue(key);
        // add the zone and term to our internal document
        Content attr = new Content(fv.getValue(), source.getTopKey(), evaluationFilter == null || evaluationFilter.keep(key));
        // No need to apply the evaluation filter here, because the TermFrequencyIterator above is already doing more filtering
        // than we could do here, so the filter would be extraneous. However, if an EventDataQueryFilter implementation gets
        // smarter somehow, it can be added back in here. For example, the AncestorQueryLogic may require it.
        // if (evaluationFilter == null || evaluationFilter.apply(Maps.immutableEntry(key, StringUtils.EMPTY_STRING))) {
        this.document.put(fv.getField(), attr);
        TreeMultimap<TermFrequencyList.Zone, TermWeightPosition> offsets = TreeMultimap.create();
        try {
            TermWeight.Info twInfo = TermWeight.Info.parseFrom(tfSource.getTopValue().get());
            // if there are no content expansion fields, then assume every field is permitted for unfielded content functions
            TermFrequencyList.Zone twZone = new TermFrequencyList.Zone(fv.getField(),
                            (contentExpansionFields == null || contentExpansionFields.isEmpty() || contentExpansionFields.contains(fv.getField())),
                            TermFrequencyList.getEventId(key));
            TermWeightPosition.Builder position = new TermWeightPosition.Builder();
            for (int i = 0; i < twInfo.getTermOffsetCount(); i++) {
                position.setTermWeightOffsetInfo(twInfo, i);
                offsets.put(twZone, position.build());
                position.reset();
            }
        } catch (InvalidProtocolBufferException e) {
            log.error("Could not deserialize TermWeight protocol buffer for: " + source.getTopKey());
            return null;
        }
        TermFrequencyList tfl = termOffsetMap.get(fv.getValue());
        if (null == tfl) {
            // First time looking up this term in a field
            termOffsetMap.put(fv.getValue(), new TermFrequencyList(offsets));
        } else {
            // Merge in the offsets for the current field+term with all previous
            // offsets from other fields for the same term
            tfl.addOffsets(offsets);
        }
        try {
            tfSource.next();
        } catch (IOException ioe) {
            log.error("Next failed: " + range, ioe);
            break;
        }
    }
    // Load the term offset map into the map that will be put into the JexlContext
    Map<String, Object> map = new HashMap<>();
    map.put(Constants.TERM_OFFSET_MAP_JEXL_VARIABLE_NAME, termOffsetMap);
    return map;
}
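A hedged sketch of how the returned map is typically consumed: its entries are copied into the JEXL context that JexlEvaluation.apply later evaluates, so content functions can resolve term offsets. The termOffsetPopulator, context, and key-set variable names are assumptions; only the map shape comes from the method above.

// Sketch only: 'context' stands in for the DatawaveJexlContext passed to JexlEvaluation.apply.
Map<String, Object> contextMap = termOffsetPopulator.getContextMap(docKey, tfKeys, fieldsToRemove);
if (contextMap != null) {
    for (Map.Entry<String, Object> entry : contextMap.entrySet()) {
        context.set(entry.getKey(), entry.getValue()); // includes the term offset map under its JEXL variable name
    }
}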