Use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
The class JexlEvaluation, method apply:
@Override
public boolean apply(Tuple3<Key,Document,DatawaveJexlContext> input) {
    Object o = script.execute(input.third());
    if (log.isTraceEnabled()) {
        log.trace("Evaluation of " + query + " against " + input.third() + " returned " + o);
    }
    boolean matched = isMatched(o);
    // Add delayed info to document
    if (matched && input.third() instanceof DelayedNonEventIndexContext) {
        ((DelayedNonEventIndexContext) input.third()).populateDocument(input.second());
    }
    if (arithmetic instanceof HitListArithmetic) {
        HitListArithmetic hitListArithmetic = (HitListArithmetic) arithmetic;
        if (matched) {
            Document document = input.second();
            Attributes attributes = new Attributes(input.second().isToKeep());
            for (ValueTuple hitTuple : hitListArithmetic.getHitTuples()) {
                ColumnVisibility cv = null;
                String term = hitTuple.getFieldName() + ':' + hitTuple.getValue();
                if (hitTuple.getSource() != null) {
                    cv = hitTuple.getSource().getColumnVisibility();
                }
                // fall back to extracting column visibility from the document
                if (cv == null) {
                    // get the visibility for the record with this hit; if no visibility is
                    // computed, then there were no hits that match fields still in the document
                    cv = HitListArithmetic.getColumnVisibilityForHit(document, term);
                }
                if (cv != null) {
                    // unused, but forces an update that makes the metadata valid
                    long timestamp = document.getTimestamp();
                    Content content = new Content(term, document.getMetadata(), document.isToKeep());
                    content.setColumnVisibility(cv);
                    attributes.add(content);
                }
            }
            if (attributes.size() > 0) {
                document.put(HIT_TERM_FIELD, attributes);
            }
        }
        hitListArithmetic.clear();
    }
    return matched;
}
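For context, a minimal invocation sketch. This is hypothetical: the JexlEvaluation and Tuple3 constructor signatures, and the docKey/document/jexlContext variables, are assumptions inferred from the snippet above, not verified API.

// Hypothetical invocation; constructor signatures are assumptions inferred
// from the snippet above, not confirmed against the DataWave API.
JexlEvaluation evaluation = new JexlEvaluation("FOO == 'bar'", new HitListArithmetic());
boolean matched = evaluation.apply(new Tuple3<>(docKey, document, jexlContext));
// on a match, the document now carries a HIT_TERM_FIELD Attributes entry
// listing each FIELD:value pair that satisfied the query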
Use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
The class LimitFields, method apply:
@Override
public Entry<Key,Document> apply(Entry<Key,Document> entry) {
    // key is the limited field name with _ORIGINAL_COUNT appended,
    // value will be set to the original count of that field in the document
    Map<String,Integer> limitedFieldCounts = new HashMap<>();
    Document document = entry.getValue();
    Map<String,String> hitTermMap = this.getHitTermMap(document);
    Multimap<String,Attribute<? extends Comparable<?>>> reducedMap = LinkedListMultimap.create();
    Map<String,Integer> countForFieldMap = Maps.newHashMap();
    // maps from the key with NO grouping context to a multimap of
    // key WITH grouping context to attributes:
    // DIRECTION : [DIRECTION.1 : [over,under], DIRECTION.2 : [sideways,down]]
    LoadingCache<String,Multimap<String,Attribute<? extends Comparable<?>>>> hits = CacheBuilder.newBuilder()
                    .build(new CacheLoader<String,Multimap<String,Attribute<? extends Comparable<?>>>>() {
                        public Multimap<String,Attribute<? extends Comparable<?>>> load(String key) {
                            return LinkedListMultimap.create();
                        }
                    });
    // same mapping structure as above, but for attributes whose values
    // do not appear in the hit-term map
    @SuppressWarnings("serial")
    LoadingCache<String,Multimap<String,Attribute<? extends Comparable<?>>>> misses = CacheBuilder.newBuilder()
                    .build(new CacheLoader<String,Multimap<String,Attribute<? extends Comparable<?>>>>() {
                        public Multimap<String,Attribute<? extends Comparable<?>>> load(String key) {
                            return LinkedListMultimap.create();
                        }
                    });
    for (Map.Entry<String,Attribute<? extends Comparable<?>>> de : document.entrySet()) {
        String keyWithGrouping = de.getKey();
        String keyNoGrouping = keyWithGrouping;
        // if we have grouping context on, remove the grouping context
        if (keyNoGrouping.indexOf('.') != -1) {
            keyNoGrouping = keyNoGrouping.substring(0, keyNoGrouping.indexOf('.'));
        }
        // apply the _ANYFIELD_ limit to any field that has no explicit limit of its own
        if (this.limitFieldsMap.containsKey("_ANYFIELD_") && !this.limitFieldsMap.containsKey(keyNoGrouping)) {
            this.limitFieldsMap.put(keyNoGrouping, this.limitFieldsMap.get("_ANYFIELD_"));
            log.trace("added " + keyNoGrouping + " - " + this.limitFieldsMap.get(keyNoGrouping)
                            + " to the limitFieldsMap because of the _ANYFIELD_ entry");
        }
        // look for the key without the grouping context
        if (this.limitFieldsMap.containsKey(keyNoGrouping)) {
            if (log.isTraceEnabled())
                log.trace("limitFieldsMap contains " + keyNoGrouping);
            Attribute<?> attr = de.getValue();
            if (attr instanceof Attributes) {
                Attributes attrs = (Attributes) attr;
                Set<Attribute<? extends Comparable<?>>> attrSet = attrs.getAttributes();
                for (Attribute<? extends Comparable<?>> value : attrSet) {
                    manageHitsAndMisses(keyWithGrouping, keyNoGrouping, value, hitTermMap, hits, misses, countForFieldMap);
                }
            } else {
                manageHitsAndMisses(keyWithGrouping, keyNoGrouping, attr, hitTermMap, hits, misses, countForFieldMap);
            }
        }
    }
    for (String keyNoGrouping : countForFieldMap.keySet()) {
        // hits consume the limit first, then misses fill whatever remains
        int limit = this.limitFieldsMap.get(keyNoGrouping);
        Multimap<String,Attribute<? extends Comparable<?>>> hitMap = hits.getUnchecked(keyNoGrouping);
        for (String keyWithGrouping : hitMap.keySet()) {
            for (Attribute<? extends Comparable<?>> value : hitMap.get(keyWithGrouping)) {
                // if (limit <= 0) break; // uncomment to stop keeping hits once the limit
                // is exceeded; as written, ALL hits are kept
                reducedMap.put(keyWithGrouping, value);
                limit--;
            }
        }
        Multimap<String,Attribute<? extends Comparable<?>>> missMap = misses.getUnchecked(keyNoGrouping);
        for (String keyWithGrouping : missMap.keySet()) {
            for (Attribute<? extends Comparable<?>> value : missMap.get(keyWithGrouping)) {
                if (limit <= 0)
                    break;
                reducedMap.put(keyWithGrouping, value);
                limit--;
            }
        }
        if (log.isTraceEnabled()) {
            log.trace("reducedMap:" + reducedMap);
            log.trace("mapOfHits:" + hits.asMap());
            log.trace("mapOfMisses:" + misses.asMap());
        }
        // only generate an original count if a field was reduced
        if (countForFieldMap.get(keyNoGrouping) > this.limitFieldsMap.get(keyNoGrouping)) {
            limitedFieldCounts.put(keyNoGrouping + ORIGINAL_COUNT_SUFFIX, countForFieldMap.get(keyNoGrouping));
        }
    }
    // mutate the document with the changes collected in the above loop
    applyCounts(document, limitedFieldCounts);
    Map<String,Multimap<String,Attribute<? extends Comparable<?>>>> toRemove = Maps.newLinkedHashMap();
    toRemove.putAll(hits.asMap());
    toRemove.putAll(misses.asMap());
    makeReduction(document, toRemove, reducedMap);
    return entry;
}
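As a concrete illustration of the reduction, a hedged sketch; the LimitFields constructor argument is an assumption based on the limitFieldsMap field used above, and docKey/document are illustrative.

// Hypothetical example; the constructor signature is an assumption based on
// the limitFieldsMap field referenced in the method above.
Map<String,Integer> limits = new HashMap<>();
limits.put("DIRECTION", 2); // keep at most 2 DIRECTION values, hits first
LimitFields limitFields = new LimitFields(limits);
// a document holding DIRECTION.1=[over,under] and DIRECTION.2=[sideways,down]
// comes back with two surviving values plus DIRECTION_ORIGINAL_COUNT=4
Entry<Key,Document> reduced = limitFields.apply(Maps.immutableEntry(docKey, document));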
Use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
The class IndexOnlyKeyToDocumentData, method next:
@Override
public Entry<DocumentData,Document> next() {
    final Entry<Key,Value> next;
    try {
        next = this.seekNext(false);
    } catch (IOException e) {
        QueryException qe = new QueryException(DatawaveErrorCode.SEEK_NEXT_ELEMENT_ERROR, e);
        throw new DatawaveFatalQueryException(qe);
    }
    final Entry<DocumentData,Document> entry;
    // test the completion sentinel before the general non-null case, otherwise
    // the sentinel would be mistaken for a real key/value pair
    if (next == ITERATOR_COMPLETE_KEY) {
        QueryException qe = new QueryException(DatawaveErrorCode.FETCH_NEXT_ELEMENT_ERROR,
                        MessageFormat.format("Fieldname: {0}, Range: {1}", this.fieldName, this.parent));
        throw (NoSuchElementException) (new NoSuchElementException().initCause(qe));
    } else if (null != next) {
        final List<Entry<Key,Value>> keyValues = new LinkedList<>();
        keyValues.add(next);
        Key docKey = getDocKey(next.getKey());
        final DocumentData documentData = new DocumentData(this.iteratorDocumentKey, Collections.singleton(docKey), keyValues, true);
        entry = Maps.immutableEntry(documentData, this.iteratorDocument);
    } else {
        entry = null;
    }
    return entry;
}
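Downstream consumption is plain iteration. A minimal sketch, assuming the class exposes the usual Iterator contract (hasNext() is inferred from next() above, not shown in this snippet); construction of indexOnlyData and the process callback are illustrative only.

// Minimal consumption sketch; indexOnlyData setup is omitted and process()
// is a hypothetical callback.
while (indexOnlyData.hasNext()) {
    Entry<DocumentData,Document> e = indexOnlyData.next();
    process(e.getKey(), e.getValue()); // backing key/values plus the index-only document
}
// once seekNext() reports the completion sentinel, next() throws
// NoSuchElementException with a FETCH_NEXT_ELEMENT_ERROR cause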
Use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
The class DocumentDataIterator, method findNextDocument:
protected void findNextDocument() {
    documentData = null;
    try {
        Text cf = new Text();
        /*
         * Given that we are already at a document key, this method will continue to advance the
         * underlying source until it is exhausted (hasTop() returns false), the returned key is
         * no longer in the totalRange, or the current top key shares the same row and column
         * family as the source's next key.
         */
        while (documentData == null && source.hasTop()) {
            Key k = source.getTopKey();
            if (log.isTraceEnabled())
                log.trace("Sought to " + k);
            k.getColumnFamily(cf);
            if (!isEventKey(k)) {
                if (cf.find("fi\0") == 0) {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'fi'");
                    }
                    // Try to do an optimized jump over the field index
                    cf.set("fi\1");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(),
                                    totalRange.isEndKeyInclusive()), columnFamilies, inclusive);
                } else if (cf.getLength() == 1 && cf.charAt(0) == 'd') {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'd'");
                    }
                    // Try to do an optimized jump over the raw documents
                    cf.set("d\0");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(),
                                    totalRange.isEndKeyInclusive()), columnFamilies, inclusive);
                } else if (cf.getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f') {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'tf'");
                    }
                    // Try to do an optimized jump over the term frequencies
                    cf.set("tf\0");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(),
                                    totalRange.isEndKeyInclusive()), columnFamilies, inclusive);
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug("Next()'ing over the current key");
                    }
                    source.next();
                }
            } else {
                Key pointer = source.getTopKey();
                if (dataTypeFilter.apply(pointer)) {
                    this.documentData = this.documentMapper.apply(Maps.immutableEntry(pointer, new Document()));
                }
                // now bounce to the next document as the documentMapper may have moved the source considerably
                Key nextDocKey = this.evaluationFilter != null ? this.evaluationFilter.getStopKey(pointer)
                                : pointer.followingKey(PartialKey.ROW_COLFAM);
                if (totalRange.contains(nextDocKey)) {
                    Range nextCF = new Range(nextDocKey, true, totalRange.getEndKey(), totalRange.isEndKeyInclusive());
                    source.seek(nextCF, columnFamilies, inclusive);
                } else {
                    // skip to the end
                    Range nextCF = new Range(totalRange.getEndKey(), false,
                                    totalRange.getEndKey().followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME_DEL), false);
                    source.seek(nextCF, columnFamilies, inclusive);
                }
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not seek in findNextDocument", e);
    }
}
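The "\0"/"\1" suffixes in the optimized jumps rely on Accumulo's lexicographic key ordering. A short sketch of the field-index skip in isolation (variable names are illustrative):

// Keys sort by row, then column family, lexicographically. "fi\1" is the first
// family after every "fi\0..."-prefixed field-index family; likewise "d\0" is the
// immediate successor of the bare "d" family and "tf\0" of the bare "tf" family.
// Seeking to an exclusive start key built from that successor skips the whole block.
Key startAfterFieldIndex = new Key(source.getTopKey().getRow(), new Text("fi\1"));
Range skip = new Range(startAfterFieldIndex, false, totalRange.getEndKey(), totalRange.isEndKeyInclusive());
source.seek(skip, columnFamilies, inclusive);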
Use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
The class DocumentDeserializer, method apply:
@Override
public Entry<Key,Document> apply(Entry<Key,Value> from) {
    // strip the serialization header, then hand the payload to the concrete deserializer
    InputStream is = DocumentSerialization.consumeHeader(from.getValue().get());
    Document document = deserialize(is);
    return Maps.immutableEntry(from.getKey(), document);
}
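A hedged round-trip sketch; the Kryo-backed sibling class names are assumptions about the surrounding package and are not confirmed by the snippet above.

// Hypothetical round trip; the concrete serializer/deserializer class names
// are assumptions, not shown in this snippet.
Entry<Key,Value> wire = new KryoDocumentSerializer().apply(Maps.immutableEntry(docKey, document));
Entry<Key,Document> restored = new KryoDocumentDeserializer().apply(wire);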