Use of datawave.query.data.parsers.DatawaveKey in the datawave project by NationalSecurityAgency.
Class MutableMetadataHandler, method delete.
/**
 * Deletes the given K,V entries from the event, removing all derived field index, global index,
 * reverse index, term frequency, and content entries, and optionally inserting a history element
 * recording the removal.
 *
 * @param writer
 *            multi-table batch writer used to apply mutations to the event/shard, index, reverse index, and metadata tables
 * @param con
 *            connector used to scan for related content and field index keys
 * @param userAuths
 *            authorizations used when scanning for related keys
 * @param currentEntryList
 *            the K,V entries to delete
 * @param isIndexOnlyField
 *            if true, the field has no event column to delete (index-only field)
 * @param isIndexed
 *            whether the field is indexed
 * @param isReverseIndexed
 *            whether the field is reverse indexed
 * @param isContentField
 *            if true, content ("d" column) entries are also removed
 * @param dataTypes
 *            the normalizer types applied to the field
 * @param user
 *            the user performing the delete (recorded in the history element)
 * @param mode
 *            the history mode
 * @param ts
 *            timestamp used for the global index mutations and history entries
 * @param purgeTokens
 *            If set true, then this will delete all tokens for a field as well.
 * @param insertHistory
 *            if true, insert a history element recording this delete
 * @throws Exception
 *             if a batch writer or scan operation fails
 */
protected void delete(MultiTableBatchWriter writer, Connector con, Set<Authorizations> userAuths, List<Pair<Key, Value>> currentEntryList, boolean isIndexOnlyField, boolean isIndexed, boolean isReverseIndexed, boolean isContentField, Set<Type<?>> dataTypes, String user, MODE mode, long ts, boolean purgeTokens, boolean insertHistory) throws Exception {
for (Pair<Key, Value> currentEntry : currentEntryList) {
ColumnVisibility viz = currentEntry.getFirst().getColumnVisibilityParsed();
DatawaveKey key = new DatawaveKey(currentEntry.getFirst());
String shardId = key.getRow().toString();
long currentEntryTimestamp = currentEntry.getFirst().getTimestamp();
if (key.getType().equals(KeyType.INDEX_EVENT)) {
// Only delete the fi key itself; no metadata or global index adjustments are made for this type
Mutation e = new Mutation(currentEntry.getFirst().getRow());
e.putDelete(currentEntry.getFirst().getColumnFamily(), currentEntry.getFirst().getColumnQualifier(), viz, currentEntryTimestamp);
// fi entries are written via the event table writer; presumably they live in the same shard table — TODO confirm
writer.getBatchWriter(this.getEventTableName()).addMutation(e);
} else if (key.getType().equals(KeyType.EVENT)) {
Mutation m = new Mutation(key.getFieldName());
// Decrement the frequency (metadata table) by adding a -1 for the summing combiner to fold in
m.put(ColumnFamilyConstants.COLF_F, new Text(key.getDataType() + NULL_BYTE + DateHelper.format(currentEntryTimestamp)), new Value(SummingCombiner.VAR_LEN_ENCODER.encode(-1L)));
// Remove the event field.
Mutation e = new Mutation(currentEntry.getFirst().getRow());
if (!isIndexOnlyField) {
e.putDelete(currentEntry.getFirst().getColumnFamily(), currentEntry.getFirst().getColumnQualifier(), viz, currentEntryTimestamp);
}
// Remove the content column ("d" entries) for this document
if (isContentField) {
ContentIterable dKeys = getContentKeys(con, this.getEventTableName(), userAuths, shardId, key.getDataType(), key.getUid());
try {
for (Key dKey : dKeys) {
e.putDelete(dKey.getColumnFamily(), dKey.getColumnQualifier(), dKey.getColumnVisibilityParsed(), dKey.getTimestamp());
}
} finally {
dKeys.close();
}
}
// Truncate the timestamp to the start of its day, matching the global index timestamp granularity
long tsToDay = (ts / MS_PER_DAY) * MS_PER_DAY;
FieldIndexIterable fiKeys = getFieldIndexKeys(con, this.getEventTableName(), userAuths, shardId, key.getDataType(), key.getUid(), key.getFieldName(), key.getFieldValue(), dataTypes, purgeTokens);
try {
for (Key fiKey : fiKeys) {
// Remove the field index entry
e.putDelete(fiKey.getColumnFamily(), fiKey.getColumnQualifier(), fiKey.getColumnVisibilityParsed(), fiKey.getTimestamp());
DatawaveKey fiKeyParsed = new DatawaveKey(fiKey);
// Remove the term frequency entry
e.putDelete(ColumnFamilyConstants.COLF_TF.toString(), fiKeyParsed.getDataType() + NULL_BYTE + fiKeyParsed.getUid() + NULL_BYTE + fiKeyParsed.getFieldValue() + NULL_BYTE + fiKeyParsed.getFieldName(), fiKey.getColumnVisibilityParsed(), fiKey.getTimestamp());
// Create a UID object for the Value which will remove this UID
Builder uidBuilder = Uid.List.newBuilder();
uidBuilder.setIGNORE(false);
uidBuilder.setCOUNT(-1);
uidBuilder.addUID(fiKeyParsed.getUid());
Uid.List uidList = uidBuilder.build();
Value val = new Value(uidList.toByteArray());
// buffer the global indexes cq
String cq = shardId + NULL_BYTE + fiKeyParsed.getDataType();
// Remove the global index entry by adding the value (negative count folded in by the combiner)
Mutation i = new Mutation(fiKeyParsed.getFieldValue());
i.put(fiKeyParsed.getFieldName(), cq, fiKey.getColumnVisibilityParsed(), tsToDay, val);
writer.getBatchWriter(this.getIndexTableName()).addMutation(i);
// Remove the reverse global index entry
if (isReverseIndexed) {
String reverseIndexedValue = StringUtils.reverse(fiKeyParsed.getFieldValue());
Mutation ri = new Mutation(reverseIndexedValue);
// NOTE(review): uses the original entry's visibility (viz) here, while the forward index
// above uses fiKey.getColumnVisibilityParsed() — confirm this asymmetry is intentional
ri.put(fiKeyParsed.getFieldName(), cq, viz, tsToDay, val);
writer.getBatchWriter(this.getReverseIndexTableName()).addMutation(ri);
}
}
} finally {
fiKeys.close();
}
if (e.size() > 0) {
writer.getBatchWriter(this.getEventTableName()).addMutation(e);
}
writer.getBatchWriter(this.getMetadataTableName()).addMutation(m);
if (!isIndexOnlyField && insertHistory) {
insertHistory(writer, shardId, key.getDataType(), key.getUid(), viz, key.getFieldName(), key.getFieldValue(), ts, isIndexOnlyField, isIndexed, isReverseIndexed, dataTypes, user, mode);
}
}
}
writer.flush();
}
Use of datawave.query.data.parsers.DatawaveKey in the datawave project by NationalSecurityAgency.
Class TermFrequencyAggregatorTest, method apply_buildDocKeep.
// Verifies that TermFrequencyAggregator aggregates the TF entries for a single document,
// keeps only the requested field, and leaves the iterator positioned on the next document.
@Test
public void apply_buildDocKeep() throws IOException, ParseException {
Document doc = new Document();
AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
TreeMap<Key, Value> treeMap = Maps.newTreeMap();
treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
// second entry belongs to a different document (uid 124.345.456) and must not be aggregated
treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
itr.seek(new Range(), null, true);
Set<String> keepFields = new HashSet<>();
keepFields.add("FIELD1");
EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD1 == 'VALUE1'"), Collections.emptySet());
aggregator = new TermFrequencyAggregator(keepFields, filter, -1);
Key result = aggregator.apply(itr, doc, attributeFactory);
// test result key
assertTrue(result != null);
DatawaveKey parsedResult = new DatawaveKey(result);
assertTrue(parsedResult.getDataType().equals("dataType1"));
assertTrue(parsedResult.getUid().equals("123.345.456"));
assertTrue(parsedResult.getFieldName().equals("FIELD1"));
assertTrue(parsedResult.getFieldValue().equals("VALUE1"));
// test the doc contents: the RECORD_ID bookkeeping entry plus the kept FIELD1 value
assertTrue(doc.size() == 2);
assertTrue(doc.get("RECORD_ID").getData().equals("123/dataType1/123.345.456"));
assertTrue(doc.get("FIELD1").getData().toString().equals("VALUE1"));
// test that the iterator is in the correct position
assertTrue(itr.hasTop());
assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10)));
}
Use of datawave.query.data.parsers.DatawaveKey in the datawave project by NationalSecurityAgency.
Class CardinalitySummation, method apply.
/*
 * (non-Javadoc)
 *
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@SuppressWarnings("unchecked")
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    Document currentDoc = new Document();
    Key topKey = input.getKey();
    // reduce the key to the document key pieces only and a max cq in order to ensure the top key
    // sorts after the pieces it is summarizing.
    topKey = new Key(topKey.getRow(), topKey.getColumnFamily(), MAX_UNICODE);
    DatawaveKey parser = new DatawaveKey(topKey);
    currentDoc = input.getValue();
    Map<?, ?> currentAttr = currentDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        TreeMultimap<String, Attribute<?>> tmpMap = TreeMultimap.create();
        // the document-key field is bookkeeping, not a cardinality-bearing field; skip it
        if (!attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            if (attr.getValue() instanceof Attributes) {
                // multi-valued attribute: merge each member into the accumulated set
                Attributes attrs = (Attributes) attr.getValue();
                NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
                for (Attribute<?> myAttribute : attrs.getAttributes()) {
                    if (log.isTraceEnabled()) {
                        log.trace("Attributes for " + attr.getKey() + " " + attributes.iterator().hasNext());
                    }
                    if (!attributes.isEmpty()) {
                        boolean foundAmongOthers = false;
                        for (Attribute<?> thoseAttributes : attributes) {
                            if (myAttribute instanceof Cardinality) {
                                if (((Cardinality) myAttribute).equals(thoseAttributes)) {
                                    // equal cardinality value: merge the estimates instead of adding a duplicate
                                    Cardinality card = (Cardinality) thoseAttributes;
                                    Cardinality otherCard = (Cardinality) myAttribute;
                                    merge(card, otherCard, parser, merge);
                                    if (log.isTraceEnabled()) {
                                        log.trace("Offering to " + attr.getKey() + " value " + card.getContent().getFloorValue() + " " + card.getContent().getCeilingValue());
                                    }
                                    foundAmongOthers = true;
                                    break;
                                }
                            } else {
                                throw new RuntimeException("Have " + myAttribute.getClass());
                            }
                        }
                        if (!foundAmongOthers) {
                            if (log.isTraceEnabled()) {
                                log.trace("put attributes " + attr.getKey() + " " + myAttribute.getData());
                            }
                            tmpMap.put(attr.getKey(), myAttribute);
                        }
                        // NOTE(review): executed on every iteration of the member loop, not once after it —
                        // behavior preserved from the original; confirm the placement is intentional
                        newDocumentAttributes.putAll(tmpMap);
                    } else {
                        if (log.isTraceEnabled()) {
                            log.trace("adding attributes " + attr.getKey() + " " + myAttribute.getData());
                        }
                        newDocumentAttributes.put(attr.getKey(), myAttribute);
                    }
                }
            } else {
                // single-valued attribute: merge into an equal existing entry, or add it as new
                if (log.isTraceEnabled()) {
                    log.trace("Testing " + attr.getKey() + " " + attr.getValue().getData());
                }
                NavigableSet<Attribute<? extends Comparable<?>>> attributes = newDocumentAttributes.get(attr.getKey());
                boolean found = false;
                for (Attribute<?> thoseAttributes : attributes) {
                    if (thoseAttributes.equals(attr.getValue())) {
                        if (log.isTraceEnabled()) {
                            log.trace("found for " + attr.getKey() + " " + thoseAttributes.getData());
                        }
                        Cardinality card = (Cardinality) thoseAttributes;
                        Cardinality otherCard = (Cardinality) attr.getValue();
                        merge(card, otherCard, parser, merge);
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    if (log.isTraceEnabled()) {
                        log.trace("Don't have " + attr.getKey() + " " + attr.getValue().getData());
                    }
                    newDocumentAttributes.put(attr.getKey(), attr.getValue());
                }
            }
        }
    }
    // rebuild the reference document from everything accumulated so far
    referenceDocument = new Document();
    if (log.isTraceEnabled()) {
        log.trace("entries" + newDocumentAttributes.entries());
    }
    referenceDocument.putAll(newDocumentAttributes.entries().iterator(), false);
    if (log.isTraceEnabled()) {
        log.trace("currentDoc" + referenceDocument);
    }
    referenceKey = topKey;
    return Maps.immutableEntry(topKey, referenceDocument);
}
Use of datawave.query.data.parsers.DatawaveKey in the datawave project by NationalSecurityAgency.
Class TLDTermFrequencyAggregatorTest, method apply_buildDocKeep.
// Verifies that TLDTermFrequencyAggregator aggregates TF entries across a TLD document and its
// children, keeps only FIELD1/FIELD2 values, and leaves the iterator on the next document.
@Test
public void apply_buildDocKeep() throws IOException, ParseException {
Document doc = new Document();
AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
TreeMap<Key, Value> treeMap = Maps.newTreeMap();
treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
treeMap.put(getTF("123", "FIELD2", "VALUE2", "dataType1", "123.345.456.1", 10), new Value());
treeMap.put(getTF("123", "FIELD3", "VALUE3", "dataType1", "123.345.456.2", 10), new Value());
treeMap.put(getTF("123", "FIELD4", "VALUE4", "dataType1", "123.345.456.3", 10), new Value());
treeMap.put(getTF("123", "FIELD5", "VALUE5", "dataType1", "123.345.456.4", 10), new Value());
treeMap.put(getTF("123", "FIELD6", "VALUE6", "dataType1", "123.345.456.5", 10), new Value());
treeMap.put(getTF("123", "FIELD7", "VALUE7", "dataType1", "123.345.456.6", 10), new Value());
treeMap.put(getTF("123", "FIELD8", "VALUE8", "dataType1", "123.345.456.7", 10), new Value());
treeMap.put(getTF("123", "FIELD9", "VALUE9", "dataType1", "123.345.456.8", 10), new Value());
treeMap.put(getTF("123", "FIELD10", "VALUE10", "dataType1", "123.345.456.9", 10), new Value());
// grandchild uids still belong to the same top-level document 123.345.456
treeMap.put(getTF("123", "FIELD2", "VALUE11", "dataType1", "123.345.456.10.1", 10), new Value());
treeMap.put(getTF("123", "FIELD1", "VALUE12", "dataType1", "123.345.456.11.1.1", 10), new Value());
// different top-level document (uid 123.345.457); must not be aggregated
treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "123.345.457", 10), new Value());
SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
itr.seek(new Range(), null, true);
Set<String> keepFields = new HashSet<>();
keepFields.add("FIELD1");
keepFields.add("FIELD2");
EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD1 == 'VALUE1' && FIELD2 == 'VALUE2'"), Collections.emptySet());
aggregator = new TLDTermFrequencyAggregator(keepFields, filter, -1);
Key result = aggregator.apply(itr, doc, attributeFactory);
// test result key
assertTrue(result != null);
DatawaveKey parsedResult = new DatawaveKey(result);
assertTrue(parsedResult.getDataType().equals("dataType1"));
assertTrue(parsedResult.getUid().equals("123.345.456"));
assertTrue(parsedResult.getFieldName(), parsedResult.getFieldName().equals("FIELD1"));
assertTrue(parsedResult.getFieldValue().equals("VALUE1"));
// test the doc contents: RECORD_ID plus the kept FIELD1 and FIELD2 entries (two values each)
assertTrue(doc.size() == 5);
assertTrue(doc.get("RECORD_ID").getData().equals("123/dataType1/123.345.456"));
assertTrue(((Set<TypeAttribute>) doc.get("FIELD1").getData()).size() == 2);
Iterator<TypeAttribute> i = ((Set<TypeAttribute>) doc.get("FIELD1").getData()).iterator();
List<String> expected = new ArrayList<>();
expected.add("VALUE1");
expected.add("VALUE12");
while (i.hasNext()) {
TypeAttribute ta = i.next();
assertTrue(ta.isToKeep());
assertTrue(expected.remove(ta.getData().toString()));
}
assertTrue(expected.size() == 0);
assertTrue(((Set<TypeAttribute>) doc.get("FIELD2").getData()).size() == 2);
i = ((Set<TypeAttribute>) doc.get("FIELD2").getData()).iterator();
expected = new ArrayList<>();
expected.add("VALUE2");
expected.add("VALUE11");
while (i.hasNext()) {
TypeAttribute ta = i.next();
assertTrue(ta.isToKeep());
assertTrue(expected.remove(ta.getData().toString()));
}
assertTrue(expected.size() == 0);
// test that the iterator is in the correct position
assertTrue(itr.hasTop());
assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "123.345.457", 10)));
}
Use of datawave.query.data.parsers.DatawaveKey in the datawave project by NationalSecurityAgency.
Class EventDataQueryExpressionFilter, method apply.
/**
 * Evaluates the per-field expression filter against the given key.
 *
 * @param key
 *            the key to evaluate; the field name is parsed out of the key
 * @param update
 *            if true, evaluate via {@code apply} (which presumably may advance filter state);
 *            if false, evaluate via {@code peek} — confirm the state semantics against the filter type
 * @return true if a filter exists for the key's field and that filter accepts the key
 * @throws IllegalStateException
 *             if the filter has not been initialized
 */
protected boolean apply(Key key, boolean update) {
    if (!this.initialized) {
        // IllegalStateException (still a RuntimeException, so callers are unaffected) is the
        // standard type for "used before initialization"
        throw new IllegalStateException("The EventDataQueryExpressionFilter was not initialized");
    }
    final DatawaveKey datawaveKey = new DatawaveKey(key);
    // normalize the field name (strip Jexl identifier decoration) to match the filter map keys
    final String fieldName = JexlASTHelper.deconstructIdentifier(datawaveKey.getFieldName(), false);
    if (!this.filters.containsKey(fieldName)) {
        // no filter configured for this field: reject the key
        return false;
    }
    return update ? this.filters.get(fieldName).apply(key) : this.filters.get(fieldName).peek(key);
}
Aggregations