use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class WritableDocumentDeserializer method deserialize.
@Override
public Document deserialize(InputStream data) {
    DataInputStream dis = new DataInputStream(data);
    Document d = new Document();
    try {
        d.readFields(dis);
    } catch (IOException e) {
        throw new RuntimeException("Could not deserialize Document through readFields().", e);
}
return d;
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class GroupingTransform method flatten.
/**
* <pre>
* flush() used the countingMap:
* [[MALE, 16],
* [MALE, 20],
* [40, MALE],
* [40, MALE],
* [MALE, 22] x 2,
* [FEMALE, 18],
* [MALE, 24],
* [20, MALE],
* [30, MALE],
* [FEMALE, 18],
* [34, MALE]]
*
* to create the documents list: [
* {AGE=16, COUNT=1, GENDER=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {COUNT=1, ETA=20, GENERE=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {COUNT=1, ETA=40, GENERE=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {AGE=40, COUNT=1, GENDER=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {COUNT=2, ETA=22, GENERE=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {AGE=18, COUNT=1, GENDER=FEMALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {COUNT=1, ETA=24, GENERE=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {AGE=20, COUNT=1, GENDER=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {AGE=30, COUNT=1, GENDER=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {COUNT=1, ETA=18, GENERE=FEMALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false,
* {AGE=34, COUNT=1, GENDER=MALE}:20130101_0 test%00;-d5uxna.msizfm.-oxy0iu: [ALL] 1356998400000 false]
*
* which is then flattened into a single document whose fields and counts are correlated by a grouping-context suffix:
*
* {
* AGE.0=16, GENDER.0=MALE, COUNT.0=1,
* ETA.1=20, GENERE.1=MALE, COUNT.1=1,
* ETA.2=40, GENERE.2=MALE, COUNT.2=1,
* AGE.3=40, GENDER.3=MALE, COUNT.3=1,
* ETA.4=22, GENERE.4=MALE, COUNT.4=2,
* AGE.5=18, GENDER.5=FEMALE, COUNT.5=1,
* ETA.6=24, GENERE.6=MALE, COUNT.6=1,
* AGE.7=20, GENDER.7=MALE, COUNT.7=1,
* AGE.8=30, GENDER.8=MALE, COUNT.8=1,
* ETA.9=18, GENERE.9=FEMALE, COUNT.9=1,
* AGE.A=34, GENDER.A=MALE, COUNT.A=1,
* }
* </pre>
*
* The Attributes, which have had their visibilities merged, are copied into normal TypeAttributes for serialization to the webserver.
*
* @param documents
*            the documents to flatten; the list is cleared and replaced in place with the single flattened document
*/
private void flatten(List<Document> documents) {
    log.trace("flatten {}", documents);
    Document theDocument = new Document(documents.get(documents.size() - 1).getMetadata(), true);
    int context = 0;
    Set<ColumnVisibility> visibilities = new HashSet<>();
    for (Document document : documents) {
        log.trace("document: {}", document);
        for (Entry<String, Attribute<? extends Comparable<?>>> entry : document.entrySet()) {
            String name = entry.getKey();
            visibilities.add(entry.getValue().getColumnVisibility());
            Attribute<? extends Comparable<?>> attribute = entry.getValue();
            attribute.setColumnVisibility(entry.getValue().getColumnVisibility());
            // call copy() on the GroupingTypeAttribute to get a plain TypeAttribute
            // instead of a GroupingTypeAttribute that is package protected and won't serialize
            theDocument.put(name + "." + Integer.toHexString(context).toUpperCase(), (TypeAttribute) attribute.copy(), true, false);
        }
        context++;
    }
    ColumnVisibility combinedVisibility = combine(visibilities);
    log.trace("combined visibilities: {} to {}", visibilities, combinedVisibility);
    theDocument.setColumnVisibility(combinedVisibility);
    documents.clear();
    log.trace("flattened document: {}", theDocument);
    documents.add(theDocument);
}
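Note how the grouping-context suffix is produced: it is simply the document's position in the list rendered as uppercase hex via Integer.toHexString(context).toUpperCase(), which is why the eleventh group in the javadoc above is keyed ".A" rather than ".10". A standalone sketch of just that key construction (the field name is chosen for illustration):

// Contexts 0-9 yield ".0"-".9"; context 10 rolls over to hex ".A".
for (int context = 0; context <= 10; context++) {
    String suffix = "." + Integer.toHexString(context).toUpperCase();
    System.out.println("AGE" + suffix); // AGE.0, AGE.1, ..., AGE.9, AGE.A
}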
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class DocumentTransformerTest method transform_noPrimaryToSecondaryMapSetTest.
@Test
public void transform_noPrimaryToSecondaryMapSetTest() throws MarkingFunctions.Exception {
    Key key = new Key("shard", "dataType" + Constants.NULL + "uid");
    Value value = new Value();
    AbstractMap.SimpleEntry<Key, Value> entry = new AbstractMap.SimpleEntry<>(key, value);
    Document d = new Document();
    basicExpects(d, key, entry);
    PowerMock.replayAll();
    transformer = new DocumentTransformer(mockLogic, mockQuery, mockMarkingFunctions, mockResponseFactory, true);
    SimpleEvent event = (SimpleEvent) transformer.transform(entry);
    PowerMock.verifyAll();
    Assert.assertNotNull(event);
    Assert.assertEquals(1, event.getFields().size());
    Assert.assertEquals("field1", event.getFields().get(0).getName());
    Assert.assertEquals("5", event.getFields().get(0).getValueString());
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class UniqueTransformTest method testUniquenessWithTimingMetric.
@Test
public void testUniquenessWithTimingMetric() {
    List<Document> input = new ArrayList<>();
    List<Document> expected = new ArrayList<>();
    String MARKER_STRING = "\u2735FinalDocument\u2735";
    TimingMetadata timingMetadata = new TimingMetadata();
    timingMetadata.setNextCount(5L);
    givenInputDocument(MARKER_STRING).withKeyValue(LogTiming.TIMING_METADATA, timingMetadata.toString()).isExpectedToBeUnique();
    givenInputDocument().withKeyValue("ATTR0", randomValues.get(0)).isExpectedToBeUnique();
    givenInputDocument().withKeyValue("ATTR1", randomValues.get(1)).isExpectedToBeUnique();
    givenInputDocument().withKeyValue("ATTR1", randomValues.get(2));
    givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0");
    assertUniqueDocuments();
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class UniqueTransformTest method countUniqueness.
private int countUniqueness(List<Document> input, Set<String> fields) {
    Set<String> uniqueValues = new HashSet<>();
    for (Document document : input) {
        Multimap<String, String> fieldValues = getFieldValues(document, fields);
        uniqueValues.add(getString(fieldValues));
    }
    return uniqueValues.size();
}
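A hedged usage sketch for the helper above: each distinct combination of the requested fields contributes one entry to the set, so the return value bounds how many documents a unique transform may emit. The input list and field choice here are assumptions for illustration, not fixtures from the test class:

import java.util.Collections;

// Count distinct ATTR1 value combinations across the input documents;
// a unique transform over ATTR1 can emit at most this many documents.
int distinct = countUniqueness(input, Collections.singleton("ATTR1"));
Assert.assertTrue(distinct <= input.size());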