use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class RangeStreamScannerTest method beforeClass.
/**
 * Creates an in-memory instance with a shard index table, writes index entries for four values of the
 * field FOO ('bar', 'baz', 'boo' and 'boohoo'), and builds the shared {@link ShardQueryConfiguration}.
 *
 * @throws Exception
 *             if the table cannot be created or the mutations cannot be written
 */
@BeforeClass
public static void beforeClass() throws Exception {
    InMemoryInstance instance = new InMemoryInstance();
    connector = instance.getConnector("", new PasswordToken(new byte[0]));
    connector.tableOperations().create(SHARD_INDEX);
    scannerFactory = new ScannerFactory(connector, 1);

    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(1024L).setMaxLatency(1, TimeUnit.SECONDS).setMaxWriteThreads(1);
    BatchWriter bw = connector.createBatchWriter(SHARD_INDEX, bwConfig);
    try {
        // FOO == 'bar' hits day 20190314 with 1 shard, each shard has 2 document ids.
        // This remains under the shards/day limit and under the documents/shard limit.
        bw.addMutation(buildMutation("FOO", "bar", "20190314", "datatype1", "A", "doc1", "doc2"));

        // FOO == 'baz' hits day 20190317 with 15 shards, each shard has 2 document ids.
        // This exceeds the shards/day limit and remains under the documents/shard limit.
        for (int shardNum = 0; shardNum < 15; shardNum++) {
            bw.addMutation(buildMutation("FOO", "baz", "20190317_" + shardNum, "datatype1", "A", "doc1", "doc2"));
        }

        // FOO == 'boo' hits day 20190319 with 8 shards, each shard has 15 document ids.
        // This remains under the shards/day limit and under the documents/shard limit.
        List<String> fifteenDocIds = new ArrayList<>(15);
        for (int docNum = 0; docNum < 15; docNum++) {
            fifteenDocIds.add("docId" + docNum);
        }
        for (int shardNum = 0; shardNum < 8; shardNum++) {
            bw.addMutation(buildMutation("FOO", "boo", "20190319_" + shardNum, "datatype1", "A", fifteenDocIds));
        }

        // FOO == 'boohoo' hits day 20190323 with 15 shards, each shard has 25 document ids.
        // This exceeds the shards/day limit and exceeds the documents/shard limit.
        List<String> twentyFiveDocIds = new ArrayList<>(25);
        for (int docNum = 0; docNum < 25; docNum++) {
            twentyFiveDocIds.add("docId" + docNum);
        }
        for (int shardNum = 0; shardNum < 15; shardNum++) {
            bw.addMutation(buildMutation("FOO", "boohoo", "20190323_" + shardNum, "datatype1", "A", twentyFiveDocIds));
        }

        // Push everything to the table before any test scans it.
        bw.flush();
    } finally {
        // Always release the writer, even when a mutation or the flush fails.
        bw.close();
    }

    // Setup ShardQueryConfiguration
    config = new ShardQueryConfiguration();
    config.setDatatypeFilter(Sets.newHashSet("datatype1", "datatype2"));

    // Query date range: from the epoch up to now.
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));

    // Three authorization sets that all share the "A" authorization used by the written entries.
    Authorizations auth1 = new Authorizations("A", "B", "C");
    Authorizations auth2 = new Authorizations("A", "D", "E");
    Authorizations auth3 = new Authorizations("A", "F", "G");
    Set<Authorizations> auths = Sets.newHashSet(auth1, auth2, auth3);
    config.setAuthorizations(auths);

    // Field -> datatype mapping, used both as the query field datatypes and as the indexed fields.
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.put("FOO", new LcNoDiacriticsType());
    dataTypes.put("NUM", new NumberType());
    config.setQueryFieldsDatatypes(dataTypes);
    config.setIndexedFields(dataTypes);
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class UnindexedNumericQueryTest method testRange.
/**
 * Runs a bounded range query against the NUM field and checks that both range bounds appear in their
 * {@link NumberType}-normalized form in the query option handed to the {@link QueryIterator}.
 *
 * @throws Exception
 *             if the query configuration cannot be set up
 */
@Test
public void testRange() throws Exception {
    log.info("------ testRange ------");
    String min = "90";
    String max = "122";
    String ohio = "'ohio'";
    String iowa = "'iowa'";
    String stateClause = "(" + CityField.STATE.name() + EQ_OP + ohio + OR_OP + CityField.STATE.name() + EQ_OP + iowa + ")";
    String rangeClause = "((_Bounded_ = true) && (" + CityField.NUM.name() + GT_OP + min + AND_OP + CityField.NUM.name() + LT_OP + max + "))";
    String query = stateClause + AND_OP + rangeClause;

    ShardQueryConfiguration config = (ShardQueryConfiguration) setupConfig(query);

    // verify NUM is NumberType
    String indexStr = config.getIndexedFieldDataTypesAsString();
    Assert.assertTrue(indexStr.contains(CityField.NUM.name() + ":" + NumberType.class.getName()));

    // NUM field should not be indexed
    Set<String> indexes = config.getIndexedFields();
    Assert.assertFalse(indexes.contains(CityField.NUM.name()));

    NumberType numberType = new NumberType();
    String normalizedMin = numberType.normalize(min);
    String normalizedMax = numberType.normalize(max);

    Iterator<QueryData> queries = config.getQueries();
    Assert.assertTrue(queries.hasNext());
    QueryData data = queries.next();

    // Track whether a QueryIterator setting was actually inspected; otherwise the
    // assertions inside the loop could be skipped entirely and the test would pass vacuously.
    boolean queryIteratorSeen = false;
    for (IteratorSetting setting : data.getSettings()) {
        if (!QueryIterator.class.getName().equals(setting.getIteratorClass())) {
            continue;
        }
        queryIteratorSeen = true;
        Map<String, String> options = setting.getOptions();
        String plannedQuery = options.get(QueryOptions.QUERY);
        Assert.assertNotNull("QueryIterator setting has no query option", plannedQuery);
        Assert.assertTrue(plannedQuery.contains(normalizedMin));
        Assert.assertTrue(plannedQuery.contains(normalizedMax));
    }
    Assert.assertTrue("no QueryIterator setting found, normalized bounds were never verified", queryIteratorSeen);
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class UnindexedNumericQueryTest method testNumericTerm.
/**
 * Runs a query with a single greater-than term on the NUM field and checks that the bound appears in
 * its {@link NumberType}-normalized form in the query option handed to the {@link QueryIterator}.
 *
 * @throws Exception
 *             if the query configuration cannot be set up
 */
@Test
public void testNumericTerm() throws Exception {
    log.info("------ testNumericTerm ------");
    String min = "115";
    String indiana = "'indiana'";
    String query = CityField.STATE.name() + EQ_OP + indiana + AND_OP + CityField.NUM.name() + GT_OP + min;

    ShardQueryConfiguration config = (ShardQueryConfiguration) setupConfig(query);

    // verify NUM is NumberType
    String indexStr = config.getIndexedFieldDataTypesAsString();
    Assert.assertTrue(indexStr.contains(CityField.NUM.name() + ":" + NumberType.class.getName()));

    // NUM field should not be indexed
    Set<String> indexes = config.getIndexedFields();
    Assert.assertFalse(indexes.contains(CityField.NUM.name()));

    String normalizedMin = new NumberType().normalize(min);

    Iterator<QueryData> queries = config.getQueries();
    Assert.assertTrue(queries.hasNext());
    QueryData data = queries.next();

    // Track whether a QueryIterator setting was actually inspected; otherwise the
    // assertion inside the loop could be skipped entirely and the test would pass vacuously.
    boolean queryIteratorSeen = false;
    for (IteratorSetting setting : data.getSettings()) {
        if (!QueryIterator.class.getName().equals(setting.getIteratorClass())) {
            continue;
        }
        queryIteratorSeen = true;
        Map<String, String> options = setting.getOptions();
        String plannedQuery = options.get(QueryOptions.QUERY);
        Assert.assertNotNull("QueryIterator setting has no query option", plannedQuery);
        Assert.assertTrue(plannedQuery.contains(normalizedMin));
    }
    Assert.assertTrue("no QueryIterator setting found, normalized bound was never verified", queryIteratorSeen);
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class GroupingTestWithModel method testCountingMapAgain.
/**
 * Adds three attribute groups to a {@link GroupingTransform.GroupCountingHashMap}, two of which differ
 * only in their column visibilities, and checks the resulting counts and visibilities per attribute.
 */
@Test
public void testCountingMapAgain() {
    MarkingFunctions markingFunctions = new MarkingFunctions.Default();
    GroupingTransform.GroupCountingHashMap map = new GroupingTransform.GroupCountingHashMap(markingFunctions);

    // First group: NAME=FOO (visibility A), AGE=5 (visibility C).
    GroupingTypeAttribute<?> firstName = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    firstName.setColumnVisibility(new ColumnVisibility("A"));
    GroupingTypeAttribute<?> firstAge = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    firstAge.setColumnVisibility(new ColumnVisibility("C"));
    Set<GroupingTypeAttribute<?>> seta = Sets.newHashSet(firstName, firstAge);
    map.add(seta);

    // Second group: same values as the first, but with visibilities B and D.
    GroupingTypeAttribute<?> secondName = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    secondName.setColumnVisibility(new ColumnVisibility("B"));
    GroupingTypeAttribute<?> secondAge = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    secondAge.setColumnVisibility(new ColumnVisibility("D"));
    Set<GroupingTypeAttribute<?>> setb = Sets.newHashSet(secondName, secondAge);
    map.add(setb);

    // The visibilities differ, yet the two sets must compare equal and share a hashCode.
    Assert.assertEquals(seta.hashCode(), setb.hashCode());
    Assert.assertEquals(seta, setb);

    // Third group: NAME=BAR (visibility C), AGE=6 (visibility D).
    GroupingTypeAttribute thirdName = new GroupingTypeAttribute(new LcType("BAR"), new Key("NAME"), true);
    thirdName.setColumnVisibility(new ColumnVisibility("C"));
    GroupingTypeAttribute thirdAge = new GroupingTypeAttribute(new NumberType("6"), new Key("AGE"), true);
    thirdAge.setColumnVisibility(new ColumnVisibility("D"));
    map.add(Sets.newHashSet(thirdName, thirdAge));

    log.debug("map is: " + map);

    for (Map.Entry<Collection<GroupingTypeAttribute<?>>, Integer> entry : map.entrySet()) {
        for (Attribute<?> attr : entry.getKey()) {
            int count = entry.getValue();
            switch (attr.getData().toString()) {
                case "FOO":
                    Assert.assertEquals(2, count);
                    // the key's visibility is expected to be the merge of the two added items
                    Assert.assertEquals(new ColumnVisibility("A&B"), attr.getColumnVisibility());
                    break;
                case "5":
                    Assert.assertEquals(2, count);
                    // the key's visibility is expected to be the merge of the two added items
                    Assert.assertEquals(new ColumnVisibility("C&D"), attr.getColumnVisibility());
                    break;
                case "BAR":
                    Assert.assertEquals(1, count);
                    Assert.assertEquals(new ColumnVisibility("C"), attr.getColumnVisibility());
                    break;
                case "6":
                    Assert.assertEquals(1, count);
                    Assert.assertEquals(new ColumnVisibility("D"), attr.getColumnVisibility());
                    break;
                default:
                    // any other value is not checked
                    break;
            }
        }
    }
}
use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.
the class GroupingTest method testCountingMapAgain.
/**
 * Exercises {@link GroupingTransform.GroupCountingHashMap} with two groups that are equal apart from
 * their column visibilities plus one distinct group, then checks each attribute's count and visibility.
 */
@Test
public void testCountingMapAgain() {
    MarkingFunctions markingFunctions = new MarkingFunctions.Default();
    GroupingTransform.GroupCountingHashMap map = new GroupingTransform.GroupCountingHashMap(markingFunctions);

    // Group one: FOO / 5 with visibilities A / C.
    GroupingTypeAttribute<?> nameOne = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    nameOne.setColumnVisibility(new ColumnVisibility("A"));
    GroupingTypeAttribute<?> ageOne = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    ageOne.setColumnVisibility(new ColumnVisibility("C"));
    Set<GroupingTypeAttribute<?>> seta = Sets.newHashSet(nameOne, ageOne);
    map.add(seta);

    // Group two: FOO / 5 again, this time with visibilities B / D.
    GroupingTypeAttribute<?> nameTwo = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    nameTwo.setColumnVisibility(new ColumnVisibility("B"));
    GroupingTypeAttribute<?> ageTwo = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    ageTwo.setColumnVisibility(new ColumnVisibility("D"));
    Set<GroupingTypeAttribute<?>> setb = Sets.newHashSet(nameTwo, ageTwo);
    map.add(setb);

    // Differing visibilities must not affect equality or hashing of the two sets.
    Assert.assertEquals(seta.hashCode(), setb.hashCode());
    Assert.assertEquals(seta, setb);

    // Group three: BAR / 6 with visibilities C / D.
    GroupingTypeAttribute nameThree = new GroupingTypeAttribute(new LcType("BAR"), new Key("NAME"), true);
    nameThree.setColumnVisibility(new ColumnVisibility("C"));
    GroupingTypeAttribute ageThree = new GroupingTypeAttribute(new NumberType("6"), new Key("AGE"), true);
    ageThree.setColumnVisibility(new ColumnVisibility("D"));
    map.add(Sets.newHashSet(nameThree, ageThree));

    log.debug("map is: " + map);

    for (Map.Entry<Collection<GroupingTypeAttribute<?>>, Integer> entry : map.entrySet()) {
        for (Attribute<?> attr : entry.getKey()) {
            int count = entry.getValue();
            String value = attr.getData().toString();

            // Pick the expectation for this attribute value; values without one are skipped.
            final int expectedCount;
            final String expectedVisibility;
            if (value.equals("FOO")) {
                // added twice, so the key's visibility is expected to be the merge of A and B
                expectedCount = 2;
                expectedVisibility = "A&B";
            } else if (value.equals("5")) {
                // added twice, so the key's visibility is expected to be the merge of C and D
                expectedCount = 2;
                expectedVisibility = "C&D";
            } else if (value.equals("BAR")) {
                expectedCount = 1;
                expectedVisibility = "C";
            } else if (value.equals("6")) {
                expectedCount = 1;
                expectedVisibility = "D";
            } else {
                continue;
            }

            Assert.assertEquals(expectedCount, count);
            Assert.assertEquals(new ColumnVisibility(expectedVisibility), attr.getColumnVisibility());
        }
    }
}
Aggregations