Search in sources:

Example 16 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class RangeStreamScannerTest method beforeClass.

/**
 * One-time fixture setup: creates an in-memory instance with the shard index table, writes the
 * FOO index entries used by the tests, and builds the shared {@link ShardQueryConfiguration}.
 *
 * @throws Exception
 *             if the table cannot be created or the mutations cannot be written
 */
@BeforeClass
public static void beforeClass() throws Exception {
    InMemoryInstance instance = new InMemoryInstance();
    connector = instance.getConnector("", new PasswordToken(new byte[0]));
    connector.tableOperations().create(SHARD_INDEX);
    scannerFactory = new ScannerFactory(connector, 1);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(1024L).setMaxLatency(1, TimeUnit.SECONDS).setMaxWriteThreads(1);
    BatchWriter bw = connector.createBatchWriter(SHARD_INDEX, bwConfig);
    // try/finally rather than try-with-resources so the writer is released even when a mutation fails,
    // without requiring BatchWriter to be AutoCloseable.
    try {
        // FOO == 'bar' hits day 20190314 with 1 shard, each shard has 2 document ids.
        // This remains under the shards/day limit and under the documents/shard limit.
        bw.addMutation(buildMutation("FOO", "bar", "20190314", "datatype1", "A", "doc1", "doc2"));
        // FOO == 'baz' hits day 20190317 with 15 shards, each shard has 2 document ids.
        // This exceeds the shards/day limit and remains under the documents/shard limit.
        for (int ii = 0; ii < 15; ii++) {
            String shard = "20190317_" + ii;
            bw.addMutation(buildMutation("FOO", "baz", shard, "datatype1", "A", "doc1", "doc2"));
        }
        // FOO == 'boo' hits day 20190319 with 8 shards, each shard has 15 document ids.
        // This remains under the shards/day limit and under the documents/shard limit.
        List<String> docIds = new ArrayList<>(15);
        for (int ii = 0; ii < 15; ii++) {
            docIds.add("docId" + ii);
        }
        for (int jj = 0; jj < 8; jj++) {
            String shard = "20190319_" + jj;
            bw.addMutation(buildMutation("FOO", "boo", shard, "datatype1", "A", docIds));
        }
        // FOO == 'boohoo' hits day 20190323 with 15 shards, each shard has 25 document ids.
        // This exceeds the shards/day limit and exceeds the documents/shard limit.
        docIds = new ArrayList<>(25);
        for (int ii = 0; ii < 25; ii++) {
            docIds.add("docId" + ii);
        }
        for (int jj = 0; jj < 15; jj++) {
            String shard = "20190323_" + jj;
            bw.addMutation(buildMutation("FOO", "boohoo", shard, "datatype1", "A", docIds));
        }
        // Flush mutations before the writer is closed.
        bw.flush();
    } finally {
        bw.close();
    }
    // Setup ShardQueryConfiguration
    config = new ShardQueryConfiguration();
    config.setDatatypeFilter(Sets.newHashSet("datatype1", "datatype2"));
    // Set begin/end date for query: from the epoch up to now
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));
    // Set auths for query; every set contains "A", the visibility used for the mutations above
    Authorizations auth1 = new Authorizations("A", "B", "C");
    Authorizations auth2 = new Authorizations("A", "D", "E");
    Authorizations auth3 = new Authorizations("A", "F", "G");
    Set<Authorizations> auths = Sets.newHashSet(auth1, auth2, auth3);
    config.setAuthorizations(auths);
    // Build and set datatypes; the same field-to-type mapping is used for query fields and indexed fields
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.putAll("FOO", Sets.newHashSet(new LcNoDiacriticsType()));
    dataTypes.putAll("NUM", Sets.newHashSet(new NumberType()));
    config.setQueryFieldsDatatypes(dataTypes);
    config.setIndexedFields(dataTypes);
}
Also used : Authorizations(org.apache.accumulo.core.security.Authorizations) ArrayList(java.util.ArrayList) InMemoryInstance(datawave.accumulo.inmemory.InMemoryInstance) Date(java.util.Date) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) NumberType(datawave.data.type.NumberType) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) BatchWriter(org.apache.accumulo.core.client.BatchWriter) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) BeforeClass(org.junit.BeforeClass)

Example 17 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class UnindexedNumericQueryTest method testRange.

/**
 * Plans a query that combines a two-state clause with a bounded range on NUM and checks that
 * NUM is typed as {@link NumberType}, is not reported as indexed, and that both normalized
 * range bounds appear in the query option handed to the {@link QueryIterator}.
 *
 * @throws Exception
 *             if the query configuration cannot be set up
 */
@Test
public void testRange() throws Exception {
    log.info("------  testRange  ------");
    final String lowerBound = "90";
    final String upperBound = "122";
    final String stateField = CityField.STATE.name();
    final String numField = CityField.NUM.name();

    // Assemble the query from its two halves: the state clause and the bounded NUM range.
    String stateClause = "(" + stateField + EQ_OP + "'ohio'" + OR_OP + stateField + EQ_OP + "'iowa'" + ")";
    String rangeClause = "((_Bounded_ = true) && (" + numField + GT_OP + lowerBound + AND_OP + numField + LT_OP + upperBound + "))";
    String query = stateClause + AND_OP + rangeClause;
    ShardQueryConfiguration config = (ShardQueryConfiguration) setupConfig(query);

    // NUM must be reported with the NumberType datatype ...
    String expectedTyping = numField + ":" + NumberType.class.getName();
    Assert.assertTrue(config.getIndexedFieldDataTypesAsString().contains(expectedTyping));
    // ... while not being listed among the indexed fields.
    Assert.assertFalse(config.getIndexedFields().contains(numField));

    NumberType normalizer = new NumberType();
    String normalizedLower = normalizer.normalize(lowerBound);
    String normalizedUpper = normalizer.normalize(upperBound);

    Iterator<QueryData> queries = config.getQueries();
    Assert.assertTrue(queries.hasNext());
    QueryData first = queries.next();
    for (IteratorSetting setting : first.getSettings()) {
        // Only the QueryIterator setting carries the query text we want to inspect.
        if (!setting.getIteratorClass().equals(QueryIterator.class.getName())) {
            continue;
        }
        String plannedQuery = setting.getOptions().get(QueryOptions.QUERY);
        Assert.assertTrue(plannedQuery.contains(normalizedLower));
        Assert.assertTrue(plannedQuery.contains(normalizedUpper));
    }
}
Also used : NumberType(datawave.data.type.NumberType) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) QueryData(datawave.webservice.query.configuration.QueryData) QueryIterator(datawave.query.iterator.QueryIterator) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) Test(org.junit.Test)

Example 18 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class UnindexedNumericQueryTest method testNumericTerm.

/**
 * Plans a query that combines a single-state term with a one-sided comparison on NUM and checks
 * that NUM is typed as {@link NumberType}, is not reported as indexed, and that the normalized
 * threshold appears in the query option handed to the {@link QueryIterator}.
 *
 * @throws Exception
 *             if the query configuration cannot be set up
 */
@Test
public void testNumericTerm() throws Exception {
    log.info("------  testNumericTerm  ------");
    final String threshold = "115";
    final String state = "'indiana'";
    final String numField = CityField.NUM.name();

    String query = CityField.STATE.name() + EQ_OP + state + AND_OP + numField + GT_OP + threshold;
    ShardQueryConfiguration config = (ShardQueryConfiguration) setupConfig(query);

    // NUM must be reported with the NumberType datatype ...
    String expectedTyping = numField + ":" + NumberType.class.getName();
    Assert.assertTrue(config.getIndexedFieldDataTypesAsString().contains(expectedTyping));
    // ... while not being listed among the indexed fields.
    Assert.assertFalse(config.getIndexedFields().contains(numField));

    String normalizedThreshold = new NumberType().normalize(threshold);

    Iterator<QueryData> queries = config.getQueries();
    Assert.assertTrue(queries.hasNext());
    QueryData first = queries.next();
    for (IteratorSetting setting : first.getSettings()) {
        // Only the QueryIterator setting carries the query text we want to inspect.
        if (!setting.getIteratorClass().equals(QueryIterator.class.getName())) {
            continue;
        }
        String plannedQuery = setting.getOptions().get(QueryOptions.QUERY);
        Assert.assertTrue(plannedQuery.contains(normalizedThreshold));
    }
}
Also used : NumberType(datawave.data.type.NumberType) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) QueryData(datawave.webservice.query.configuration.QueryData) QueryIterator(datawave.query.iterator.QueryIterator) ShardQueryConfiguration(datawave.query.config.ShardQueryConfiguration) Test(org.junit.Test)

Example 19 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class GroupingTestWithModel method testCountingMapAgain.

/**
 * Adds three attribute groups to a {@code GroupCountingHashMap}, two of which differ only in
 * their column visibilities, and checks the resulting per-group counts and the visibilities
 * found on the map's keys.
 */
@Test
public void testCountingMapAgain() {
    GroupingTransform.GroupCountingHashMap counts = new GroupingTransform.GroupCountingHashMap(new MarkingFunctions.Default());

    // First group: NAME=FOO (visibility A) and AGE=5 (visibility C).
    GroupingTypeAttribute<?> fooVisA = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    fooVisA.setColumnVisibility(new ColumnVisibility("A"));
    GroupingTypeAttribute<?> fiveVisC = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    fiveVisC.setColumnVisibility(new ColumnVisibility("C"));
    Set<GroupingTypeAttribute<?>> firstGroup = Sets.newHashSet(fooVisA, fiveVisC);
    counts.add(firstGroup);

    // Second group: same values as the first, but with visibilities B and D.
    GroupingTypeAttribute<?> fooVisB = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    fooVisB.setColumnVisibility(new ColumnVisibility("B"));
    GroupingTypeAttribute<?> fiveVisD = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    fiveVisD.setColumnVisibility(new ColumnVisibility("D"));
    Set<GroupingTypeAttribute<?>> secondGroup = Sets.newHashSet(fooVisB, fiveVisD);
    counts.add(secondGroup);

    // The differing visibilities must not affect equality or hashing of the two groups.
    Assert.assertEquals(firstGroup.hashCode(), secondGroup.hashCode());
    Assert.assertEquals(firstGroup, secondGroup);

    // Third group: distinct values NAME=BAR (visibility C) and AGE=6 (visibility D).
    GroupingTypeAttribute barVisC = new GroupingTypeAttribute(new LcType("BAR"), new Key("NAME"), true);
    barVisC.setColumnVisibility(new ColumnVisibility("C"));
    GroupingTypeAttribute sixVisD = new GroupingTypeAttribute(new NumberType("6"), new Key("AGE"), true);
    sixVisD.setColumnVisibility(new ColumnVisibility("D"));
    counts.add(Sets.newHashSet(barVisC, sixVisD));

    log.debug("map is: " + counts);

    for (Map.Entry<Collection<GroupingTypeAttribute<?>>, Integer> grouped : counts.entrySet()) {
        for (Attribute<?> member : grouped.getKey()) {
            int occurrences = grouped.getValue();
            String value = member.getData().toString();
            switch (value) {
                case "FOO":
                    Assert.assertEquals(2, occurrences);
                    // the key's visibility is expected to be the merge of the two FOO entries that were added
                    Assert.assertEquals(new ColumnVisibility("A&B"), member.getColumnVisibility());
                    break;
                case "5":
                    Assert.assertEquals(2, occurrences);
                    // the key's visibility is expected to be the merge of the two AGE=5 entries that were added
                    Assert.assertEquals(new ColumnVisibility("C&D"), member.getColumnVisibility());
                    break;
                case "BAR":
                    Assert.assertEquals(1, occurrences);
                    Assert.assertEquals(new ColumnVisibility("C"), member.getColumnVisibility());
                    break;
                case "6":
                    Assert.assertEquals(1, occurrences);
                    Assert.assertEquals(new ColumnVisibility("D"), member.getColumnVisibility());
                    break;
                default:
                    // any other value is not checked
                    break;
            }
        }
    }
}
Also used : MarkingFunctions(datawave.marking.MarkingFunctions) NumberType(datawave.data.type.NumberType) GroupingTypeAttribute(datawave.query.transformer.GroupingTransform.GroupingTypeAttribute) Collection(java.util.Collection) LcType(datawave.data.type.LcType) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 20 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class GroupingTest method testCountingMapAgain.

/**
 * Adds three attribute groups to a {@code GroupCountingHashMap}, two of which differ only in
 * their column visibilities, and checks the resulting per-group counts and the visibilities
 * found on the map's keys.
 */
@Test
public void testCountingMapAgain() {
    GroupingTransform.GroupCountingHashMap counts = new GroupingTransform.GroupCountingHashMap(new MarkingFunctions.Default());

    // First group: NAME=FOO (visibility A) and AGE=5 (visibility C).
    GroupingTypeAttribute<?> fooVisA = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    fooVisA.setColumnVisibility(new ColumnVisibility("A"));
    GroupingTypeAttribute<?> fiveVisC = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    fiveVisC.setColumnVisibility(new ColumnVisibility("C"));
    Set<GroupingTypeAttribute<?>> firstGroup = Sets.newHashSet(fooVisA, fiveVisC);
    counts.add(firstGroup);

    // Second group: same values as the first, but with visibilities B and D.
    GroupingTypeAttribute<?> fooVisB = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true);
    fooVisB.setColumnVisibility(new ColumnVisibility("B"));
    GroupingTypeAttribute<?> fiveVisD = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true);
    fiveVisD.setColumnVisibility(new ColumnVisibility("D"));
    Set<GroupingTypeAttribute<?>> secondGroup = Sets.newHashSet(fooVisB, fiveVisD);
    counts.add(secondGroup);

    // The differing visibilities must not affect equality or hashing of the two groups.
    Assert.assertEquals(firstGroup.hashCode(), secondGroup.hashCode());
    Assert.assertEquals(firstGroup, secondGroup);

    // Third group: distinct values NAME=BAR (visibility C) and AGE=6 (visibility D).
    GroupingTypeAttribute barVisC = new GroupingTypeAttribute(new LcType("BAR"), new Key("NAME"), true);
    barVisC.setColumnVisibility(new ColumnVisibility("C"));
    GroupingTypeAttribute sixVisD = new GroupingTypeAttribute(new NumberType("6"), new Key("AGE"), true);
    sixVisD.setColumnVisibility(new ColumnVisibility("D"));
    counts.add(Sets.newHashSet(barVisC, sixVisD));

    log.debug("map is: " + counts);

    for (Map.Entry<Collection<GroupingTypeAttribute<?>>, Integer> grouped : counts.entrySet()) {
        for (Attribute<?> member : grouped.getKey()) {
            int occurrences = grouped.getValue();
            String value = member.getData().toString();
            switch (value) {
                case "FOO":
                    Assert.assertEquals(2, occurrences);
                    // the key's visibility is expected to be the merge of the two FOO entries that were added
                    Assert.assertEquals(new ColumnVisibility("A&B"), member.getColumnVisibility());
                    break;
                case "5":
                    Assert.assertEquals(2, occurrences);
                    // the key's visibility is expected to be the merge of the two AGE=5 entries that were added
                    Assert.assertEquals(new ColumnVisibility("C&D"), member.getColumnVisibility());
                    break;
                case "BAR":
                    Assert.assertEquals(1, occurrences);
                    Assert.assertEquals(new ColumnVisibility("C"), member.getColumnVisibility());
                    break;
                case "6":
                    Assert.assertEquals(1, occurrences);
                    Assert.assertEquals(new ColumnVisibility("D"), member.getColumnVisibility());
                    break;
                default:
                    // any other value is not checked
                    break;
            }
        }
    }
}
Also used : MarkingFunctions(datawave.marking.MarkingFunctions) NumberType(datawave.data.type.NumberType) GroupingTypeAttribute(datawave.query.transformer.GroupingTransform.GroupingTypeAttribute) Collection(java.util.Collection) LcType(datawave.data.type.LcType) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

NumberType (datawave.data.type.NumberType)28 LcNoDiacriticsType (datawave.data.type.LcNoDiacriticsType)21 Test (org.junit.Test)21 Type (datawave.data.type.Type)20 NoOpType (datawave.data.type.NoOpType)18 ScannerFactory (datawave.query.tables.ScannerFactory)13 MockMetadataHelper (datawave.query.util.MockMetadataHelper)13 ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript)13 RangeFactoryForTests.makeTestRange (datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange)9 QueryPlan (datawave.query.planner.QueryPlan)9 Range (org.apache.accumulo.core.data.Range)9 LcType (datawave.data.type.LcType)8 IpAddressType (datawave.data.type.IpAddressType)4 ShardQueryConfiguration (datawave.query.config.ShardQueryConfiguration)3 Key (org.apache.accumulo.core.data.Key)3 ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 MarkingFunctions (datawave.marking.MarkingFunctions)2 ValueTuple (datawave.query.attributes.ValueTuple)2 QueryIterator (datawave.query.iterator.QueryIterator)2