Search in sources :

Example 1 with HBasePropReg

use of org.gradoop.storage.hbase.impl.predicate.filter.impl.HBasePropReg in project gradoop by dbs-leipzig.

In the class HBaseDataSinkSourceTest, method testReadWithPropRegPredicate:

/**
 * Reads a graph collection through an {@link HBaseDataSource} on which a
 * regular-expression property predicate ({@link HBasePropReg}) has been set for
 * graph heads, edges and vertices, and compares the loaded elements with the
 * ones obtained by filtering the social network fixture locally.
 *
 * @param storeIndex index into {@code epgmStores} selecting the store to read from
 * @throws Exception on failure
 */
@Test(dataProvider = "store index")
public void testReadWithPropRegPredicate(int storeIndex) throws Exception {
    // Build the expected result locally: keep only fixture elements that carry
    // the property and whose string value matches the respective pattern.
    List<EPGMGraphHead> expectedGraphHeads = Lists.newArrayList(getSocialGraphHeads())
        .stream()
        .filter(head -> head.hasProperty(PROP_INTEREST)
            && head.getPropertyValue(PROP_INTEREST).getString().matches(PATTERN_GRAPH_PROP.pattern()))
        .collect(Collectors.toList());
    List<EPGMEdge> expectedEdges = Lists.newArrayList(getSocialEdges())
        .stream()
        .filter(edge -> edge.hasProperty(PROP_STATUS)
            && edge.getPropertyValue(PROP_STATUS).getString().matches(PATTERN_EDGE_PROP.pattern()))
        .collect(Collectors.toList());
    List<EPGMVertex> expectedVertices = Lists.newArrayList(getSocialVertices())
        .stream()
        .filter(vertex -> vertex.hasProperty(PROP_NAME)
            && vertex.getPropertyValue(PROP_NAME).getString().matches(PATTERN_VERTEX_PROP.pattern()))
        .collect(Collectors.toList());

    // Create the source and attach the graph, edge and vertex predicates in that order.
    HBaseDataSource filteredSource = new HBaseDataSource(epgmStores[storeIndex], getConfig())
        .applyGraphPredicate(
            Query.elements().fromAll().where(HBaseFilters.propReg(PROP_INTEREST, PATTERN_GRAPH_PROP)))
        .applyEdgePredicate(
            Query.elements().fromAll().where(HBaseFilters.propReg(PROP_STATUS, PATTERN_EDGE_PROP)))
        .applyVertexPredicate(
            Query.elements().fromAll().where(HBaseFilters.propReg(PROP_NAME, PATTERN_VERTEX_PROP)));
    assertTrue(filteredSource.isFilterPushedDown());

    // Load the filtered collection and materialize its three element sets.
    GraphCollection loadedCollection = filteredSource.getGraphCollection();
    Collection<EPGMGraphHead> actualGraphHeads = loadedCollection.getGraphHeads().collect();
    Collection<EPGMVertex> actualVertices = loadedCollection.getVertices().collect();
    Collection<EPGMEdge> actualEdges = loadedCollection.getEdges().collect();

    // Check the element counts first.
    assertEquals(actualGraphHeads.size(), 1);
    assertEquals(actualEdges.size(), 2);
    assertEquals(actualVertices.size(), 2);

    // Then compare the loaded elements against the locally filtered ones.
    validateElementCollections(expectedGraphHeads, actualGraphHeads);
    validateElementCollections(expectedVertices, actualVertices);
    validateGraphElementCollections(expectedVertices, actualVertices);
    validateElementCollections(expectedEdges, actualEdges);
    validateGraphElementCollections(expectedEdges, actualEdges);
}
Also used : HBaseLabelReg(org.gradoop.storage.hbase.impl.predicate.filter.impl.HBaseLabelReg) GradoopTestUtils.validateGraphElementCollections(org.gradoop.common.GradoopTestUtils.validateGraphElementCollections) DataProvider(org.testng.annotations.DataProvider) HBaseElementFilter(org.gradoop.storage.hbase.impl.predicate.filter.api.HBaseElementFilter) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) HBasePropLargerThan(org.gradoop.storage.hbase.impl.predicate.filter.impl.HBasePropLargerThan) GradoopTestUtils(org.gradoop.common.GradoopTestUtils) GradoopTestUtils.validateElementCollections(org.gradoop.common.GradoopTestUtils.validateElementCollections) ArrayList(java.util.ArrayList) HBaseFilters(org.gradoop.storage.hbase.utils.HBaseFilters) GradoopIdSet(org.gradoop.common.model.impl.id.GradoopIdSet) HBaseLabelIn(org.gradoop.storage.hbase.impl.predicate.filter.impl.HBaseLabelIn) Lists(com.google.common.collect.Lists) HBaseDataSink(org.gradoop.storage.hbase.impl.io.HBaseDataSink) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) PropertyValue(org.gradoop.common.model.impl.properties.PropertyValue) GradoopHBaseTestBase(org.gradoop.storage.impl.hbase.GradoopHBaseTestBase) HBaseEPGMStore(org.gradoop.storage.hbase.impl.HBaseEPGMStore) LogicalGraph(org.gradoop.flink.model.impl.epgm.LogicalGraph) GradoopHBaseConfig(org.gradoop.storage.hbase.config.GradoopHBaseConfig) Assert.assertFalse(org.testng.Assert.assertFalse) AfterClass(org.testng.annotations.AfterClass) HBasePropEquals(org.gradoop.storage.hbase.impl.predicate.filter.impl.HBasePropEquals) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) IOException(java.io.IOException) GraphCollection(org.gradoop.flink.model.impl.epgm.GraphCollection) Collectors(java.util.stream.Collectors) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) List(java.util.List) HBasePropReg(org.gradoop.storage.hbase.impl.predicate.filter.impl.HBasePropReg) 
LocalCollectionOutputFormat(org.apache.flink.api.java.io.LocalCollectionOutputFormat) Query(org.gradoop.storage.common.predicate.query.Query) GradoopFlinkTestBase(org.gradoop.flink.model.GradoopFlinkTestBase) HBaseDataSource(org.gradoop.storage.hbase.impl.io.HBaseDataSource) Assert.assertTrue(org.testng.Assert.assertTrue) FlinkAsciiGraphLoader(org.gradoop.flink.util.FlinkAsciiGraphLoader) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) Identifiable(org.gradoop.common.model.api.entities.Identifiable) InputStream(java.io.InputStream) GraphCollection(org.gradoop.flink.model.impl.epgm.GraphCollection) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) HBaseDataSource(org.gradoop.storage.hbase.impl.io.HBaseDataSource) EPGMEdge(org.gradoop.common.model.impl.pojo.EPGMEdge) EPGMGraphHead(org.gradoop.common.model.impl.pojo.EPGMGraphHead) Test(org.testng.annotations.Test)

Example 2 with HBasePropReg

use of org.gradoop.storage.hbase.impl.predicate.filter.impl.HBasePropReg in project gradoop by dbs-leipzig.

In the class HBasePropRegTest, method testToHBaseFilter:

/**
 * Checks that {@code toHBaseFilter(false)} of an {@link HBasePropReg} yields a
 * filter whose string representation equals that of a hand-built
 * {@code MUST_PASS_ALL} filter list consisting of a property type filter
 * followed by a regex value filter.
 */
@Test
public void testToHBaseFilter() {
    String propertyKey = "key";
    Pattern regex = Pattern.compile("^FooBar.*$");

    // Expected type filter: compares the type column against the STRING type byte.
    SingleColumnValueFilter expectedTypeFilter = new SingleColumnValueFilter(
        Bytes.toBytesBinary(CF_PROPERTY_TYPE),
        Bytes.toBytesBinary(propertyKey),
        CompareFilter.CompareOp.EQUAL,
        new byte[] { Type.STRING.getTypeByte() });
    // Skip the entire row if the column is not found
    expectedTypeFilter.setFilterIfMissing(true);

    // Expected value filter: compares the value column against the regular expression.
    SingleColumnValueFilter expectedValueFilter = new SingleColumnValueFilter(
        Bytes.toBytesBinary(CF_PROPERTY_VALUE),
        Bytes.toBytesBinary(propertyKey),
        CompareFilter.CompareOp.EQUAL,
        new RegexStringComparator(regex.pattern()));
    // Skip the entire row if the column is not found
    expectedValueFilter.setFilterIfMissing(true);

    // Both filters must pass; the type filter is added before the value filter.
    FilterList expectedFilterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    expectedFilterList.addFilter(expectedTypeFilter);
    expectedFilterList.addFilter(expectedValueFilter);

    HBasePropReg<EPGMVertex> filterUnderTest = new HBasePropReg<>(propertyKey, regex);

    assertEquals(
        filterUnderTest.toHBaseFilter(false).toString(),
        expectedFilterList.toString(),
        "Failed during filter comparison for key [" + propertyKey + "].");
}
Also used : RegexStringComparator(org.apache.hadoop.hbase.filter.RegexStringComparator) Pattern(java.util.regex.Pattern) EPGMVertex(org.gradoop.common.model.impl.pojo.EPGMVertex) SingleColumnValueFilter(org.apache.hadoop.hbase.filter.SingleColumnValueFilter) HBasePropReg(org.gradoop.storage.hbase.impl.predicate.filter.impl.HBasePropReg) FilterList(org.apache.hadoop.hbase.filter.FilterList) Test(org.testng.annotations.Test)

Aggregations

EPGMVertex (org.gradoop.common.model.impl.pojo.EPGMVertex)2 HBasePropReg (org.gradoop.storage.hbase.impl.predicate.filter.impl.HBasePropReg)2 Lists (com.google.common.collect.Lists)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 List (java.util.List)1 Pattern (java.util.regex.Pattern)1 Collectors (java.util.stream.Collectors)1 LocalCollectionOutputFormat (org.apache.flink.api.java.io.LocalCollectionOutputFormat)1 FilterList (org.apache.hadoop.hbase.filter.FilterList)1 RegexStringComparator (org.apache.hadoop.hbase.filter.RegexStringComparator)1 SingleColumnValueFilter (org.apache.hadoop.hbase.filter.SingleColumnValueFilter)1 GradoopTestUtils (org.gradoop.common.GradoopTestUtils)1 GradoopTestUtils.validateElementCollections (org.gradoop.common.GradoopTestUtils.validateElementCollections)1 GradoopTestUtils.validateGraphElementCollections (org.gradoop.common.GradoopTestUtils.validateGraphElementCollections)1 Identifiable (org.gradoop.common.model.api.entities.Identifiable)1 GradoopIdSet (org.gradoop.common.model.impl.id.GradoopIdSet)1 EPGMEdge (org.gradoop.common.model.impl.pojo.EPGMEdge)1