Search in sources :

Example 21 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class LogicalPlanTest method testGetOutputSchema5.

/*
 * Test an operator graph with a disconnected component.
 *
 * KeywordSource --> RegexMatcher --> TupleSink
 * RegexMatcher (regex 2) --> NlpEntityOperator
 * (a disconnected graph)
 *
 * The output schemas of the source and the first regex matcher are compared
 * against the schemas reported by the operators of a separately built plan.
 * Asking for the output schema of the second regex matcher, which has no
 * input link, is expected to raise a TexeraException.
 */
@Test
public void testGetOutputSchema5() throws Exception {
    // Build the reference plan and read the schemas straight from its operators.
    LogicalPlan validLogicalPlan = getLogicalPlan1();
    Plan queryPlan = validLogicalPlan.buildQueryPlan();
    ISink tupleSink = queryPlan.getRoot();
    IOperator regexMatcher = ((TupleSink) tupleSink).getInputOperator();
    IOperator keywordSource = ((RegexMatcher) regexMatcher).getInputOperator();

    Schema expectedSourceOutputSchema;
    Schema expectedMatcherOutputSchema;
    regexMatcher.open();
    try {
        expectedSourceOutputSchema = keywordSource.getOutputSchema();
        expectedMatcherOutputSchema = regexMatcher.getOutputSchema();
    } finally {
        // Close the operator even when reading a schema fails.
        regexMatcher.close();
    }

    // Build the graph under test: the second regex matcher is never linked to a source.
    LogicalPlan logicalPlan = new LogicalPlan();
    String regexId2 = "regex 2";
    RegexPredicate regexPredicate2 = new RegexPredicate("ca(lifornia)?", Arrays.asList("location", "content"), "regexResults");
    regexPredicate2.setID(regexId2);
    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addOperator(tupleSinkPredicate);
    logicalPlan.addOperator(regexPredicate2);
    logicalPlan.addOperator(nlpEntityPredicate);
    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID, TUPLE_SINK_ID));
    logicalPlan.addLink(new OperatorLink(regexId2, NLP_ENTITY_ID));

    Schema sourceOutputSchema = logicalPlan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema matcherOutputSchema = logicalPlan.getOperatorOutputSchema(REGEX_ID);
    Assert.assertEquals(expectedSourceOutputSchema, sourceOutputSchema);
    Assert.assertEquals(expectedMatcherOutputSchema, matcherOutputSchema);

    // Only this call may throw. Scoping the expectation here (instead of using
    // @Test(expected = ...)) keeps a TexeraException raised during the setup
    // above from being mistaken for a passing test.
    try {
        logicalPlan.getOperatorOutputSchema(regexId2);
        Assert.fail("expected a TexeraException for an operator in the disconnected component");
    } catch (TexeraException expected) {
        // expected: the operator has no input to derive a schema from
    }
}
Also used : ISink(edu.uci.ics.texera.api.dataflow.ISink) TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) RegexPredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate) Schema(edu.uci.ics.texera.api.schema.Schema) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Plan(edu.uci.ics.texera.api.engine.Plan) Test(org.junit.Test)

Example 22 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class LogicalPlanTest method testLogicalPlan2.

/*
 * Test a valid operator graph.
 *                  -> RegexMatcher -->
 * KeywordSource --<                     >-- Join --> TupleSink
 *                  -> NlpEntityOperator -->
 *
 * Walks the built query plan backwards from the sink and checks the type of
 * every operator, that the two connector outputs carry different output
 * indices, and that both belong to the same broadcast connector.
 */
@Test
public void testLogicalPlan2() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan2();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);

    IOperator join = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join instanceof Join);

    IOperator joinInput1 = ((Join) join).getInnerInputOperator();
    Assert.assertTrue(joinInput1 instanceof RegexMatcher);
    IOperator joinInput2 = ((Join) join).getOuterInputOperator();
    Assert.assertTrue(joinInput2 instanceof NlpEntityOperator);

    IOperator connectorOut1 = ((RegexMatcher) joinInput1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) joinInput2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);

    // Collecting the indices in a set makes duplicates collapse, so the size
    // equals the number of distinct output indices.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    // JUnit's signature is assertEquals(expected, actual); the expected value goes first
    // so that a failure message reports the values the right way round.
    Assert.assertEquals(2, connectorIndices.size());

    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    Assert.assertSame(connector1, connector2);

    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Join(edu.uci.ics.texera.dataflow.join.Join) Plan(edu.uci.ics.texera.api.engine.Plan) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) ISink(edu.uci.ics.texera.api.dataflow.ISink) ConnectorOutputOperator(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector.ConnectorOutputOperator) NlpEntityOperator(edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) OneToNBroadcastConnector(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 23 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class LogicalPlanTest method testLogicalPlan3.

/*
 * Test a valid operator graph.
 *
 *                  --> RegexMatcher -->
 *                  |                    >-- Join1
 * KeywordSource --< -> NlpEntityOperator -->          >-- Join2 --> TupleSink
 *                  |                           /
 *                  --> FuzzyTokenMatcher ----->
 *
 * Walks the built query plan backwards from the sink and checks the type of
 * every operator, that the three connector outputs carry different output
 * indices, and that all of them belong to the same broadcast connector.
 */
@Test
public void testLogicalPlan3() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan3();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);

    IOperator join2 = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join2 instanceof Join);

    // Join2 combines the result of Join1 (outer) with the fuzzy token matcher (inner).
    IOperator join2Input1 = ((Join) join2).getOuterInputOperator();
    Assert.assertTrue(join2Input1 instanceof Join);
    IOperator join2Input2 = ((Join) join2).getInnerInputOperator();
    Assert.assertTrue(join2Input2 instanceof FuzzyTokenMatcher);

    // Join1 combines the regex matcher (inner) with the NLP entity operator (outer).
    IOperator join1Input1 = ((Join) join2Input1).getInnerInputOperator();
    Assert.assertTrue(join1Input1 instanceof RegexMatcher);
    IOperator join1Input2 = ((Join) join2Input1).getOuterInputOperator();
    Assert.assertTrue(join1Input2 instanceof NlpEntityOperator);

    IOperator connectorOut1 = ((RegexMatcher) join1Input1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) join1Input2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);
    IOperator connectorOut3 = ((FuzzyTokenMatcher) join2Input2).getInputOperator();
    Assert.assertTrue(connectorOut3 instanceof ConnectorOutputOperator);

    // Collecting the indices in a set makes duplicates collapse, so the size
    // equals the number of distinct output indices.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut3).getOutputIndex());
    // JUnit's signature is assertEquals(expected, actual); the expected value goes first
    // so that a failure message reports the values the right way round.
    Assert.assertEquals(3, connectorIndices.size());

    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    OneToNBroadcastConnector connector3 = ((ConnectorOutputOperator) connectorOut3).getOwnerConnector();
    Assert.assertSame(connector1, connector2);
    Assert.assertSame(connector1, connector3);

    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Join(edu.uci.ics.texera.dataflow.join.Join) Plan(edu.uci.ics.texera.api.engine.Plan) FuzzyTokenMatcher(edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenMatcher) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) ISink(edu.uci.ics.texera.api.dataflow.ISink) ConnectorOutputOperator(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector.ConnectorOutputOperator) NlpEntityOperator(edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) OneToNBroadcastConnector(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 24 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class LogicalPlanTest method testGetOutputSchema2.

/*
 * Test getOutputSchema on a valid operator graph.
 *                  -> RegexMatcher -->
 * KeywordSource --<                     >-- Join --> TupleSink
 *                  -> NlpEntityOperator -->
 *
 * The schemas reported by the operators of the built query plan are compared
 * with the schemas the logical plan reports for the corresponding operator IDs.
 */
@Test
public void testGetOutputSchema2() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan2();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    // Walk the built plan backwards from the sink to reach each operator.
    ISink tupleSink = queryPlan.getRoot();
    IOperator join = ((TupleSink) tupleSink).getInputOperator();
    IOperator joinInput1 = ((Join) join).getInnerInputOperator();
    IOperator joinInput2 = ((Join) join).getOuterInputOperator();
    IOperator connectorOut1 = ((RegexMatcher) joinInput1).getInputOperator();
    IOperator connectorOut2 = ((NlpEntityOperator) joinInput2).getInputOperator();
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    // Both branches must hang off one connector; otherwise the input of
    // connector1 alone would not be the source feeding the whole graph.
    Assert.assertSame(connector1, connector2);
    IOperator keywordSource = connector1.getInputOperator();

    Schema expectedJoinOutputSchema;
    Schema expectedSourceOutputSchema;
    Schema expectedMatcherOutputSchema;
    Schema expectedNlpEntityOutputSchema;
    join.open();
    try {
        expectedJoinOutputSchema = join.getOutputSchema();
        expectedSourceOutputSchema = keywordSource.getOutputSchema();
        expectedMatcherOutputSchema = joinInput1.getOutputSchema();
        expectedNlpEntityOutputSchema = joinInput2.getOutputSchema();
    } finally {
        // Close the operator even when reading a schema fails.
        join.close();
    }

    Schema joinOutputSchema = logicalPlan.getOperatorOutputSchema(JOIN_DISTANCE_ID);
    Schema sourceOutputSchema = logicalPlan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema matcherOutputSchema = logicalPlan.getOperatorOutputSchema(REGEX_ID);
    Schema nlpEntityOutputSchema = logicalPlan.getOperatorOutputSchema(NLP_ENTITY_ID);

    Assert.assertEquals(expectedJoinOutputSchema, joinOutputSchema);
    Assert.assertEquals(expectedSourceOutputSchema, sourceOutputSchema);
    Assert.assertEquals(expectedMatcherOutputSchema, matcherOutputSchema);
    Assert.assertEquals(expectedNlpEntityOutputSchema, nlpEntityOutputSchema);
}
Also used : ISink(edu.uci.ics.texera.api.dataflow.ISink) ConnectorOutputOperator(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector.ConnectorOutputOperator) TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) NlpEntityOperator(edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator) Schema(edu.uci.ics.texera.api.schema.Schema) Join(edu.uci.ics.texera.dataflow.join.Join) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Plan(edu.uci.ics.texera.api.engine.Plan) OneToNBroadcastConnector(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector) Test(org.junit.Test)

Example 25 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class QueryPlanResource method executeQueryPlan.

/**
 * Request handler for the execution of a query plan.
 *
 * <p>The JSON is deserialized into a {@code LogicalPlan}, which is built into a {@code Plan}.
 * When the root of the plan is a {@code TupleSink}, all of its tuples are collected, written
 * as JSON to a new file named after a random UUID inside {@code resultDirectory}, and
 * returned in readable form together with that ID (response code 0). For any other sink the
 * plan is handed to the engine for evaluation and a message is returned (response code 1).
 *
 * @param logicalPlanJson the JSON representation of the logical plan
 * @return a JSON object holding the response code and either the results or a message
 * @throws TexeraWebException if an {@code IOException} or {@code TexeraException} occurs
 *         while reading the plan, executing it, or storing the results
 */
@POST
@Path("/execute")
// TODO: investigate how to use LogicalPlan directly
public JsonNode executeQueryPlan(String logicalPlanJson) {
    try {
        // One mapper serves all JSON work of this request.
        ObjectMapper objectMapper = new ObjectMapper();
        LogicalPlan logicalPlan = objectMapper.readValue(logicalPlanJson, LogicalPlan.class);
        Plan plan = logicalPlan.buildQueryPlan();
        ISink sink = plan.getRoot();
        // send response back to frontend
        if (sink instanceof TupleSink) {
            TupleSink tupleSink = (TupleSink) sink;
            List<Tuple> results;
            tupleSink.open();
            try {
                results = tupleSink.collectAllTuples();
            } finally {
                // Release the sink even when collecting the tuples fails.
                tupleSink.close();
            }
            // make sure result directory is created
            if (Files.notExists(resultDirectory)) {
                Files.createDirectories(resultDirectory);
            }
            // clean up old result files
            cleanupOldResults();
            // generate new UUID as the result id
            String resultID = UUID.randomUUID().toString();
            // write original json of the result into a file
            java.nio.file.Path resultFile = resultDirectory.resolve(resultID + ".json");
            Files.createFile(resultFile);
            Files.write(resultFile, objectMapper.writeValueAsBytes(results));
            // put readable json of the result into response
            ArrayNode resultNode = objectMapper.createArrayNode();
            for (Tuple tuple : results) {
                resultNode.add(tuple.getReadableJson());
            }
            ObjectNode response = objectMapper.createObjectNode();
            response.put("code", 0);
            response.set("result", resultNode);
            response.put("resultID", resultID);
            return response;
        } else {
            // execute the plan and return success message
            Engine.getEngine().evaluate(plan);
            ObjectNode response = objectMapper.createObjectNode();
            response.put("code", 1);
            response.put("message", "plan sucessfully executed");
            return response;
        }
    } catch (IOException | TexeraException e) {
        // NOTE(review): only the message is forwarded, so the original stack trace is lost;
        // chain the cause if TexeraWebException offers a (message, cause) constructor.
        throw new TexeraWebException(e.getMessage());
    }
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) IOException(java.io.IOException) Plan(edu.uci.ics.texera.api.engine.Plan) LogicalPlan(edu.uci.ics.texera.dataflow.plangen.LogicalPlan) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) ISink(edu.uci.ics.texera.api.dataflow.ISink) LogicalPlan(edu.uci.ics.texera.dataflow.plangen.LogicalPlan) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) TexeraWebException(edu.uci.ics.texera.web.TexeraWebException) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST)

Aggregations

TupleSink (edu.uci.ics.texera.dataflow.sink.tuple.TupleSink)25 Test (org.junit.Test)23 Tuple (edu.uci.ics.texera.api.tuple.Tuple)17 TupleSourceOperator (edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator)13 ISink (edu.uci.ics.texera.api.dataflow.ISink)8 Plan (edu.uci.ics.texera.api.engine.Plan)8 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)7 RegexMatcher (edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher)7 Schema (edu.uci.ics.texera.api.schema.Schema)5 OneToNBroadcastConnector (edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector)3 ConnectorOutputOperator (edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector.ConnectorOutputOperator)3 Join (edu.uci.ics.texera.dataflow.join.Join)3 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)3 NlpEntityOperator (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator)3 HashSet (java.util.HashSet)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode)1 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)1 BasicClient (com.twitter.hbc.httpclient.BasicClient)1