Search in sources :

Example 6 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class NltkSentimentOperatorTest method test2.

/*
 * Runs a single negative-sentiment tuple through
 *
 *   TupleSourceOperator --> NltkSentimentOperator --> TupleSink
 *
 * and checks that the "sentiment" field of the first output tuple equals
 * SentimentConstants.NEGATIVE.
 */
@Test
public void test2() throws TexeraException {
    // NOTE(review): the tuple comes from NltkSentimentTestConstants while the schema comes
    // from NlpSentimentTestConstants -- confirm that mixing the two is intentional.
    TupleSourceOperator tupleSource = new TupleSourceOperator(
            Arrays.asList(NltkSentimentTestConstants.NEGATIVE_TUPLE),
            NlpSentimentTestConstants.SENTIMENT_SCHEMA);
    NltkSentimentOperator nltkSentimentOperator = new NltkSentimentOperator(
            new NltkSentimentOperatorPredicate(NltkSentimentTestConstants.TEXT, "sentiment", BATCH_SIZE, MODEL_FILE_NAME));
    TupleSink tupleSink = new TupleSink();
    nltkSentimentOperator.setInputOperator(tupleSource);
    tupleSink.setInputOperator(nltkSentimentOperator);

    tupleSink.open();
    List<Tuple> results = tupleSink.collectAllTuples();
    tupleSink.close();

    // Fail with an assertion (not an IndexOutOfBoundsException) when nothing was produced.
    Assert.assertFalse("sentiment operator produced no output tuples", results.isEmpty());
    Tuple tuple = results.get(0);
    // JUnit's signature is assertEquals(expected, actual); keeping that order makes the
    // failure message report the right value as "expected".
    Assert.assertEquals(SentimentConstants.NEGATIVE, tuple.getField("sentiment").getValue());
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) TupleSourceOperator(edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 7 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class NlpSplitTest method test2.

/*
 * ONE_TO_MANY split test.
 *
 * Pipeline: TupleSourceOperator --> NlpSplitOperator --> TupleSink
 *
 * The collected tuples must match the expected one-to-many result, and no two
 * output tuples may share the same _ID field.
 */
@Test
public void test2() throws TexeraException, ParseException {
    TupleSourceOperator source = new TupleSourceOperator(
            NlpSplitTestConstants.getOneToManyTestTuple(),
            NlpSplitTestConstants.SPLIT_SCHEMA);
    NlpSplitPredicate splitPredicate = new NlpSplitPredicate(
            NLPOutputType.ONE_TO_MANY,
            NlpSplitTestConstants.TEXT,
            PropertyNameConstants.NLP_OUTPUT_TYPE);
    NlpSplitOperator splitter = new NlpSplitOperator(splitPredicate);
    TupleSink sink = new TupleSink();

    // Wire the operators together, source first.
    splitter.setInputOperator(source);
    sink.setInputOperator(splitter);

    sink.open();
    List<Tuple> outputTuples = sink.collectAllTuples();
    sink.close();

    boolean matchesExpected = TestUtils.equals(NlpSplitTestConstants.getOneToManyResultTuple(), outputTuples);
    Assert.assertTrue(matchesExpected);

    // Set.add returns false when the element is already present, so a false
    // return here means two output tuples carry the same _ID.
    Set<IDField> seenIds = new HashSet<>();
    for (Tuple outputTuple : outputTuples) {
        Assert.assertTrue(seenIds.add(outputTuple.getField(SchemaConstants._ID)));
    }
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IDField(edu.uci.ics.texera.api.field.IDField) TupleSourceOperator(edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator) Tuple(edu.uci.ics.texera.api.tuple.Tuple) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 8 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class NlpSplitTest method test1.

/*
 * ONE_TO_ONE split test.
 *
 * Pipeline: TupleSourceOperator --> NlpSplitOperator --> TupleSink
 *
 * The collected tuples must match the expected one-to-one result.
 */
@Test
public void test1() throws TexeraException, ParseException {
    TupleSourceOperator source = new TupleSourceOperator(
            NlpSplitTestConstants.getOneToOneTestTuple(),
            NlpSplitTestConstants.SPLIT_SCHEMA);
    NlpSplitPredicate splitPredicate = new NlpSplitPredicate(
            NLPOutputType.ONE_TO_ONE,
            NlpSplitTestConstants.TEXT,
            SchemaConstants.SPAN_LIST);
    NlpSplitOperator splitter = new NlpSplitOperator(splitPredicate);
    TupleSink sink = new TupleSink();

    // Wire the operators together, source first.
    splitter.setInputOperator(source);
    sink.setInputOperator(splitter);

    sink.open();
    List<Tuple> outputTuples = sink.collectAllTuples();
    sink.close();

    boolean matchesExpected = TestUtils.equals(NlpSplitTestConstants.getOneToOneResultTuple(), outputTuples);
    Assert.assertTrue(matchesExpected);
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) TupleSourceOperator(edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 9 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class LogicalPlanTest method testGetOutputSchema1.

/*
 * Checks LogicalPlan.getOperatorOutputSchema against the schemas reported by
 * the operators of the built query plan.
 *
 * Operator graph: KeywordSource --> RegexMatcher --> TupleSink
 */
@Test
public void testGetOutputSchema1() throws Exception {
    LogicalPlan plan = getLogicalPlan1();

    // Walk the built plan backwards from the sink to reach the two upstream operators.
    TupleSink sink = (TupleSink) plan.buildQueryPlan().getRoot();
    RegexMatcher matcher = (RegexMatcher) sink.getInputOperator();
    IOperator source = matcher.getInputOperator();

    // Both schemas are read between open() and close() of the matcher.
    matcher.open();
    Schema expectedSourceSchema = source.getOutputSchema();
    Schema expectedMatcherSchema = matcher.getOutputSchema();
    matcher.close();

    Schema actualSourceSchema = plan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema actualMatcherSchema = plan.getOperatorOutputSchema(REGEX_ID);

    Assert.assertEquals(expectedSourceSchema, actualSourceSchema);
    Assert.assertEquals(expectedMatcherSchema, actualMatcherSchema);
}
Also used : ISink(edu.uci.ics.texera.api.dataflow.ISink) TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Schema(edu.uci.ics.texera.api.schema.Schema) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Plan(edu.uci.ics.texera.api.engine.Plan) Test(org.junit.Test)

Example 10 with TupleSink

use of edu.uci.ics.texera.dataflow.sink.tuple.TupleSink in project textdb by TextDB.

the class LogicalPlanTest method testLogicalPlan1.

/*
 * Builds the query plan for a valid operator graph and checks the concrete
 * operator type at every step, walking from the root back to the source.
 *
 * Operator graph: KeywordSource --> RegexMatcher --> TupleSink
 */
@Test
public void testLogicalPlan1() throws Exception {
    LogicalPlan plan = getLogicalPlan1();
    ISink root = plan.buildQueryPlan().getRoot();

    // Each type is asserted before the matching cast so that a wrong operator
    // shows up as an assertion failure.
    Assert.assertTrue(root instanceof TupleSink);
    IOperator sinkInput = ((TupleSink) root).getInputOperator();

    Assert.assertTrue(sinkInput instanceof RegexMatcher);
    IOperator matcherInput = ((RegexMatcher) sinkInput).getInputOperator();

    Assert.assertTrue(matcherInput instanceof KeywordMatcherSourceOperator);
}
Also used : ISink(edu.uci.ics.texera.api.dataflow.ISink) TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Plan(edu.uci.ics.texera.api.engine.Plan) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) Test(org.junit.Test)

Aggregations

TupleSink (edu.uci.ics.texera.dataflow.sink.tuple.TupleSink): 25; Test (org.junit.Test): 23; Tuple (edu.uci.ics.texera.api.tuple.Tuple): 17; TupleSourceOperator (edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator): 13; ISink (edu.uci.ics.texera.api.dataflow.ISink): 8; Plan (edu.uci.ics.texera.api.engine.Plan): 8; IOperator (edu.uci.ics.texera.api.dataflow.IOperator): 7; RegexMatcher (edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher): 7; Schema (edu.uci.ics.texera.api.schema.Schema): 5; OneToNBroadcastConnector (edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector): 3; ConnectorOutputOperator (edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector.ConnectorOutputOperator): 3; Join (edu.uci.ics.texera.dataflow.join.Join): 3; KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator): 3; NlpEntityOperator (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator): 3; HashSet (java.util.HashSet): 3; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2; JsonNode (com.fasterxml.jackson.databind.JsonNode): 1; ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode): 1; ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode): 1; BasicClient (com.twitter.hbc.httpclient.BasicClient): 1