Search in sources:

Example 1 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class TwitterConverter method getNextTuple.

/**
 * Pulls tuples from the input operator until one yields a non-empty list of
 * fields from {@code generateFieldsFromJson}, then returns a new tuple built
 * on {@code outputSchema}.
 *
 * The output tuple carries every input field except the raw-data attribute
 * (matched case-insensitively), followed by the fields generated from the
 * raw JSON string.
 *
 * @return the next converted tuple, or {@code null} once the input operator
 *         returns {@code null}
 * @throws TexeraException a {@link DataflowException} is thrown when the
 *         cursor is in the CLOSED state
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    if (cursor == CLOSED) {
        throw new DataflowException(ErrorMessages.OPERATOR_NOT_OPENED);
    }
    for (Tuple inputTuple = inputOperator.getNextTuple(); inputTuple != null; inputTuple = inputOperator.getNextTuple()) {
        String rawJson = inputTuple.getField(rawDataAttribute).getValue().toString();
        List<IField> tweetFields = generateFieldsFromJson(rawJson);
        // Input tuples that produce no fields are skipped without advancing the cursor.
        if (tweetFields.isEmpty()) {
            continue;
        }
        cursor++;
        List<IField> outputFields = new ArrayList<>();
        // Carry over every original field except the raw-data attribute itself.
        for (String attrName : inputTuple.getSchema().getAttributeNames()) {
            if (!attrName.equalsIgnoreCase(rawDataAttribute)) {
                outputFields.add(inputTuple.getField(attrName, IField.class));
            }
        }
        outputFields.addAll(tweetFields);
        return new Tuple(outputSchema, outputFields);
    }
    return null;
}
Also used : DateTimeField(edu.uci.ics.texera.api.field.DateTimeField) Arrays(java.util.Arrays) ZonedDateTime(java.time.ZonedDateTime) Tuple(edu.uci.ics.texera.api.tuple.Tuple) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) ArrayList(java.util.ArrayList) List(java.util.List) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) IField(edu.uci.ics.texera.api.field.IField) TextField(edu.uci.ics.texera.api.field.TextField) StringField(edu.uci.ics.texera.api.field.StringField) DateTimeFormatter(java.time.format.DateTimeFormatter) ErrorMessages(edu.uci.ics.texera.api.constants.ErrorMessages) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Schema(edu.uci.ics.texera.api.schema.Schema) JsonNode(com.fasterxml.jackson.databind.JsonNode) Attribute(edu.uci.ics.texera.api.schema.Attribute) IntegerField(edu.uci.ics.texera.api.field.IntegerField) AsterixSource(edu.uci.ics.texera.dataflow.source.asterix.AsterixSource) ArrayList(java.util.ArrayList) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 2 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class NltkSentimentOperator method popupOneTuple.

/**
 * Removes the first tuple from {@code tupleBuffer} and returns a copy of it
 * on {@code outputSchema} with one extra {@link IntegerField} appended.
 *
 * The appended value is looked up in {@code idClassMap} using the string
 * form of the tuple's {@code _ID} field. When the buffer becomes empty it is
 * reset to {@code null}.
 *
 * @return the head tuple of the buffer extended with its mapped class value
 */
private Tuple popupOneTuple() {
    // remove(0) hands back the element it removes, i.e. the head of the buffer.
    Tuple head = tupleBuffer.remove(0);
    if (tupleBuffer.isEmpty()) {
        tupleBuffer = null;
    }
    String tupleId = head.getField(SchemaConstants._ID).getValue().toString();
    Integer className = idClassMap.get(tupleId);
    List<IField> extendedFields = new ArrayList<>(head.getFields());
    extendedFields.add(new IntegerField(className));
    return new Tuple(outputSchema, extendedFields);
}
Also used : ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 3 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class NltkSentimentOperatorTest method test1.

/**
 * Test sentiment test result should be positive.
 *
 * Feeds a single positive tuple through a NltkSentimentOperator into a
 * TupleSink and checks the "sentiment" field of the first collected tuple.
 */
@Test
public void test1() throws TexeraException {
    TupleSourceOperator tupleSource = new TupleSourceOperator(Arrays.asList(NltkSentimentTestConstants.POSITIVE_TUPLE), NlpSentimentTestConstants.SENTIMENT_SCHEMA);
    NltkSentimentOperator nltkSentimentOperator = new NltkSentimentOperator(new NltkSentimentOperatorPredicate(NlpSentimentTestConstants.TEXT, "sentiment", BATCH_SIZE, MODEL_FILE_NAME));
    TupleSink tupleSink = new TupleSink();
    nltkSentimentOperator.setInputOperator(tupleSource);
    tupleSink.setInputOperator(nltkSentimentOperator);
    tupleSink.open();
    List<Tuple> results;
    try {
        results = tupleSink.collectAllTuples();
    } finally {
        // Close the sink even if collection throws, so a failure does not leave it open.
        tupleSink.close();
    }
    Tuple tuple = results.get(0);
    // JUnit expects (expected, actual); this order keeps failure messages accurate.
    Assert.assertEquals(SentimentConstants.POSITIVE, tuple.getField("sentiment").getValue());
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) TupleSourceOperator(edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 4 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class NltkSentimentOperatorTest method test2.

/**
 * Test sentiment test result should be negative.
 *
 * Feeds a single negative tuple through a NltkSentimentOperator into a
 * TupleSink and checks the "sentiment" field of the first collected tuple.
 */
@Test
public void test2() throws TexeraException {
    TupleSourceOperator tupleSource = new TupleSourceOperator(Arrays.asList(NltkSentimentTestConstants.NEGATIVE_TUPLE), NlpSentimentTestConstants.SENTIMENT_SCHEMA);
    NltkSentimentOperator nltkSentimentOperator = new NltkSentimentOperator(new NltkSentimentOperatorPredicate(NltkSentimentTestConstants.TEXT, "sentiment", BATCH_SIZE, MODEL_FILE_NAME));
    TupleSink tupleSink = new TupleSink();
    nltkSentimentOperator.setInputOperator(tupleSource);
    tupleSink.setInputOperator(nltkSentimentOperator);
    tupleSink.open();
    List<Tuple> results;
    try {
        results = tupleSink.collectAllTuples();
    } finally {
        // Close the sink even if collection throws, so a failure does not leave it open.
        tupleSink.close();
    }
    Tuple tuple = results.get(0);
    // JUnit expects (expected, actual); this order keeps failure messages accurate.
    Assert.assertEquals(SentimentConstants.NEGATIVE, tuple.getField("sentiment").getValue());
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) TupleSourceOperator(edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 5 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class FileSourceOperatorTest method test4.

/**
 * Test FileSourceOperator with a Directory with recursive = true and maxDepth = 2.
 *
 * The files under the recursive sub-directories with recursive depth 2 will be read.
 *
 * expected results: test1.txt, test2.txt and test4.txt will be included
 */
@Test
public void test4() throws Exception {
    String attrName = "content";
    Schema schema = new Schema(new Attribute(attrName, AttributeType.TEXT));
    FileSourcePredicate predicate = new FileSourcePredicate(tempFolderPath.toString(), attrName, true, 2);
    FileSourceOperator fileSource = new FileSourceOperator(predicate);
    List<Tuple> exactResults = new ArrayList<>();
    fileSource.open();
    try {
        // Drain the operator; a null tuple ends the loop.
        Tuple tuple;
        while ((tuple = fileSource.getNextTuple()) != null) {
            exactResults.add(tuple);
        }
    } finally {
        // Close the source even if reading throws, so a failure does not leave it open.
        fileSource.close();
    }
    List<Tuple> expectedResults = Arrays.asList(new Tuple(schema, new TextField(tempFile1String)), new Tuple(schema, new TextField(tempFile2String)), new Tuple(schema, new TextField(tempFile4String)));
    Assert.assertTrue(TestUtils.equals(expectedResults, exactResults));
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) TextField(edu.uci.ics.texera.api.field.TextField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Tuple (edu.uci.ics.texera.api.tuple.Tuple)332 ArrayList (java.util.ArrayList)191 Test (org.junit.Test)178 IField (edu.uci.ics.texera.api.field.IField)130 Schema (edu.uci.ics.texera.api.schema.Schema)126 Span (edu.uci.ics.texera.api.span.Span)100 StringField (edu.uci.ics.texera.api.field.StringField)96 Attribute (edu.uci.ics.texera.api.schema.Attribute)95 IntegerField (edu.uci.ics.texera.api.field.IntegerField)92 TextField (edu.uci.ics.texera.api.field.TextField)90 DoubleField (edu.uci.ics.texera.api.field.DoubleField)65 DateField (edu.uci.ics.texera.api.field.DateField)60 SimpleDateFormat (java.text.SimpleDateFormat)58 DataWriter (edu.uci.ics.texera.storage.DataWriter)33 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)30 ListField (edu.uci.ics.texera.api.field.ListField)28 TupleSourceOperator (edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator)24 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)23 ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator)21 ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate)21