Search in sources :

Example 1 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class TwitterConverter method getNextTuple.

/**
 * Returns the next converted tuple, or {@code null} once the input operator
 * has no more tuples.
 *
 * Input tuples are pulled one at a time. The string form of the value stored
 * under {@code rawDataAttribute} is handed to {@code generateFieldsFromJson};
 * input tuples for which that call yields an empty list are skipped. For the
 * first tuple that yields fields, the result is built under
 * {@code outputSchema} from every input field except the raw-data attribute
 * (attribute names compared case-insensitively), followed by the generated
 * fields.
 *
 * @return the next output tuple, or {@code null} when the input is exhausted
 * @throws DataflowException if {@code cursor} equals {@code CLOSED}
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    if (cursor == CLOSED) {
        throw new DataflowException(ErrorMessages.OPERATOR_NOT_OPENED);
    }
    for (Tuple inputTuple = inputOperator.getNextTuple(); inputTuple != null; inputTuple = inputOperator.getNextTuple()) {
        String rawJson = inputTuple.getField(rawDataAttribute).getValue().toString();
        List<IField> convertedFields = generateFieldsFromJson(rawJson);
        if (convertedFields.isEmpty()) {
            // Nothing could be generated from this tuple; try the next one.
            continue;
        }
        cursor++;
        List<IField> outputFields = new ArrayList<>();
        // Carry over every original field except the raw-data attribute itself.
        for (String attributeName : inputTuple.getSchema().getAttributeNames()) {
            if (attributeName.equalsIgnoreCase(rawDataAttribute)) {
                continue;
            }
            outputFields.add(inputTuple.getField(attributeName, IField.class));
        }
        outputFields.addAll(convertedFields);
        return new Tuple(outputSchema, outputFields);
    }
    return null;
}
Also used : DateTimeField(edu.uci.ics.texera.api.field.DateTimeField) Arrays(java.util.Arrays) ZonedDateTime(java.time.ZonedDateTime) Tuple(edu.uci.ics.texera.api.tuple.Tuple) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) ArrayList(java.util.ArrayList) List(java.util.List) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) IField(edu.uci.ics.texera.api.field.IField) TextField(edu.uci.ics.texera.api.field.TextField) StringField(edu.uci.ics.texera.api.field.StringField) DateTimeFormatter(java.time.format.DateTimeFormatter) ErrorMessages(edu.uci.ics.texera.api.constants.ErrorMessages) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Schema(edu.uci.ics.texera.api.schema.Schema) JsonNode(com.fasterxml.jackson.databind.JsonNode) Attribute(edu.uci.ics.texera.api.schema.Attribute) IntegerField(edu.uci.ics.texera.api.field.IntegerField) AsterixSource(edu.uci.ics.texera.dataflow.source.asterix.AsterixSource) ArrayList(java.util.ArrayList) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 2 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class WordCountOperator method computeNextMatchingTuple.

/**
 * Emits the next tuple built from the word-count iterator.
 *
 * If {@code sortedWordCountMap} has not been populated yet,
 * {@code computeWordCount()} is invoked first. Each remaining iterator entry
 * is then turned into a tuple under {@code outputSchema} holding a newly
 * generated random ID, the entry key as a {@code StringField}, and the entry
 * value as an {@code IntegerField}.
 *
 * @return the next tuple, or {@code null} when the iterator has no more entries
 */
@Override
protected Tuple computeNextMatchingTuple() throws TexeraException {
    if (sortedWordCountMap == null) {
        computeWordCount();
    }
    if (!wordCountIterator.hasNext()) {
        return null;
    }
    Entry<String, Integer> wordAndCount = wordCountIterator.next();
    List<IField> outputFields = new ArrayList<>();
    // Every emitted tuple gets its own freshly generated random ID.
    outputFields.add(IDField.newRandomID());
    outputFields.add(new StringField(wordAndCount.getKey()));
    outputFields.add(new IntegerField(wordAndCount.getValue()));
    return new Tuple(outputSchema, outputFields);
}
Also used : StringField(edu.uci.ics.texera.api.field.StringField) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 3 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class OneToNBroadcastConnectorTest method testTwoOutputsWithProjection.

/*
 * Feeds a scan over PEOPLE_TABLE into a two-output OneToNBroadcastConnector
 * and attaches one ProjectionOperator (projecting the description attribute)
 * to each output. Each projection is opened, drained and closed in turn; both
 * result lists must equal the six expected description tuples and each other.
 */
@Test
public void testTwoOutputsWithProjection() throws TexeraException {
    IOperator peopleSource = new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));
    List<String> projectedAttributes = Arrays.asList(TestConstants.DESCRIPTION);
    Schema descriptionSchema = new Schema(TestConstants.DESCRIPTION_ATTR);

    // One expected tuple per description value, in the order listed here.
    List<Tuple> expectedResults = Arrays.asList(
            new Tuple(descriptionSchema, new IField[] { new TextField("Tall Angry") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Short Brown") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("White Angry") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Lin Clooney is Short and lin clooney is Angry") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Tall Fair") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Short angry") }));

    ProjectionPredicate sharedPredicate = new ProjectionPredicate(projectedAttributes);
    ProjectionOperator firstProjection = new ProjectionOperator(sharedPredicate);
    ProjectionOperator secondProjection = new ProjectionOperator(sharedPredicate);

    OneToNBroadcastConnector broadcast = new OneToNBroadcastConnector(2);
    broadcast.setInputOperator(peopleSource);
    firstProjection.setInputOperator(broadcast.getOutputOperator(0));
    secondProjection.setInputOperator(broadcast.getOutputOperator(1));

    // Drain the first output completely before touching the second one.
    firstProjection.open();
    List<Tuple> firstResults = new ArrayList<>();
    for (Tuple current = firstProjection.getNextTuple(); current != null; current = firstProjection.getNextTuple()) {
        firstResults.add(current);
    }
    firstProjection.close();

    secondProjection.open();
    List<Tuple> secondResults = new ArrayList<>();
    for (Tuple current = secondProjection.getNextTuple(); current != null; current = secondProjection.getNextTuple()) {
        secondResults.add(current);
    }
    secondProjection.close();

    Assert.assertTrue(TestUtils.equals(expectedResults, firstResults));
    Assert.assertTrue(TestUtils.equals(expectedResults, secondResults));
    Assert.assertTrue(TestUtils.equals(firstResults, secondResults));
}
Also used : ProjectionOperator(edu.uci.ics.texera.dataflow.projection.ProjectionOperator) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IField(edu.uci.ics.texera.api.field.IField) ProjectionPredicate(edu.uci.ics.texera.dataflow.projection.ProjectionPredicate) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) TextField(edu.uci.ics.texera.api.field.TextField) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 4 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInStringFieldUsingPhrase.

/**
 * Scenario: runs the dictionary matcher over PEOPLE_TABLE with the dictionary
 * entries "john Lee" and "bruce", matching the first-name, last-name and
 * description attributes with {@code KeywordMatchingType.PHRASE_INDEXBASED}.
 * Exactly one tuple is expected, carrying one span on "lastName" and one span
 * on "firstName" in its results list.
 */
@Test
public void testSingleWordQueryInStringFieldUsingPhrase() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<>(Arrays.asList("john Lee", "bruce"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // Spans expected in the results field of the single matching tuple.
    List<Span> expectedSpans = new ArrayList<>();
    expectedSpans.add(new Span("lastName", 0, 8, "john Lee", "john Lee"));
    expectedSpans.add(new Span("firstName", 0, 5, "bruce", "bruce"));

    // Expected schema: the people attributes followed by the results attribute.
    int peopleAttributeCount = TestConstants.ATTRIBUTES_PEOPLE.length;
    Attribute[] expectedAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstants.ATTRIBUTES_PEOPLE, 0, expectedAttributes, 0, peopleAttributeCount);
    expectedAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    IField[] expectedFields = {
            new StringField("bruce"),
            new StringField("john Lee"),
            new IntegerField(46),
            new DoubleField(5.50),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")),
            new TextField("Tall Angry"),
            new ListField<Span>(expectedSpans) };
    Tuple expectedTuple = new Tuple(new Schema(expectedAttributes), expectedFields);
    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(expectedTuple);

    List<String> searchedAttributes = Arrays.asList(TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(PEOPLE_TABLE, dictionary, searchedAttributes, KeywordMatchingType.PHRASE_INDEXBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Also used : Dictionary(edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 5 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInTextFieldUsingKeyword.

/**
 * Scenario: runs the dictionary matcher over PEOPLE_TABLE with the single
 * dictionary entry "tall", matching the first-name, last-name and description
 * attributes with {@code KeywordMatchingType.CONJUNCTION_INDEXBASED}. Two
 * tuples are expected (descriptions "Tall Angry" and "Tall Fair"), each
 * carrying the same single span on "description".
 */
@Test
public void testSingleWordQueryInTextFieldUsingKeyword() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<>(Arrays.asList("tall"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // The same span list is shared by both expected tuples.
    List<Span> expectedSpans = new ArrayList<>();
    expectedSpans.add(new Span("description", 0, 4, "tall", "Tall", 0));

    // Expected schema: the people attributes followed by the results attribute.
    int peopleAttributeCount = TestConstants.ATTRIBUTES_PEOPLE.length;
    Attribute[] expectedAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstants.ATTRIBUTES_PEOPLE, 0, expectedAttributes, 0, peopleAttributeCount);
    expectedAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    IField[] firstExpectedFields = {
            new StringField("bruce"),
            new StringField("john Lee"),
            new IntegerField(46),
            new DoubleField(5.50),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")),
            new TextField("Tall Angry"),
            new ListField<Span>(expectedSpans) };
    IField[] secondExpectedFields = {
            new StringField("christian john wayne"),
            new StringField("rock bale"),
            new IntegerField(42),
            new DoubleField(5.99),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")),
            new TextField("Tall Fair"),
            new ListField<Span>(expectedSpans) };

    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(new Schema(expectedAttributes), firstExpectedFields));
    expectedResults.add(new Tuple(new Schema(expectedAttributes), secondExpectedFields));

    List<String> searchedAttributes = Arrays.asList(TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(PEOPLE_TABLE, dictionary, searchedAttributes, KeywordMatchingType.CONJUNCTION_INDEXBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Also used : Dictionary(edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

IField (edu.uci.ics.texera.api.field.IField): 145; ArrayList (java.util.ArrayList): 113; Tuple (edu.uci.ics.texera.api.tuple.Tuple): 106; TextField (edu.uci.ics.texera.api.field.TextField): 100; Span (edu.uci.ics.texera.api.span.Span): 99; Schema (edu.uci.ics.texera.api.schema.Schema): 92; Test (org.junit.Test): 84; StringField (edu.uci.ics.texera.api.field.StringField): 79; IntegerField (edu.uci.ics.texera.api.field.IntegerField): 78; DoubleField (edu.uci.ics.texera.api.field.DoubleField): 63; DateField (edu.uci.ics.texera.api.field.DateField): 58; Attribute (edu.uci.ics.texera.api.schema.Attribute): 57; SimpleDateFormat (java.text.SimpleDateFormat): 56; ListField (edu.uci.ics.texera.api.field.ListField): 32; Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary): 29; JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate): 9; KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator): 9; IOperator (edu.uci.ics.texera.api.dataflow.IOperator): 7; JsonNode (com.fasterxml.jackson.databind.JsonNode): 4; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 4