Search in sources :

Example 16 with Attribute

use of edu.uci.ics.texera.api.schema.Attribute in project textdb by TextDB.

the class JoinTestHelper method alterField.

/**
 * Builds a copy of a tuple in which the field at one position is replaced.
 * The attribute at that position keeps its original name, but its type is
 * re-derived from the runtime class of the replacement field, so the schema
 * of the returned tuple reflects the change.
 *
 * If {@code fieldIndex} does not match any position of the original schema,
 * the returned tuple carries the same attributes and fields as the original.
 *
 * @param originalTuple the tuple to copy; it is only read, never modified here
 * @param fieldIndex    zero-based position of the field to replace
 * @param newField      the field to place at {@code fieldIndex}
 * @return a new tuple with a new schema containing the replacement
 */
public static Tuple alterField(Tuple originalTuple, int fieldIndex, IField newField) {
    List<Attribute> originalAttributes = originalTuple.getSchema().getAttributes();
    int attributeCount = originalAttributes.size();
    Attribute[] alteredAttributes = new Attribute[attributeCount];
    IField[] alteredFields = new IField[attributeCount];
    for (int position = 0; position < attributeCount; position++) {
        boolean isTarget = position == fieldIndex;
        // Only the targeted position gets a rebuilt attribute; the rest are reused as-is.
        alteredAttributes[position] = isTarget
                ? new Attribute(originalAttributes.get(position).getName(), AttributeType.getAttributeType(newField.getClass()))
                : originalAttributes.get(position);
        alteredFields[position] = isTarget
                ? newField
                : originalTuple.getFields().get(position);
    }
    return new Tuple(new Schema(alteredAttributes), alteredFields);
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 17 with Attribute

use of edu.uci.ics.texera.api.schema.Attribute in project textdb by TextDB.

the class JoinDistancePredicate method generateIntersectionSchema.

/**
 * Builds the output schema as the intersection of the inner and outer
 * operators' schemas. An attribute is kept only when the outer schema's
 * attribute list contains an equal attribute; the result preserves the
 * inner schema's ordering.
 *
 * The intersection must be non-empty and must contain, in this order of
 * checking: the join attribute, the "_ID" attribute and the "spanList"
 * attribute. The join attribute must additionally be of type TEXT or STRING.
 *
 * @param innerOperatorSchema schema produced by the inner operator
 * @param outerOperatorSchema schema produced by the outer operator
 * @return the intersection schema
 * @throws DataflowException if any of the requirements above is violated
 */
private Schema generateIntersectionSchema(Schema innerOperatorSchema, Schema outerOperatorSchema) throws DataflowException {
    List<Attribute> innerAttributes = innerOperatorSchema.getAttributes();
    List<Attribute> outerAttributes = outerOperatorSchema.getAttributes();

    List<Attribute> sharedAttributes = new ArrayList<>();
    for (Attribute candidate : innerAttributes) {
        if (outerAttributes.contains(candidate)) {
            sharedAttributes.add(candidate);
        }
    }
    Schema intersectionSchema = new Schema(sharedAttributes.toArray(new Attribute[0]));

    // The output schema has to carry every attribute the join relies on.
    if (intersectionSchema.getAttributes().isEmpty()) {
        throw new DataflowException("inner operator and outer operator don't share any common attributes");
    }
    if (!intersectionSchema.containsAttribute(this.joinAttributeName)) {
        throw new DataflowException("inner operator or outer operator doesn't contain join attribute");
    }
    if (!intersectionSchema.containsAttribute(SchemaConstants._ID)) {
        throw new DataflowException("inner operator or outer operator doesn't contain _ID attribute");
    }
    if (!intersectionSchema.containsAttribute(SchemaConstants.SPAN_LIST)) {
        throw new DataflowException("inner operator or outer operator doesn't contain spanList attribute");
    }

    // Only textual attributes (TEXT or STRING) may be joined on.
    AttributeType joinAttrType = intersectionSchema.getAttribute(this.joinAttributeName).getType();
    boolean isTextual = joinAttrType == AttributeType.TEXT || joinAttrType == AttributeType.STRING;
    if (!isTextual) {
        throw new DataflowException(String.format("Join attribute %s must be either TEXT or STRING.", this.joinAttributeName));
    }
    return intersectionSchema;
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) ListField(edu.uci.ics.texera.api.field.ListField) edu.uci.ics.texera.api.tuple(edu.uci.ics.texera.api.tuple) Iterator(java.util.Iterator) PropertyNameConstants(edu.uci.ics.texera.dataflow.common.PropertyNameConstants) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) PredicateBase(edu.uci.ics.texera.dataflow.common.PredicateBase) Collectors(java.util.stream.Collectors) Span(edu.uci.ics.texera.api.span.Span) ArrayList(java.util.ArrayList) List(java.util.List) SchemaConstants(edu.uci.ics.texera.api.constants.SchemaConstants) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) IField(edu.uci.ics.texera.api.field.IField) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) Attribute(edu.uci.ics.texera.api.schema.Attribute) Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 18 with Attribute

use of edu.uci.ics.texera.api.schema.Attribute in project textdb by TextDB.

the class ProjectionOperator method setUp.

/**
 * Reads the input operator's output schema and derives this operator's
 * output schema by keeping only the attributes whose lower-cased name
 * appears in the predicate's projection fields.
 *
 * @throws TexeraException (as a DataflowException) when the number of kept
 *         attributes differs from the number of projection fields
 */
@Override
protected void setUp() throws TexeraException {
    inputSchema = inputOperator.getOutputSchema();

    List<Attribute> retainedAttributes = inputSchema.getAttributes()
            .stream()
            .filter(candidate -> predicate.getProjectionFields().contains(candidate.getName().toLowerCase()))
            .collect(Collectors.toList());

    // A size mismatch means at least one requested field had no matching attribute.
    boolean everyFieldMatched = retainedAttributes.size() == predicate.getProjectionFields().size();
    if (!everyFieldMatched) {
        throw new DataflowException("input schema doesn't contain one of the attributes to be projected");
    }

    outputSchema = new Schema(retainedAttributes.toArray(new Attribute[0]));
}
Also used : List(java.util.List) IField(edu.uci.ics.texera.api.field.IField) AbstractSingleInputOperator(edu.uci.ics.texera.dataflow.common.AbstractSingleInputOperator) Tuple(edu.uci.ics.texera.api.tuple.Tuple) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) Schema(edu.uci.ics.texera.api.schema.Schema) Attribute(edu.uci.ics.texera.api.schema.Attribute) Collectors(java.util.stream.Collectors) Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 19 with Attribute

use of edu.uci.ics.texera.api.schema.Attribute in project textdb by TextDB.

the class MysqlSink method mysqlCreateTable.

/**
 * Creates the destination table in the MySQL database. The column
 * definitions come from the attributes of {@code outputSchema}, each
 * rendered by {@code convertAttribute}; the table name comes from the
 * predicate. The shared {@code statement} is created on first use.
 *
 * @return the value returned by {@code Statement.executeUpdate} for the
 *         CREATE TABLE statement
 * @throws DataflowException if the database reports an SQLException
 */
private int mysqlCreateTable() {
    List<Attribute> columns = outputSchema.getAttributes();

    StringBuilder ddl = new StringBuilder();
    ddl.append("CREATE TABLE ").append(predicate.getTable()).append(" (\n");
    // One column definition per line, separated by commas.
    ddl.append(columns.stream()
            .map(column -> convertAttribute(column))
            .collect(Collectors.joining(",\n")));
    ddl.append("\n); ");

    try {
        if (statement == null) {
            statement = connection.createStatement();
        }
        return statement.executeUpdate(ddl.toString());
    } catch (SQLException e) {
        throw new DataflowException("MysqlSink failed to create table " + predicate.getTable() + ". " + e.getMessage());
    }
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) SQLException(java.sql.SQLException) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 20 with Attribute

use of edu.uci.ics.texera.api.schema.Attribute in project textdb by TextDB.

the class KeywordConjunctionTest method testMatchingWithLimit.

/**
 * Runs the keyword query "angry" over the first-name, last-name and
 * description attributes of the people table, passing 3 and 0 as the last
 * two arguments of the helper (presumably limit and offset; confirm against
 * KeywordTestHelper). Verifies that exactly three tuples come back and that
 * each of them is one of the four tuples known to match.
 *
 * @throws TexeraException if the query helper fails
 * @throws ParseException see the enclosing file's imports for which ParseException this is
 * @throws java.text.ParseException if one of the date literals cannot be parsed
 */
@Test
public void testMatchingWithLimit() throws TexeraException, ParseException, java.text.ParseException {
    String query = "angry";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);

    // Expected schema = the people attributes followed by the results (span list) attribute.
    int peopleAttributeCount = TestConstants.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstants.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = new Attribute(RESULTS, AttributeType.LIST);

    List<Tuple> resultList = KeywordTestHelper.getQueryResults(PEOPLE_TABLE, query, attributeNames, conjunction, 3, 0);

    Span span1 = new Span("description", 5, 10, "angry", "Angry", 1);
    Span span2 = new Span("description", 6, 11, "angry", "Angry", 1);
    Span span3 = new Span("description", 40, 45, "angry", "Angry", 8);
    Span span4 = new Span("description", 6, 11, "angry", "angry", 1);

    // One formatter is enough; it is only used sequentially within this test.
    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");

    List<Span> list1 = new ArrayList<>();
    list1.add(span1);
    IField[] fields1 = { new StringField("bruce"), new StringField("john Lee"), new IntegerField(46), new DoubleField(5.50), new DateField(dateFormat.parse("01-14-1970")), new TextField("Tall Angry"), new ListField<>(list1) };

    List<Span> list2 = new ArrayList<>();
    list2.add(span2);
    IField[] fields2 = { new StringField("brad lie angelina"), new StringField("pitt"), new IntegerField(44), new DoubleField(6.10), new DateField(dateFormat.parse("01-12-1972")), new TextField("White Angry"), new ListField<>(list2) };

    List<Span> list3 = new ArrayList<>();
    list3.add(span3);
    IField[] fields3 = { new StringField("george lin lin"), new StringField("lin clooney"), new IntegerField(43), new DoubleField(6.06), new DateField(dateFormat.parse("01-13-1973")), new TextField("Lin Clooney is Short and lin clooney is Angry"), new ListField<>(list3) };

    List<Span> list4 = new ArrayList<>();
    list4.add(span4);
    IField[] fields4 = { new StringField("Mary brown"), new StringField("Lake Forest"), new IntegerField(42), new DoubleField(5.99), new DateField(dateFormat.parse("01-13-1974")), new TextField("Short angry"), new ListField<>(list4) };

    List<Tuple> expectedList = new ArrayList<>();
    expectedList.add(new Tuple(new Schema(schemaAttributes), fields1));
    expectedList.add(new Tuple(new Schema(schemaAttributes), fields2));
    expectedList.add(new Tuple(new Schema(schemaAttributes), fields3));
    expectedList.add(new Tuple(new Schema(schemaAttributes), fields4));

    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.assertEquals(4, expectedList.size());
    Assert.assertEquals(3, resultList.size());
    // Four tuples match the query but only three are returned, so check containment rather than equality.
    Assert.assertTrue(TestUtils.containsAll(expectedList, resultList));
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) Tuple(edu.uci.ics.texera.api.tuple.Tuple) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Test(org.junit.Test)

Aggregations

Attribute (edu.uci.ics.texera.api.schema.Attribute)98 Test (org.junit.Test)81 Tuple (edu.uci.ics.texera.api.tuple.Tuple)78 ArrayList (java.util.ArrayList)76 Schema (edu.uci.ics.texera.api.schema.Schema)75 IField (edu.uci.ics.texera.api.field.IField)60 StringField (edu.uci.ics.texera.api.field.StringField)56 TextField (edu.uci.ics.texera.api.field.TextField)56 IntegerField (edu.uci.ics.texera.api.field.IntegerField)54 DoubleField (edu.uci.ics.texera.api.field.DoubleField)53 Span (edu.uci.ics.texera.api.span.Span)51 DateField (edu.uci.ics.texera.api.field.DateField)50 SimpleDateFormat (java.text.SimpleDateFormat)47 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)28 AttributeType (edu.uci.ics.texera.api.schema.AttributeType)9 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)8 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)6 TexeraException (edu.uci.ics.texera.api.exception.TexeraException)6 List (java.util.List)6 Collectors (java.util.stream.Collectors)5