Search in sources :

Example 71 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class LabeledRegexMatcherTest method testGetNextTupleLabeledRegex.

@Test
public void testGetNextTupleLabeledRegex() throws Exception {
    String query = "<name>";
    String keywordQuery = "george lin lin";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, keywordQuery, Arrays.asList(TestConstants.FIRST_NAME), "name", false, Integer.MAX_VALUE, 0);
    List<Tuple> expectedResults = new ArrayList<>();
    // expected to match "george lin lin"
    List<Tuple> data = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<>();
    spans.add(new Span(TestConstants.FIRST_NAME, 0, 14, query, "george lin lin"));
    IField spanField = new ListField<>(new ArrayList<>(spans));
    List<IField> fields = new ArrayList<>(data.get(3).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    List<String> attributeNames = new ArrayList<>();
    attributeNames.add(RESULTS);
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 72 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class LabeledRegexMatcherTest method testIgnoreCaseLabeledRegex.

@Test
public void testIgnoreCaseLabeledRegex() throws Exception {
    String query = "<NAME>";
    String keywordQuery = "george lin lin";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, keywordQuery, Arrays.asList(TestConstants.FIRST_NAME), "name", false, Integer.MAX_VALUE, 0);
    List<Tuple> expectedResults = new ArrayList<>();
    // expected to match "george lin lin"
    List<Tuple> data = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<>();
    spans.add(new Span(TestConstants.FIRST_NAME, 0, 14, query, "george lin lin"));
    IField spanField = new ListField<>(new ArrayList<>(spans));
    List<IField> fields = new ArrayList<>(data.get(3).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    List<String> attributeNames = new ArrayList<>();
    attributeNames.add(RESULTS);
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 73 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class LabeledRegexMatcherTest method testDisjunctionLabeledRegex.

@Test
public void testDisjunctionLabeledRegex() throws Exception {
    String query = "<lab1>|<lab2>";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, "short", Arrays.asList(TestConstants.DESCRIPTION), "lab1", false, Integer.MAX_VALUE, 0, "angry", "lab2");
    List<Tuple> expectedResults = new ArrayList<>();
    // expected to match "Short" and "angry"
    List<Tuple> data = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<>();
    spans.add(new Span(TestConstants.DESCRIPTION, 0, 5, query, "Short"));
    spans.add(new Span(TestConstants.DESCRIPTION, 6, 11, query, "angry"));
    IField spanField = new ListField<>(new ArrayList<>(spans));
    List<IField> fields = new ArrayList<>(data.get(3).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    // expected to match "Short" and "Angry"
    spans.clear();
    spans.add(new Span(TestConstants.DESCRIPTION, 15, 20, query, "Short"));
    spans.add(new Span(TestConstants.DESCRIPTION, 40, 45, query, "Angry"));
    spanField = new ListField<>(new ArrayList<>(spans));
    fields = new ArrayList<>(data.get(5).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    List<String> attributeNames = new ArrayList<>();
    attributeNames.add(RESULTS);
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 74 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class LabeledRegexMatcherTest method testQueryWithoutQualifiersLabeledRegex2.

@Test
public void testQueryWithoutQualifiersLabeledRegex2() throws Exception {
    String query = "<lab2> is <lab1>";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, "short", Arrays.asList(TestConstants.DESCRIPTION), "lab1", false, Integer.MAX_VALUE, 0, "Clooney", "lab2");
    List<Tuple> expectedResults = new ArrayList<>();
    // expected to match "Short angry"
    List<Tuple> data = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<>();
    spans.add(new Span(TestConstants.DESCRIPTION, 4, 20, query, "Clooney is Short"));
    IField spanField = new ListField<>(new ArrayList<>(spans));
    List<IField> fields = new ArrayList<>(data.get(3).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    List<String> attributeNames = new ArrayList<>();
    attributeNames.add(RESULTS);
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 75 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class ExcelSinkTest method attributeTypeTest.

@Test
public // writing 10000 tuples
void attributeTypeTest() throws Exception {
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.AGE);
    attributeNames.add(TestConstants.HEIGHT);
    attributeNames.add(TestConstants.DATE_OF_BIRTH);
    attributeNames.add(TestConstants.DESCRIPTION);
    // Prepare Schema
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length];
    for (int count = 0; count < schemaAttributes.length; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    // Prepare 10000 tuples as a tupleList
    int testSize = 10000;
    Random rand = new Random();
    List<Tuple> tupleList = new ArrayList<Tuple>();
    for (int i = 0; i < testSize; i++) {
        IField[] fields = { new StringField(getRandomString()), new StringField(getRandomString()), new IntegerField(rand.nextInt()), new DoubleField(rand.nextDouble() * rand.nextInt()), new DateField(getRandomDate()), new TextField(getRandomString()) };
        tupleList.add(new Tuple(new Schema(schemaAttributes), fields));
    }
    assert (tupleList.size() == testSize);
    IOperator inputOperator = Mockito.mock(IOperator.class);
    Mockito.when(inputOperator.getOutputSchema()).thenReturn(new Schema(schemaAttributes));
    OngoingStubbing<Tuple> stubbing = Mockito.when(inputOperator.getNextTuple());
    for (Tuple t : tupleList) {
        stubbing = stubbing.thenReturn(t);
    }
    stubbing = stubbing.thenReturn(null);
    // excel writing test
    excelSink = new ExcelSink(new ExcelSinkPredicate());
    excelSink.setInputOperator(inputOperator);
    excelSink.open();
    excelSink.collectAllTuples();
    excelSink.close();
    Files.deleteIfExists(excelSink.getFilePath());
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Random(java.util.Random) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Test(org.junit.Test)

Aggregations

IField (edu.uci.ics.texera.api.field.IField)145 ArrayList (java.util.ArrayList)113 Tuple (edu.uci.ics.texera.api.tuple.Tuple)106 TextField (edu.uci.ics.texera.api.field.TextField)100 Span (edu.uci.ics.texera.api.span.Span)99 Schema (edu.uci.ics.texera.api.schema.Schema)92 Test (org.junit.Test)84 StringField (edu.uci.ics.texera.api.field.StringField)79 IntegerField (edu.uci.ics.texera.api.field.IntegerField)78 DoubleField (edu.uci.ics.texera.api.field.DoubleField)63 DateField (edu.uci.ics.texera.api.field.DateField)58 Attribute (edu.uci.ics.texera.api.schema.Attribute)57 SimpleDateFormat (java.text.SimpleDateFormat)56 ListField (edu.uci.ics.texera.api.field.ListField)32 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)29 JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate)9 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)9 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)7 JsonNode (com.fasterxml.jackson.databind.JsonNode)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4