Search in sources :

Example 61 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class RegexMatcherTest method testRegexText2.

/**
 * Runs the regex {@code follow(-| )?up} against the CONTENT attribute of the text table and
 * compares the result with hand-built expectations for sample tuples 4, 5 and 6.
 *
 * Each expected tuple is the sample tuple's fields plus one extra list field holding the
 * spans the regex should produce for that tuple.
 */
@Test
public void testRegexText2() throws Exception {
    final String regex = "follow(-| )?up";
    List<Tuple> actual = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, regex, Arrays.asList(RegexTestConstantsText.CONTENT));

    List<Tuple> expected = new ArrayList<Tuple>();
    List<Tuple> sampleTuples = RegexTestConstantsText.getSampleTextTuples();
    // the text schema extended by the list attribute that carries the spans
    Schema schemaWithSpans = new Schema.Builder()
            .add(RegexTestConstantsText.SCHEMA_TEXT)
            .add(RESULTS, AttributeType.LIST)
            .build();

    // sample tuple 4: expected to match "followup" twice
    List<Span> spansOfTuple4 = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsText.CONTENT, 28, 36, regex, "followup"),
            new Span(RegexTestConstantsText.CONTENT, 54, 62, regex, "followup")));
    List<IField> fieldsOfTuple4 = new ArrayList<IField>(sampleTuples.get(4).getFields());
    fieldsOfTuple4.add(new ListField<Span>(spansOfTuple4));
    expected.add(new Tuple(schemaWithSpans, fieldsOfTuple4.toArray(new IField[fieldsOfTuple4.size()])));

    // sample tuple 5: expected to match "follow up" twice
    List<Span> spansOfTuple5 = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsText.CONTENT, 18, 27, regex, "follow up"),
            new Span(RegexTestConstantsText.CONTENT, 51, 60, regex, "follow up")));
    List<IField> fieldsOfTuple5 = new ArrayList<IField>(sampleTuples.get(5).getFields());
    fieldsOfTuple5.add(new ListField<Span>(spansOfTuple5));
    expected.add(new Tuple(schemaWithSpans, fieldsOfTuple5.toArray(new IField[fieldsOfTuple5.size()])));

    // sample tuple 6: expected to match "follow-up" and "followup"
    List<Span> spansOfTuple6 = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsText.CONTENT, 24, 33, regex, "follow-up"),
            new Span(RegexTestConstantsText.CONTENT, 38, 46, regex, "followup")));
    List<IField> fieldsOfTuple6 = new ArrayList<IField>(sampleTuples.get(6).getFields());
    fieldsOfTuple6.add(new ListField<Span>(spansOfTuple6));
    expected.add(new Tuple(schemaWithSpans, fieldsOfTuple6.toArray(new IField[fieldsOfTuple6.size()])));

    boolean sameResults = TestUtils.equals(expected, actual);
    Assert.assertTrue(sameResults);
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 62 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class RegexMatcherTest method testRegexText4.

/**
 * Runs the regex {@code \[(.)?\]} against the CONTENT attribute of the text table and
 * compares the result with a hand-built expectation for sample tuple 10.
 *
 * The expected tuple is the sample tuple's fields plus one extra list field holding the
 * two spans "[a]" and "[!]".
 */
@Test
public void testRegexText4() throws Exception {
    final String regex = "\\[(.)?\\]";
    List<Tuple> actual = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, regex, Arrays.asList(RegexTestConstantsText.CONTENT));

    List<Tuple> expected = new ArrayList<Tuple>();
    List<Tuple> sampleTuples = RegexTestConstantsText.getSampleTextTuples();
    // the text schema extended by the list attribute that carries the spans
    Schema schemaWithSpans = new Schema.Builder()
            .add(RegexTestConstantsText.SCHEMA_TEXT)
            .add(RESULTS, AttributeType.LIST)
            .build();

    // sample tuple 10: expected to match [a] & [!]
    List<Span> bracketSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsText.CONTENT, 110, 113, regex, "[a]"),
            new Span(RegexTestConstantsText.CONTENT, 120, 123, regex, "[!]")));
    List<IField> expectedFields = new ArrayList<IField>(sampleTuples.get(10).getFields());
    expectedFields.add(new ListField<Span>(bracketSpans));
    expected.add(new Tuple(schemaWithSpans, expectedFields.toArray(new IField[expectedFields.size()])));

    boolean sameResults = TestUtils.equals(expected, actual);
    Assert.assertTrue(sameResults);
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 63 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class TestConstants method getSamplePeopleTuples.

/**
 * Builds the six sample tuples over {@code SCHEMA_PEOPLE}.
 *
 * Every tuple carries, in order: two string fields, an integer field, a double field,
 * a date field parsed from an "MM-dd-yyyy" literal, and a text field.
 *
 * @return a fixed-size list with the six tuples; an empty list if one of the date
 *         literals cannot be parsed (the stack trace is printed in that case)
 */
public static List<Tuple> getSamplePeopleTuples() {
    try {
        // one formatter serves all date literals of this method
        SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");

        Tuple bruce = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("bruce"), new StringField("john Lee"),
                new IntegerField(46), new DoubleField(5.50),
                new DateField(dateFormat.parse("01-14-1970")),
                new TextField("Tall Angry") });
        Tuple tomHanks = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("tom hanks"), new StringField("cruise"),
                new IntegerField(45), new DoubleField(5.95),
                new DateField(dateFormat.parse("01-13-1971")),
                new TextField("Short Brown") });
        Tuple brad = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("brad lie angelina"), new StringField("pitt"),
                new IntegerField(44), new DoubleField(6.10),
                new DateField(dateFormat.parse("01-12-1972")),
                new TextField("White Angry") });
        Tuple george = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("george lin lin"), new StringField("lin clooney"),
                new IntegerField(43), new DoubleField(6.06),
                new DateField(dateFormat.parse("01-13-1973")),
                new TextField("Lin Clooney is Short and lin clooney is Angry") });
        Tuple christian = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("christian john wayne"), new StringField("rock bale"),
                new IntegerField(42), new DoubleField(5.99),
                new DateField(dateFormat.parse("01-13-1974")),
                new TextField("Tall Fair") });
        Tuple mary = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("Mary brown"), new StringField("Lake Forest"),
                new IntegerField(42), new DoubleField(5.99),
                new DateField(dateFormat.parse("01-13-1974")),
                new TextField("Short angry") });

        return Arrays.asList(bruce, tomHanks, brad, george, christian, mary);
    } catch (ParseException parseFailure) {
        // not expected: every date literal above follows the "MM-dd-yyyy" pattern
        parseFailure.printStackTrace();
        return Arrays.asList();
    }
}
Also used : StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) IntegerField(edu.uci.ics.texera.api.field.IntegerField) DateField(edu.uci.ics.texera.api.field.DateField) ParseException(java.text.ParseException) IField(edu.uci.ics.texera.api.field.IField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField)

Example 64 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class TestConstantsChinese method getSamplePeopleTuples.

/**
 * Builds the three sample tuples with Chinese content over {@code SCHEMA_PEOPLE}.
 *
 * Every tuple carries, in order: two string fields, an integer field, a double field,
 * a date field parsed from an "MM-dd-yyyy" literal, and a text field.
 *
 * @return a fixed-size list with the three tuples; an empty list if one of the date
 *         literals cannot be parsed (the stack trace is printed in that case)
 */
public static List<Tuple> getSamplePeopleTuples() {
    try {
        // one formatter serves all date literals of this method
        SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");

        Tuple first = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("无忌"), new StringField("长孙"),
                new IntegerField(46), new DoubleField(5.50),
                new DateField(dateFormat.parse("01-14-1970")),
                new TextField("北京大学电气工程学院") });
        Tuple second = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("孔明"), new StringField("洛克贝尔"),
                new IntegerField(42), new DoubleField(5.99),
                new DateField(dateFormat.parse("01-13-1974")),
                new TextField("北京大学计算机学院") });
        Tuple third = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("宋江"), new StringField("建筑"),
                new IntegerField(42), new DoubleField(5.99),
                new DateField(dateFormat.parse("01-13-1974")),
                new TextField("伟大的建筑是历史的坐标,具有传承的价值。") });

        return Arrays.asList(first, second, third);
    } catch (ParseException parseFailure) {
        // not expected: every date literal above follows the "MM-dd-yyyy" pattern
        parseFailure.printStackTrace();
        return Arrays.asList();
    }
}
Also used : StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) IntegerField(edu.uci.ics.texera.api.field.IntegerField) DateField(edu.uci.ics.texera.api.field.DateField) ParseException(java.text.ParseException) IField(edu.uci.ics.texera.api.field.IField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField)

Example 65 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class TestConstantsChineseWordCount method getSamplePeopleTuples.

/**
 * Builds the six sample tuples over {@code SCHEMA_PEOPLE} whose text field holds a
 * passage of Chinese text.
 *
 * Every tuple carries, in order: two string fields, an integer field, a double field,
 * a date field parsed from an "MM-dd-yyyy" literal, and a text field.
 *
 * @return a fixed-size list with the six tuples; an empty list if one of the date
 *         literals cannot be parsed (the stack trace is printed in that case)
 */
public static List<Tuple> getSamplePeopleTuples() {
    try {
        // one formatter serves all date literals of this method
        SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");

        Tuple first = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("bruce"), new StringField("john Lee"),
                new IntegerField(46), new DoubleField(5.50),
                new DateField(dateFormat.parse("01-14-1970")),
                new TextField("中新社北京4月26日电 (记者 刘育英)“中国制造2025”政策措施实施以来,“为稳定工业增长、加快制造业转型升级发" + "挥了重要作用”,效果初步显现。") });
        Tuple second = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("tom hanks"), new StringField("cruise"),
                new IntegerField(45), new DoubleField(5.95),
                new DateField(dateFormat.parse("01-13-1971")),
                new TextField("  中国2015年发布了“中国制造2025”通知。中国工业和信息化部运行监测协调局副" + "局长黄利斌26日在国新办新闻发布会上表示,自“中国制造2025”实施以来,国家制造业创新中心建设、智能制造" + "、工业强基、绿色制造、高端装备创新等“五大工程”扎实推进;2016年度15个重大标志性项目中,7个完全落实" + ",4个基本落实,其余正在推进。") });
        Tuple third = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("brad lie angelina"), new StringField("pitt"),
                new IntegerField(44), new DoubleField(6.10),
                new DateField(dateFormat.parse("01-12-1972")),
                new TextField("  2017年,工信部将重点推进六方面工作:加大“五大工程”实施力度," + "积极推进创新中心建设;扩大试点示范城市(群)覆盖面;实施新一轮重大技术改造升级工程;" + "推进" + "制造业与互联网融合发展;优化制造业发展环境。") });
        Tuple fourth = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("george lin lin"), new StringField("lin clooney"),
                new IntegerField(43), new DoubleField(6.06),
                new DateField(dateFormat.parse("01-13-1973")),
                new TextField("  黄利斌说,今年继续开展“互联" + "网+”制造业试点示范,加快工业互联网基础设施改造升级。现在,47%的大企业" + "搭建了运营协同创新平台,两化融合(信息化和工业化融合)管理体系贯标企业运" + "营成本平均下降了8.8%,经营利润平均增长了6.9%。") });
        Tuple fifth = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("christian john wayne"), new StringField("rock bale"),
                new IntegerField(42), new DoubleField(5.99),
                new DateField(dateFormat.parse("01-13-1974")),
                new TextField("  工信部今年还将" + "选取20-30个城市(群)继续开展“中国制造2025”试点示范创建," + "指导试点示范城市(群),在落实新发展理念等方面先行先试。") });
        Tuple sixth = new Tuple(SCHEMA_PEOPLE, new IField[] {
                new StringField("Mary brown"), new StringField("Lake Forest"),
                new IntegerField(42), new DoubleField(5.99),
                new DateField(dateFormat.parse("01-13-1974")),
                new TextField("资料图:由驻日中" + "资太阳能企业开发、承建并运营维护的日本岛根县滨田市第二期12兆瓦光伏电站项目(滨田MS太阳能发电站),4月25日在当" + "地举行竣工典礼。该大型太阳能电站的九成设备来自“中国制造”。在日本并网发电的特高压太阳能电站中,这是中国产设备占" + "比最高的项目。中新社记者 王健 摄") });

        return Arrays.asList(first, second, third, fourth, fifth, sixth);
    } catch (ParseException parseFailure) {
        // not expected: every date literal above follows the "MM-dd-yyyy" pattern
        parseFailure.printStackTrace();
        return Arrays.asList();
    }
}
Also used : StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) IntegerField(edu.uci.ics.texera.api.field.IntegerField) DateField(edu.uci.ics.texera.api.field.DateField) ParseException(java.text.ParseException) IField(edu.uci.ics.texera.api.field.IField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField)

Aggregations

IField (edu.uci.ics.texera.api.field.IField) 145, ArrayList (java.util.ArrayList) 113, Tuple (edu.uci.ics.texera.api.tuple.Tuple) 106, TextField (edu.uci.ics.texera.api.field.TextField) 100, Span (edu.uci.ics.texera.api.span.Span) 99, Schema (edu.uci.ics.texera.api.schema.Schema) 92, Test (org.junit.Test) 84, StringField (edu.uci.ics.texera.api.field.StringField) 79, IntegerField (edu.uci.ics.texera.api.field.IntegerField) 78, DoubleField (edu.uci.ics.texera.api.field.DoubleField) 63, DateField (edu.uci.ics.texera.api.field.DateField) 58, Attribute (edu.uci.ics.texera.api.schema.Attribute) 57, SimpleDateFormat (java.text.SimpleDateFormat) 56, ListField (edu.uci.ics.texera.api.field.ListField) 32, Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary) 29, JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate) 9, KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) 9, IOperator (edu.uci.ics.texera.api.dataflow.IOperator) 7, JsonNode (com.fasterxml.jackson.databind.JsonNode) 4, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 4