Search in sources :

Example 51 with Span

use of edu.uci.ics.textdb.api.span.Span in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInTextFieldUsingPhrase.

/**
 * Scenario S-7: runs DictionaryMatcher with a single-word dictionary entry
 * using the index-based PHRASE matching type and checks the returned tuples
 * against the two expected people tuples.
 */
@Test
public void testSingleWordQueryInTextFieldUsingPhrase() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<String>(Arrays.asList("tall"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // Both expected tuples share the same span list.
    List<Span> expectedSpans = new ArrayList<Span>();
    expectedSpans.add(new Span("description", 0, 4, "tall", "Tall"));

    // Expected schema: the people attributes followed by the results attribute.
    int peopleAttributeCount = TestConstants.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstants.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");
    IField[] bruceFields = {
        new StringField("bruce"),
        new StringField("john Lee"),
        new IntegerField(46),
        new DoubleField(5.50),
        new DateField(dateFormat.parse("01-14-1970")),
        new TextField("Tall Angry"),
        new ListField<Span>(expectedSpans)
    };
    IField[] christianFields = {
        new StringField("christian john wayne"),
        new StringField("rock bale"),
        new IntegerField(42),
        new DoubleField(5.99),
        new DateField(dateFormat.parse("01-13-1974")),
        new TextField("Tall Fair"),
        new ListField<Span>(expectedSpans)
    };

    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(new Tuple(new Schema(schemaAttributes), bruceFields));
    expectedResults.add(new Tuple(new Schema(schemaAttributes), christianFields));

    List<String> attributeNames = Arrays.asList(
            TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(
            PEOPLE_TABLE, dictionary, attributeNames, KeywordMatchingType.PHRASE_INDEXBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 52 with Span

use of edu.uci.ics.textdb.api.span.Span in project textdb by TextDB.

the class DictionaryMatcherTest method testMultipleWordsQueryUsingScan.

/**
 * Scenario S-8: runs DictionaryMatcher with a multiple-word dictionary entry
 * using the scan-based SUBSTRING matching type and checks the returned
 * tuples against the single expected people tuple.
 */
@Test
public void testMultipleWordsQueryUsingScan() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<String>(Arrays.asList("george lin lin"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // Span list expected on the matching tuple.
    List<Span> expectedSpans = new ArrayList<Span>();
    expectedSpans.add(new Span("firstName", 0, 14, "george lin lin", "george lin lin"));

    // Expected schema: the people attributes followed by the results attribute.
    int peopleAttributeCount = TestConstants.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstants.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");
    IField[] georgeFields = {
        new StringField("george lin lin"),
        new StringField("lin clooney"),
        new IntegerField(43),
        new DoubleField(6.06),
        new DateField(dateFormat.parse("01-13-1973")),
        new TextField("Lin Clooney is Short and lin clooney is Angry"),
        new ListField<Span>(expectedSpans)
    };

    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(new Tuple(new Schema(schemaAttributes), georgeFields));

    List<String> attributeNames = Arrays.asList(
            TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(
            PEOPLE_TABLE, dictionary, attributeNames, KeywordMatchingType.SUBSTRING_SCANBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 53 with Span

use of edu.uci.ics.textdb.api.span.Span in project textdb by TextDB.

the class DictionaryMatcherTest method testMultipleWordsQueryUsingPhraseChinese.

/**
 * Scenario S-10C: runs DictionaryMatcher with a Chinese dictionary entry
 * using the index-based PHRASE matching type against the Chinese table and
 * checks the returned tuples against the single expected tuple.
 */
@Test
public void testMultipleWordsQueryUsingPhraseChinese() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<String>(Arrays.asList("洛克贝尔"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // Span list expected on the matching tuple.
    List<Span> expectedSpans = new ArrayList<Span>();
    expectedSpans.add(new Span("lastName", 0, 4, "洛克贝尔", "洛克贝尔"));

    // Expected schema: the Chinese people attributes followed by the results attribute.
    int peopleAttributeCount = TestConstantsChinese.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstantsChinese.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");
    IField[] expectedFields = {
        new StringField("孔明"),
        new StringField("洛克贝尔"),
        new IntegerField(42),
        new DoubleField(5.99),
        new DateField(dateFormat.parse("01-13-1974")),
        new TextField("北京大学计算机学院"),
        new ListField<Span>(expectedSpans)
    };

    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(new Tuple(new Schema(schemaAttributes), expectedFields));

    List<String> attributeNames = Arrays.asList(
            TestConstantsChinese.FIRST_NAME, TestConstantsChinese.LAST_NAME, TestConstantsChinese.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(
            CHINESE_TABLE, dictionary, attributeNames, KeywordMatchingType.PHRASE_INDEXBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 54 with Span

use of edu.uci.ics.textdb.api.span.Span in project textdb by TextDB.

the class DataflowUtils method getSpanListString.

/**
 * Renders a list of spans as text: a "span list:" header line followed by
 * one line per span, each produced by {@code getSpanString}.
 *
 * @param spanList the spans to render, in iteration order
 * @return string representation of the list of spans
 */
public static String getSpanListString(List<Span> spanList) {
    StringBuilder rendered = new StringBuilder("span list:\n");
    for (Span current : spanList) {
        rendered.append(getSpanString(current)).append("\n");
    }
    return rendered.toString();
}
Also used : Span(edu.uci.ics.textdb.api.span.Span)

Example 55 with Span

use of edu.uci.ics.textdb.api.span.Span in project textdb by TextDB.

the class DataflowUtils method getTupleString.

/**
 * Renders a tuple as text, one entry per schema attribute.
 *
 * The span-list attribute is rendered through {@code getSpanListString};
 * every other attribute is written as {@code name(type): value} on its own
 * line.
 *
 * @param tuple the tuple to render
 * @return string representation of the tuple
 */
public static String getTupleString(Tuple tuple) {
    StringBuilder rendered = new StringBuilder();
    for (Attribute attribute : tuple.getSchema().getAttributes()) {
        String attributeName = attribute.getAttributeName();

        // Ordinary attribute: "name(type): value" followed by a newline.
        if (!attributeName.equals(SchemaConstants.SPAN_LIST)) {
            rendered.append(attributeName)
                    .append("(")
                    .append(attribute.getAttributeType().toString())
                    .append(")")
                    .append(": ")
                    .append(tuple.getField(attributeName).getValue().toString())
                    .append("\n");
            continue;
        }

        // Span-list attribute: delegate to the span list formatter.
        ListField<Span> spanListField = tuple.getField(SchemaConstants.SPAN_LIST);
        List<Span> spans = spanListField.getValue();
        rendered.append(getSpanListString(spans)).append("\n");
    }
    return rendered.toString();
}
Also used : CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) Attribute(edu.uci.ics.textdb.api.schema.Attribute) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Schema(edu.uci.ics.textdb.api.schema.Schema) Span(edu.uci.ics.textdb.api.span.Span)

Aggregations

Span (edu.uci.ics.textdb.api.span.Span)112 ArrayList (java.util.ArrayList)97 Schema (edu.uci.ics.textdb.api.schema.Schema)88 IField (edu.uci.ics.textdb.api.field.IField)86 Tuple (edu.uci.ics.textdb.api.tuple.Tuple)80 TextField (edu.uci.ics.textdb.api.field.TextField)71 Attribute (edu.uci.ics.textdb.api.schema.Attribute)71 Test (org.junit.Test)71 IntegerField (edu.uci.ics.textdb.api.field.IntegerField)60 StringField (edu.uci.ics.textdb.api.field.StringField)58 DoubleField (edu.uci.ics.textdb.api.field.DoubleField)49 DateField (edu.uci.ics.textdb.api.field.DateField)46 SimpleDateFormat (java.text.SimpleDateFormat)46 Dictionary (edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary)25 ListField (edu.uci.ics.textdb.api.field.ListField)18 KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)10 AttributeType (edu.uci.ics.textdb.api.schema.AttributeType)9 JoinDistancePredicate (edu.uci.ics.textdb.exp.join.JoinDistancePredicate)9 DataFlowException (edu.uci.ics.textdb.api.exception.DataFlowException)7 SchemaConstants (edu.uci.ics.textdb.api.constants.SchemaConstants)5