Search in sources :

Example 6 with StringField

use of edu.uci.ics.texera.api.field.StringField in project textdb by TextDB.

the class KeywordPhraseTest method testWordInMultipleFieldsQueryWithStopWords3.

/**
 * Verifies: a phrase query over the abstract attribute of the Medline table
 * returns the expected record together with the span of the matched phrase.
 *
 * @throws Exception
 *             if anything invoked by the test throws
 */
@Test
public void testWordInMultipleFieldsQueryWithStopWords3() throws Exception {
    // Query: the phrase is looked up in the abstract attribute only
    String query = "skin rash";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(keywordTestConstants.ABSTRACT);

    // Expected schema: every Medline attribute followed by the results list attribute
    int medlineAttributeCount = keywordTestConstants.ATTRIBUTES_MEDLINE.length;
    Attribute[] schemaAttributes = new Attribute[medlineAttributeCount + 1];
    System.arraycopy(keywordTestConstants.ATTRIBUTES_MEDLINE, 0, schemaAttributes, 0, medlineAttributeCount);
    schemaAttributes[medlineAttributeCount] = new Attribute(RESULTS, AttributeType.LIST);

    // Expected span list: a single match inside the abstract
    List<Span> expectedSpans = new ArrayList<>();
    expectedSpans.add(new Span(keywordTestConstants.ABSTRACT, 192, 201, "skin rash", "skin rash"));

    // Abstract text of the expected record
    String abstractText = "This report describes four cases of chronic meningococcemia with the characteristic manifestations of recurrent episodes of "
            + "fever, chills, night sweats, headache and anorexia, associated with skin rash and arthralgias. The diagnosis was established in all instances by blood culture. Administration "
            + "of sulfonamides in three cases and penicillin in the fourth resulted in prompt recovery. The recent finding of a strain of sulfonamide-resistant meningococci, however, indicates "
            + "that antibiotic-sensitivity tests should be carried out in all cases of meningococcal disease. While waiting for the results of such tests to be reported, the clinician should "
            + "initiate treatment with large doses of a sulfonamide and penicillin in combination.";

    IField[] expectedFields = {
            new IntegerField(14347980),
            new TextField(""),
            new TextField("CHRONIC MENINGOCOCCEMIA; EPIDEMIOLOGY, DIAGNOSIS AND TREATMENT."),
            new TextField("D S BLOOM"),
            new StringField("103 Aug, 1965"),
            new TextField("California medicine"),
            new TextField("DRUG THERAPY, MENINGOCOCCAL INFECTIONS, PENICILLIN G, SULFONAMIDES"),
            new TextField("Drug Therapy, Meningococcal Infections, Penicillin G, Sulfonamides"),
            new TextField(abstractText),
            new DoubleField(0.664347980),
            new ListField<>(expectedSpans) };

    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(new Tuple(new Schema(schemaAttributes), expectedFields));

    // Run the query
    List<Tuple> actualResultList = KeywordTestHelper.getQueryResults(MEDLINE_TABLE, query, attributeNames, phrase);

    // Compare expected and actual results
    Assert.assertTrue(TestUtils.equals(expectedResultList, actualResultList));
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 7 with StringField

use of edu.uci.ics.texera.api.field.StringField in project textdb by TextDB.

the class KeywordPhraseTest method testCombinedSpanInMultipleFieldsQuery.

/**
 * Verifies: a phrase query over the first name, last name and description
 * attributes of the people table returns one tuple whose results list holds
 * the spans of every match of the phrase.
 *
 * @throws Exception
 *             if anything invoked by the test throws
 */
@Test
public void testCombinedSpanInMultipleFieldsQuery() throws Exception {
    // Query: the phrase and the attributes it is searched in
    String query = "lin clooney";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);

    // Expected schema: every people attribute followed by the results list attribute
    int peopleAttributeCount = TestConstants.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstants.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = new Attribute(RESULTS, AttributeType.LIST);

    // Expected spans: one in the last name, two in the description
    List<Span> expectedSpans = new ArrayList<>();
    expectedSpans.add(new Span("lastName", 0, 11, "lin clooney", "lin clooney"));
    expectedSpans.add(new Span("description", 0, 11, "lin clooney", "Lin Clooney"));
    expectedSpans.add(new Span("description", 25, 36, "lin clooney", "lin clooney"));

    IField[] expectedFields = {
            new StringField("george lin lin"),
            new StringField("lin clooney"),
            new IntegerField(43),
            new DoubleField(6.06),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")),
            new TextField("Lin Clooney is Short and lin clooney is Angry"),
            new ListField<>(expectedSpans) };

    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(new Tuple(new Schema(schemaAttributes), expectedFields));

    // Run the query
    List<Tuple> actualResultList = KeywordTestHelper.getQueryResults(PEOPLE_TABLE, query, attributeNames, phrase);

    // Compare expected and actual results
    Assert.assertTrue(TestUtils.equals(expectedResultList, actualResultList));
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 8 with StringField

use of edu.uci.ics.texera.api.field.StringField in project textdb by TextDB.

the class KeywordPhraseTest method testWordInMultipleFieldsQueryWithStopWords2.

/**
 * Verifies: a phrase query containing the word "and" is matched against the
 * document text "lin clooney is Angry" in the description attribute.
 *
 * @throws Exception
 *             if anything invoked by the test throws
 */
@Test
public void testWordInMultipleFieldsQueryWithStopWords2() throws Exception {
    // Query: the phrase and the attributes it is searched in
    String query = "lin clooney and angry";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);

    // Expected schema: every people attribute followed by the results list attribute
    int peopleAttributeCount = TestConstants.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstants.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = new Attribute(RESULTS, AttributeType.LIST);

    // Expected spans: a single match inside the description
    List<Span> expectedSpans = new ArrayList<>();
    expectedSpans.add(new Span("description", 25, 45, "lin clooney and angry", "lin clooney is Angry"));

    IField[] expectedFields = {
            new StringField("george lin lin"),
            new StringField("lin clooney"),
            new IntegerField(43),
            new DoubleField(6.06),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")),
            new TextField("Lin Clooney is Short and lin clooney is Angry"),
            new ListField<>(expectedSpans) };

    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(new Tuple(new Schema(schemaAttributes), expectedFields));

    // Run the query
    List<Tuple> actualResultList = KeywordTestHelper.getQueryResults(PEOPLE_TABLE, query, attributeNames, phrase);

    // Compare expected and actual results
    Assert.assertTrue(TestUtils.equals(expectedResultList, actualResultList));
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 9 with StringField

use of edu.uci.ics.texera.api.field.StringField in project textdb by TextDB.

the class RelationManagerTest method test9.

/*
 * Inserts one tuple into a newly created table, updates it, then deletes it,
 * checking what getTupleByID returns after each step.
 */
@Test
public void test9() throws Exception {
    final String tableName = "relation_manager_test_table";
    final String tableDirectory = "./index/test_table";
    final String contentAttribute = "content";
    Schema tableSchema = new Schema(new Attribute(contentAttribute, AttributeType.STRING));

    RelationManager manager = RelationManager.getInstance();
    // deleteTable runs before createTable; presumably this clears a table left
    // behind by an earlier run (not verifiable from this method alone)
    manager.deleteTable(tableName);
    manager.createTable(tableName, Paths.get(tableDirectory), tableSchema,
            LuceneAnalyzerConstants.standardAnalyzerString());

    DataWriter writer = manager.getTableDataWriter(tableName);

    // Step 1: insert a tuple and read it back by the returned ID
    writer.open();
    Tuple originalTuple = new Tuple(tableSchema, new StringField("test"));
    IDField tupleId = writer.insertTuple(originalTuple);
    writer.close();
    Tuple fetchedAfterInsert = manager.getTupleByID(tableName, tupleId);
    String expectedInsertedContent = originalTuple.getField(contentAttribute).getValue().toString();
    String actualInsertedContent = fetchedAfterInsert.getField(contentAttribute).getValue().toString();
    Assert.assertEquals(expectedInsertedContent, actualInsertedContent);

    // Step 2: update the tuple under the same ID and read it back
    writer.open();
    Tuple replacementTuple = new Tuple(tableSchema, new StringField("testUpdate"));
    writer.updateTuple(replacementTuple, tupleId);
    writer.close();
    Tuple fetchedAfterUpdate = manager.getTupleByID(tableName, tupleId);
    String expectedUpdatedContent = replacementTuple.getField(contentAttribute).getValue().toString();
    String actualUpdatedContent = fetchedAfterUpdate.getField(contentAttribute).getValue().toString();
    Assert.assertEquals(expectedUpdatedContent, actualUpdatedContent);

    // Step 3: delete the tuple; looking it up afterwards must yield null
    writer.open();
    writer.deleteTupleByID(tupleId);
    writer.close();
    Tuple fetchedAfterDelete = manager.getTupleByID(tableName, tupleId);
    Assert.assertNull(fetchedAfterDelete);

    manager.deleteTable(tableName);
}
Also used : IDField(edu.uci.ics.texera.api.field.IDField) Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) StringField(edu.uci.ics.texera.api.field.StringField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 10 with StringField

use of edu.uci.ics.texera.api.field.StringField in project textdb by TextDB.

the class RelationManagerTest method test10.

/*
 * Inserts three tuples into a newly created table, counts the tuples returned
 * by a term query on "content", then deletes one tuple with a term query on
 * "number" and checks that it can no longer be fetched by its ID.
 */
@Test
public void test10() throws Exception {
    final String tableName = "relation_manager_test_table";
    final String tableDirectory = "./index/test_table";
    final String contentAttribute = "content";
    final String numberAttribute = "number";
    final String sharedContent = "test";
    Schema tableSchema = new Schema(
            new Attribute(contentAttribute, AttributeType.STRING),
            new Attribute(numberAttribute, AttributeType.STRING));

    RelationManager manager = RelationManager.getInstance();
    // deleteTable runs before createTable; presumably this clears a table left
    // behind by an earlier run (not verifiable from this method alone)
    manager.deleteTable(tableName);
    manager.createTable(tableName, Paths.get(tableDirectory), tableSchema,
            LuceneAnalyzerConstants.standardAnalyzerString());

    // Insert three tuples sharing the same content; only the second ID is kept
    DataWriter writer = manager.getTableDataWriter(tableName);
    writer.open();
    Tuple firstTuple = new Tuple(tableSchema, new StringField(sharedContent), new StringField("1"));
    writer.insertTuple(firstTuple);
    Tuple secondTuple = new Tuple(tableSchema, new StringField(sharedContent), new StringField("2"));
    IDField secondTupleId = writer.insertTuple(secondTuple);
    Tuple thirdTuple = new Tuple(tableSchema, new StringField(sharedContent), new StringField("3"));
    writer.insertTuple(thirdTuple);
    writer.close();

    // The content query should match all 3 tuples
    Query contentQuery = new TermQuery(new Term(contentAttribute, sharedContent));
    DataReader contentReader = manager.getTableDataReader(tableName, contentQuery);
    int matchedTupleCount = 0;
    contentReader.open();
    while (contentReader.getNextTuple() != null) {
        matchedTupleCount++;
    }
    contentReader.close();
    Assert.assertEquals(3, matchedTupleCount);

    // Deleting by the number query should remove tuple 2
    Query secondTupleQuery = new TermQuery(new Term(numberAttribute, "2"));
    writer.open();
    writer.deleteTuple(secondTupleQuery);
    writer.close();
    Tuple fetchedAfterDelete = manager.getTupleByID(tableName, secondTupleId);
    Assert.assertNull(fetchedAfterDelete);

    manager.deleteTable(tableName);
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) IDField(edu.uci.ics.texera.api.field.IDField) Query(org.apache.lucene.search.Query) Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) Term(org.apache.lucene.index.Term) StringField(edu.uci.ics.texera.api.field.StringField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

StringField (edu.uci.ics.texera.api.field.StringField) 103, Tuple (edu.uci.ics.texera.api.tuple.Tuple) 94, IField (edu.uci.ics.texera.api.field.IField) 87, IntegerField (edu.uci.ics.texera.api.field.IntegerField) 87, TextField (edu.uci.ics.texera.api.field.TextField) 79, Schema (edu.uci.ics.texera.api.schema.Schema) 75, ArrayList (java.util.ArrayList) 74, Test (org.junit.Test) 70, Span (edu.uci.ics.texera.api.span.Span) 64, DoubleField (edu.uci.ics.texera.api.field.DoubleField) 63, DateField (edu.uci.ics.texera.api.field.DateField) 60, Attribute (edu.uci.ics.texera.api.schema.Attribute) 60, SimpleDateFormat (java.text.SimpleDateFormat) 58, Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary) 29, JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate) 9, KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) 9, JsonNode (com.fasterxml.jackson.databind.JsonNode) 8, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 8, IDField (edu.uci.ics.texera.api.field.IDField) 5, IOperator (edu.uci.ics.texera.api.dataflow.IOperator) 4