Search in sources :

Example 11 with KeywordMatcherSourceOperator

use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.

the class JoinDistanceTest method testOneSpanEncompassesOtherAndDifferenceGreaterThanThreshold.

/*
 * Scenario: one of the two spans being joined fully encompasses the other, and
 * |(span 1 spanStartIndex) - (span 2 spanStartIndex)| and/or
 * |(span 1 spanEndIndex) - (span 2 spanEndIndex)| is larger than the threshold.
 *
 * e.g.
 * [<11, 18>]
 * [<3, 33>]
 * threshold = 10 (beyond threshold)
 * Expected: the join returns an empty list.
 */
@Test
public void testOneSpanEncompassesOtherAndDifferenceGreaterThanThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    // Single-word source and the phrase source that contains that word.
    KeywordMatcherSourceOperator wordSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator phraseSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "takes a special kind of writer", phrase);

    // Join on the REVIEW attribute with a distance threshold of 10.
    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 10);
    List<Tuple> joinedTuples = JoinTestHelper.getJoinDistanceResults(
            phraseSource, wordSource, distancePredicate, Integer.MAX_VALUE, 0);

    Assert.assertEquals(0, joinedTuples.size());
}
Also used : JoinDistancePredicate(edu.uci.ics.textdb.exp.join.JoinDistancePredicate) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator) Test(org.junit.Test)

Example 12 with KeywordMatcherSourceOperator

use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.

the class JoinTestHelper method getKeywordSource.

/**
 * Builds a KeywordMatcherSourceOperator over a test table for the given query.
 * (Most of the Join test cases use a KeywordMatcher as their source.)
 *
 * The predicate searches the AUTHOR, TITLE and REVIEW attributes, takes its
 * analyzer string from the RelationManager entry for the table, and uses
 * SchemaConstants.SPAN_LIST as the span list name. The returned operator is
 * not opened by this method.
 *
 * @param tableName name of the table the source reads from
 * @param query keyword query to match
 * @param matchingType keyword matching type handed to the predicate
 * @return a newly constructed KeywordMatcherSourceOperator
 * @throws TextDBException
 */
public static KeywordMatcherSourceOperator getKeywordSource(String tableName, String query, KeywordMatchingType matchingType) throws TextDBException {
    return new KeywordMatcherSourceOperator(
            new KeywordSourcePredicate(
                    query,
                    Arrays.asList(JoinTestConstants.AUTHOR, JoinTestConstants.TITLE, JoinTestConstants.REVIEW),
                    RelationManager.getRelationManager().getTableAnalyzerString(tableName),
                    matchingType,
                    tableName,
                    SchemaConstants.SPAN_LIST));
}
Also used : KeywordSourcePredicate(edu.uci.ics.textdb.exp.keywordmatcher.KeywordSourcePredicate) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)

Example 13 with KeywordMatcherSourceOperator

use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.

the class DictionaryMatcherSourceOperator method open.

/**
 * @about Opens the dictionary matcher. open() must be called before
 *        getNextTuple().
 *
 *        Reads the first dictionary entry, then opens either an index-based
 *        keyword source (conjunction / phrase matching) or a scan-based
 *        source (substring matching) and derives the input and output
 *        schemas from it. Any failure is rethrown as a DataFlowException
 *        carrying the original exception as its cause.
 */
@Override
public void open() throws DataFlowException {
    try {
        currentDictionaryEntry = predicate.getDictionary().getNextEntry();
        if (currentDictionaryEntry == null) {
            throw new DataFlowException("Dictionary is empty");
        }

        if (predicate.getKeywordMatchingType() != KeywordMatchingType.SUBSTRING_SCANBASED) {
            // Conjunction and phrase matching: delegate to an index-based
            // keyword matcher built for the current dictionary entry.
            KeywordSourcePredicate keywordPredicate = new KeywordSourcePredicate(
                    currentDictionaryEntry,
                    predicate.getAttributeNames(),
                    predicate.getAnalyzerString(),
                    predicate.getKeywordMatchingType(),
                    predicate.getTableName(),
                    predicate.getSpanListName());
            keywordSource = new KeywordMatcherSourceOperator(keywordPredicate);
            keywordSource.open();
            // The keyword matcher already produces the final schema, so both
            // schemas are taken straight from it.
            inputSchema = keywordSource.getOutputSchema();
            outputSchema = keywordSource.getOutputSchema();
            return;
        }

        // Substring matching: read the table through a scan source operator.
        ScanSourcePredicate scanPredicate = new ScanSourcePredicate(predicate.getTableName());
        indexSource = new ScanBasedSourceOperator(scanPredicate);
        indexSource.open();

        // The output schema for substring matching needs to contain the span
        // list, which must not already be present in the scanned schema.
        inputSchema = indexSource.getOutputSchema();
        outputSchema = inputSchema;
        if (inputSchema.containsField(predicate.getSpanListName())) {
            throw new DataFlowException(ErrorMessages.DUPLICATE_ATTRIBUTE(predicate.getSpanListName(), inputSchema));
        }
        outputSchema = Utils.addAttributeToSchema(
                outputSchema, new Attribute(predicate.getSpanListName(), AttributeType.LIST));
    } catch (Exception e) {
        throw new DataFlowException(e.getMessage(), e);
    }
}
Also used : KeywordSourcePredicate(edu.uci.ics.textdb.exp.keywordmatcher.KeywordSourcePredicate) Attribute(edu.uci.ics.textdb.api.schema.Attribute) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) ScanBasedSourceOperator(edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) TextDBException(edu.uci.ics.textdb.api.exception.TextDBException) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)

Example 14 with KeywordMatcherSourceOperator

use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.

the class KeywordMatcherPerformanceTest method match.

/*
 * Runs keyword matching for every query in the list.
 *
 * For each query a KeywordMatcherSourceOperator is built over the ABSTRACT
 * attribute of the given table, opened, drained and closed. The elapsed
 * time in seconds (formatted to four decimals) is appended to timeResults
 * and the number of spans found is added to totalResultCount.
 *
 * @param queryList queries to run, one operator per query
 * @param opType keyword matching type passed to the predicate
 * @param luceneAnalyzerStr analyzer string passed to the predicate
 * @param tableName table to match against
 * @throws TextDBException
 * @throws IOException
 */
public static void match(ArrayList<String> queryList, KeywordMatchingType opType, String luceneAnalyzerStr, String tableName) throws TextDBException, IOException {
    Attribute[] attributeList = new Attribute[] { MedlineIndexWriter.ABSTRACT_ATTR };
    for (String query : queryList) {
        // NOTE(review): the span list name is passed as null while results are
        // read from SchemaConstants.SPAN_LIST below; presumably the operator
        // falls back to that name -- confirm against KeywordSourcePredicate.
        KeywordSourcePredicate predicate = new KeywordSourcePredicate(query, Utils.getAttributeNames(attributeList), luceneAnalyzerStr, opType, tableName, null);
        KeywordMatcherSourceOperator keywordSource = new KeywordMatcherSourceOperator(predicate);
        long startMatchTime = System.currentTimeMillis();
        keywordSource.open();
        int counter = 0;
        try {
            Tuple nextTuple;
            while ((nextTuple = keywordSource.getNextTuple()) != null) {
                ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
                List<Span> spanList = spanListField.getValue();
                counter += spanList.size();
            }
        } finally {
            // Release the operator even when reading a tuple fails.
            keywordSource.close();
        }
        long endMatchTime = System.currentTimeMillis();
        double matchTime = (endMatchTime - startMatchTime) / 1000.0;
        // Format with a fixed locale: the default locale may use ',' as the
        // decimal separator (or non-ASCII digits), which Double.parseDouble
        // rejects with a NumberFormatException.
        timeResults.add(Double.parseDouble(String.format(java.util.Locale.ROOT, "%.4f", matchTime)));
        totalResultCount += counter;
    }
}
Also used : KeywordSourcePredicate(edu.uci.ics.textdb.exp.keywordmatcher.KeywordSourcePredicate) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)

Example 15 with KeywordMatcherSourceOperator

use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.

the class LogicalPlanTest method testLogicalPlan3.

/*
 * Test a valid operator graph.
 * 
 *                  --> RegexMatcher -->
 *                  |                    >-- Join1
 * KeywordSource --< -> NlpEntityOperator -->          >-- Join2 --> TupleSink
 *                  |                           /
 *                  --> FuzzyTokenMatcher ----->
 * 
 * The built query plan is walked backwards from the sink to the source,
 * checking the operator type at every step and that all three branches
 * hang off the same broadcast connector through distinct outputs.
 */
@Test
public void testLogicalPlan3() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan3();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    // Sink <-- Join2
    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);
    IOperator join2 = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join2 instanceof Join);

    // Join2 <-- (Join1, FuzzyTokenMatcher)
    IOperator join2Input1 = ((Join) join2).getOuterInputOperator();
    Assert.assertTrue(join2Input1 instanceof Join);
    IOperator join2Input2 = ((Join) join2).getInnerInputOperator();
    Assert.assertTrue(join2Input2 instanceof FuzzyTokenMatcher);

    // Join1 <-- (RegexMatcher, NlpEntityOperator)
    IOperator join1Input1 = ((Join) join2Input1).getInnerInputOperator();
    Assert.assertTrue(join1Input1 instanceof RegexMatcher);
    IOperator join1Input2 = ((Join) join2Input1).getOuterInputOperator();
    Assert.assertTrue(join1Input2 instanceof NlpEntityOperator);

    // Each of the three matchers reads from a connector output operator.
    IOperator connectorOut1 = ((RegexMatcher) join1Input1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) join1Input2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);
    IOperator connectorOut3 = ((FuzzyTokenMatcher) join2Input2).getInputOperator();
    Assert.assertTrue(connectorOut3 instanceof ConnectorOutputOperator);

    // The three outputs must have pairwise distinct indices; collecting them
    // in a set and checking its size verifies that.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut3).getOutputIndex());
    // JUnit's contract is assertEquals(expected, actual).
    Assert.assertEquals(3, connectorIndices.size());

    // All three outputs must belong to one and the same connector instance.
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    OneToNBroadcastConnector connector3 = ((ConnectorOutputOperator) connectorOut3).getOwnerConnector();
    Assert.assertSame(connector1, connector2);
    Assert.assertSame(connector1, connector3);

    // Connector <-- KeywordSource
    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Also used : TupleSink(edu.uci.ics.textdb.exp.sink.tuple.TupleSink) IOperator(edu.uci.ics.textdb.api.dataflow.IOperator) Join(edu.uci.ics.textdb.exp.join.Join) Plan(edu.uci.ics.textdb.api.engine.Plan) FuzzyTokenMatcher(edu.uci.ics.textdb.exp.fuzzytokenmatcher.FuzzyTokenMatcher) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator) ISink(edu.uci.ics.textdb.api.dataflow.ISink) ConnectorOutputOperator(edu.uci.ics.textdb.exp.connector.OneToNBroadcastConnector.ConnectorOutputOperator) NlpEntityOperator(edu.uci.ics.textdb.exp.nlp.entity.NlpEntityOperator) RegexMatcher(edu.uci.ics.textdb.exp.regexmatcher.RegexMatcher) OneToNBroadcastConnector(edu.uci.ics.textdb.exp.connector.OneToNBroadcastConnector) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)26 Test (org.junit.Test)22 Tuple (edu.uci.ics.textdb.api.tuple.Tuple)20 JoinDistancePredicate (edu.uci.ics.textdb.exp.join.JoinDistancePredicate)19 ArrayList (java.util.ArrayList)12 Span (edu.uci.ics.textdb.api.span.Span)10 IField (edu.uci.ics.textdb.api.field.IField)9 IntegerField (edu.uci.ics.textdb.api.field.IntegerField)9 StringField (edu.uci.ics.textdb.api.field.StringField)9 TextField (edu.uci.ics.textdb.api.field.TextField)9 Schema (edu.uci.ics.textdb.api.schema.Schema)9 KeywordSourcePredicate (edu.uci.ics.textdb.exp.keywordmatcher.KeywordSourcePredicate)4 IOperator (edu.uci.ics.textdb.api.dataflow.IOperator)3 ISink (edu.uci.ics.textdb.api.dataflow.ISink)3 Plan (edu.uci.ics.textdb.api.engine.Plan)3 Join (edu.uci.ics.textdb.exp.join.Join)3 RegexMatcher (edu.uci.ics.textdb.exp.regexmatcher.RegexMatcher)3 TupleSink (edu.uci.ics.textdb.exp.sink.tuple.TupleSink)3 Attribute (edu.uci.ics.textdb.api.schema.Attribute)2 OneToNBroadcastConnector (edu.uci.ics.textdb.exp.connector.OneToNBroadcastConnector)2