Search in sources :

Example 1 with KeywordSourcePredicate

use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.

the class KeywordMatcherPerformanceTest method match.

/**
 * Runs a keyword match for every query in {@code queryList} and records the
 * outcome of each run.
 *
 * For each query, a {@link KeywordMatcherSourceOperator} is built over the
 * {@code MedlineIndexWriter.ABSTRACT} attribute, opened, drained and closed.
 * The elapsed wall-clock time in seconds (covering open, iteration and close)
 * is appended to {@code timeResults}, and the number of spans found is added
 * to {@code totalResultCount}.
 *
 * @param queryList         queries to run, one operator per query
 * @param opType            keyword matching type passed to the predicate
 * @param luceneAnalyzerStr analyzer string passed to the predicate
 * @param tableName         table name passed to the predicate
 * @throws TexeraException  declared by the signature; presumably raised by the operator calls
 * @throws IOException      declared by the signature
 */
public static void match(ArrayList<String> queryList, KeywordMatchingType opType, String luceneAnalyzerStr, String tableName) throws TexeraException, IOException {
    String[] attributeNames = new String[] { MedlineIndexWriter.ABSTRACT };
    for (String query : queryList) {
        KeywordSourcePredicate predicate = new KeywordSourcePredicate(query, Arrays.asList(attributeNames), luceneAnalyzerStr, opType, tableName, SchemaConstants.SPAN_LIST);
        KeywordMatcherSourceOperator keywordSource = new KeywordMatcherSourceOperator(predicate);
        long startMatchTime = System.currentTimeMillis();
        int counter = 0;
        keywordSource.open();
        try {
            Tuple nextTuple;
            while ((nextTuple = keywordSource.getNextTuple()) != null) {
                ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
                List<Span> spanList = spanListField.getValue();
                counter += spanList.size();
            }
        } finally {
            // Close the operator even when iteration fails, so it is not leaked.
            keywordSource.close();
        }
        long endMatchTime = System.currentTimeMillis();
        double matchTime = (endMatchTime - startMatchTime) / 1000.0;
        // Format with a fixed locale: under a default locale that uses a decimal
        // comma, "%.4f" would produce text that Double.parseDouble rejects.
        timeResults.add(Double.parseDouble(String.format(java.util.Locale.ROOT, "%.4f", matchTime)));
        totalResultCount += counter;
    }
}
Also used : KeywordSourcePredicate(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)

Example 2 with KeywordSourcePredicate

use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.

the class PredicateBaseTest method testKeyword.

@Test
public void testKeyword() throws Exception {
    // Plain keyword predicate first.
    testPredicate(new KeywordPredicate(
            "keyword",
            attributeNames,
            "standard",
            KeywordMatchingType.CONJUNCTION_INDEXBASED,
            "keywordResults"));

    // Then the source variant, which additionally carries a table name.
    testPredicate(new KeywordSourcePredicate(
            "keyword",
            attributeNames,
            "standard",
            KeywordMatchingType.CONJUNCTION_INDEXBASED,
            "tableName",
            "keywordSourceResults"));
}
Also used : KeywordSourcePredicate(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate) KeywordPredicate(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordPredicate) Test(org.junit.Test)

Example 3 with KeywordSourcePredicate

use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.

the class JoinTestHelper method getKeywordSource.

/**
 * Builds a KeywordMatcherSourceOperator over a test table for a given keyword.
 * (KeywordMatcher is used in most of the Join test cases.)
 *
 * The operator searches the AUTHOR, TITLE and REVIEW attributes declared in
 * JoinTestConstants, uses the analyzer string that RelationManager reports
 * for the table, and is configured with SchemaConstants.SPAN_LIST as the
 * span list name.
 *
 * @param tableName    table to read from; also used to look up the analyzer string
 * @param query        keyword query passed to the predicate
 * @param matchingType keyword matching type passed to the predicate
 * @return a newly constructed operator; this method does not open it
 * @throws TexeraException declared by the signature; presumably raised by the analyzer lookup or construction
 */
public static KeywordMatcherSourceOperator getKeywordSource(String tableName, String query, KeywordMatchingType matchingType) throws TexeraException {
    return new KeywordMatcherSourceOperator(
            new KeywordSourcePredicate(
                    query,
                    Arrays.asList(
                            JoinTestConstants.AUTHOR,
                            JoinTestConstants.TITLE,
                            JoinTestConstants.REVIEW),
                    RelationManager.getInstance().getTableAnalyzerString(tableName),
                    matchingType,
                    tableName,
                    SchemaConstants.SPAN_LIST));
}
Also used : KeywordSourcePredicate(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)

Example 4 with KeywordSourcePredicate

use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.

the class DictionaryMatcherSourceOperator method open.

@Override
public void open() throws TexeraException {
    // Nothing to do unless the operator is currently closed.
    if (cursor != CLOSED) {
        return;
    }

    // Fetch the first dictionary entry up front.
    // NOTE(review): getNextEntry() can return null (computeMatchingResults checks for it);
    // an empty dictionary would pass null to KeywordSourcePredicate below - confirm this is handled.
    currentDictionaryEntry = predicate.getDictionary().getNextEntry();

    boolean scanBased = predicate.getKeywordMatchingType() == KeywordMatchingType.SUBSTRING_SCANBASED
            || predicate.getKeywordMatchingType() == KeywordMatchingType.REGEX;

    if (!scanBased) {
        // Remaining matching types (CONJUNCTION and PHRASE) are index-based:
        // a keyword source operator is created for the current dictionary entry.
        KeywordSourcePredicate keywordSourcePredicate = new KeywordSourcePredicate(
                currentDictionaryEntry,
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                predicate.getKeywordMatchingType(),
                predicate.getTableName(),
                predicate.getSpanListName());
        keywordSource = new KeywordMatcherSourceOperator(keywordSourcePredicate);
        keywordSource.open();
        // The output schema is taken directly from the keyword source.
        outputSchema = keywordSource.getOutputSchema();
    } else {
        // Substring and regex matching: a scan source feeding a dictionary matcher.
        ScanSourcePredicate scanPredicate = new ScanSourcePredicate(predicate.getTableName());
        indexSource = new ScanBasedSourceOperator(scanPredicate);
        DictionaryPredicate dictionaryPredicate = new DictionaryPredicate(
                predicate.getDictionary(),
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                predicate.getKeywordMatchingType(),
                predicate.getSpanListName());
        dictionaryMatcher = new DictionaryMatcher(dictionaryPredicate);
        dictionaryMatcher.setInputOperator(indexSource);
        dictionaryMatcher.open();
        outputSchema = dictionaryMatcher.getOutputSchema();
    }

    cursor = OPENED;
}
Also used : KeywordSourcePredicate(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)

Example 5 with KeywordSourcePredicate

use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.

the class DictionaryMatcherSourceOperator method computeMatchingResults.

/**
 *  Collects the keyword matching results of every dictionary entry, grouped
 *  by tuple ID.
 *
 *  For each tuple produced by the current keyword source, its spans are
 *  merged into {@code tupleResultMap} under the tuple's ID. The first time an
 *  ID is seen, a copy of the tuple without the span list field is stored in
 *  {@code tupleIDMap}. Once the source is exhausted, it is replaced by a
 *  source for the next dictionary entry, until the dictionary has no more
 *  entries.
 *
 *  NOTE(review): the keyword source of the last entry is not closed here;
 *  presumably the operator's close() takes care of it - confirm.
 */
@SuppressWarnings("unchecked")
private void computeMatchingResults() {
    do {
        // Drain every tuple the current keyword source has to offer.
        for (Tuple tuple = keywordSource.getNextTuple(); tuple != null; tuple = keywordSource.getNextTuple()) {
            String id = tuple.getField(SchemaConstants._ID).getValue().toString();
            ListField<Span> spanField = tuple.getField(predicate.getSpanListName(), ListField.class);
            List<Span> spans = spanField.getValue();
            if (!tupleResultMap.containsKey(id)) {
                // First sighting of this tuple: keep it (minus its span list) and start its result list.
                tupleIDMap.put(id, new Tuple.Builder(tuple).remove(predicate.getSpanListName()).build());
                tupleResultMap.put(id, new ArrayList<>(spans));
            } else {
                tupleResultMap.get(id).addAll(spans);
            }
        }

        // Advance to the next dictionary entry; a null entry ends the loop.
        currentDictionaryEntry = predicate.getDictionary().getNextEntry();
        if (currentDictionaryEntry != null) {
            keywordSource.close();
            keywordSource = new KeywordMatcherSourceOperator(new KeywordSourcePredicate(
                    currentDictionaryEntry,
                    predicate.getAttributeNames(),
                    predicate.getAnalyzerString(),
                    predicate.getKeywordMatchingType(),
                    predicate.getTableName(),
                    predicate.getSpanListName()));
            keywordSource.open();
        }
    } while (currentDictionaryEntry != null);
}
Also used : KeywordSourcePredicate(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)

Aggregations

KeywordSourcePredicate (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate)5 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)4 Span (edu.uci.ics.texera.api.span.Span)2 Tuple (edu.uci.ics.texera.api.tuple.Tuple)2 KeywordPredicate (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordPredicate)1 ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator)1 ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate)1 Test (org.junit.Test)1