Use of edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenSourcePredicate in the project textdb by TextDB.
Class PredicateBaseTest, method testFuzzyToken.
/**
 * Builds a {@code FuzzyTokenPredicate} and a {@code FuzzyTokenSourcePredicate}
 * from the same query, analyzer name, threshold and span-list name, and hands
 * each one to {@code testPredicate}.
 *
 * @throws Exception if {@code testPredicate} fails for either predicate
 */
@Test
public void testFuzzyToken() throws Exception {
    // Arguments shared by both predicate flavours.
    String query = "token1 token2 token3";
    String analyzerName = "standard";
    double threshold = 0.8;
    String spanListName = "spanListName";

    testPredicate(new FuzzyTokenPredicate(
            query, attributeNames, analyzerName, threshold, spanListName));

    // The source variant additionally takes the name of the table it reads from.
    testPredicate(new FuzzyTokenSourcePredicate(
            query, attributeNames, analyzerName, threshold, "tableName", spanListName));
}
Use of edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenSourcePredicate in the project textdb by TextDB.
Class FuzzyTokenMatcherPerformanceTest, method match.
/**
 * Runs a fuzzy-token match for every query in {@code queryList} and records
 * the results in the enclosing class's static accumulators.
 *
 * <p>For each query a {@code FuzzyTokenMatcherSourceOperator} is opened on
 * {@code tableName}, fully drained, and closed. The number of spans found is
 * added to {@code totalResultCount}, and the elapsed wall-clock time in
 * seconds (formatted to four decimal places) is appended to {@code timeResults}.
 *
 * @param queryList         the queries to run, one operator per query
 * @param threshold         the threshold passed to each {@code FuzzyTokenSourcePredicate}
 * @param luceneAnalyzerStr the analyzer name passed to each predicate
 * @param tableName         the table each source operator reads from
 * @param bool              not read by this method; kept for caller compatibility
 * @throws TexeraException  propagated from the operator
 * @throws IOException      declared by the original signature
 */
public static void match(ArrayList<String> queryList, double threshold, String luceneAnalyzerStr, String tableName, boolean bool) throws TexeraException, IOException {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    for (String query : queryList) {
        FuzzyTokenSourcePredicate predicate = new FuzzyTokenSourcePredicate(query, attributeNames, luceneAnalyzerStr, threshold, tableName, SchemaConstants.SPAN_LIST);
        FuzzyTokenMatcherSourceOperator fuzzyTokenSource = new FuzzyTokenMatcherSourceOperator(predicate);

        long startMatchTime = System.currentTimeMillis();
        int counter = 0;
        fuzzyTokenSource.open();
        try {
            Tuple nextTuple;
            while ((nextTuple = fuzzyTokenSource.getNextTuple()) != null) {
                ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
                List<Span> spanList = spanListField.getValue();
                counter += spanList.size();
            }
        } finally {
            // Release the operator even when reading a tuple fails part-way through.
            fuzzyTokenSource.close();
        }
        long endMatchTime = System.currentTimeMillis();

        double matchTime = (endMatchTime - startMatchTime) / 1000.0;
        // Format with a fixed locale: under a default locale that uses ',' as the
        // decimal separator, "%.4f" yields e.g. "1,2340", which Double.parseDouble rejects.
        timeResults.add(Double.parseDouble(String.format(java.util.Locale.ROOT, "%.4f", matchTime)));
        totalResultCount += counter;
    }
}
Use of edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenSourcePredicate in the project textdb by TextDB.
Class JoinDistanceTest, method testOneOfTheOperatorResultContainsNoSpan.
// Scenario: one of the two operators feeding the join produces results without a span list.
// The inner fuzzy-token source is routed through a projection onto the BOOK_SCHEMA
// attribute names before it is handed to the join.
// Expected outcome: a DataflowException is thrown.
// NOTE(review): the attribute filter below keeps attributes whose type is NOT TEXT, even though
// the resulting list is used as the fuzzy matcher's attribute names - confirm this is intended.
@Test(expected = DataflowException.class)
public void testOneOfTheOperatorResultContainsNoSpan() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    // Outer side: keyword source over the book table.
    KeywordMatcherSourceOperator outerSource =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);

    // Inner side: fuzzy-token source over the selected attributes of the book schema.
    List<String> matcherAttributeNames = JoinTestConstants.BOOK_SCHEMA.getAttributes()
            .stream()
            .filter(attr -> attr.getType() != AttributeType.TEXT)
            .map(Attribute::getName)
            .collect(Collectors.toList());
    FuzzyTokenMatcherSourceOperator innerSource = new FuzzyTokenMatcherSourceOperator(
            new FuzzyTokenSourcePredicate(
                    "this writer writes well",
                    matcherAttributeNames,
                    LuceneAnalyzerConstants.standardAnalyzerString(),
                    0.25,
                    BOOK_TABLE,
                    SchemaConstants.SPAN_LIST));

    // Project the inner results onto the book schema's own attribute names.
    ProjectionOperator innerProjection = new ProjectionOperator(
            new ProjectionPredicate(JoinTestConstants.BOOK_SCHEMA.getAttributeNames()));
    innerProjection.setInputOperator(innerSource);

    JoinDistancePredicate joinPredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);
    JoinTestHelper.getJoinDistanceResults(outerSource, innerProjection, joinPredicate, Integer.MAX_VALUE, 0);
}
Aggregations