use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.
the class Sampler method constructSampleBuffer.
/**
 * Drains {@code inputOperator} and fills {@code sampleBuffer} with at most
 * {@code predicate.getSampleSize()} tuples.
 *
 * <ul>
 *   <li>{@code SampleType.FIRST_K_ARRIVAL}: keeps the first k tuples and stops
 *       reading as soon as a tuple arrives while the buffer is already full.</li>
 *   <li>{@code SampleType.RANDOM_SAMPLE}: reads the whole input and applies
 *       reservoir sampling, so every tuple ends up in the buffer with the same
 *       probability k / n (n = number of input tuples).</li>
 * </ul>
 *
 * @throws TextDBException if the input operator fails while producing a tuple
 */
private void constructSampleBuffer() throws TextDBException {
    sampleBuffer = new ArrayList<Tuple>();
    Random random = new Random(System.currentTimeMillis());
    // Read once; neither value is modified inside this method.
    int sampleSize = predicate.getSampleSize();
    SampleType sampleType = predicate.getSampleType();

    Tuple tuple;
    // Number of tuples consumed before the current one, i.e. the zero-based
    // index of the current tuple.
    int count = 0;
    while ((tuple = inputOperator.getNextTuple()) != null) {
        if (count < sampleSize) {
            sampleBuffer.add(tuple);
        } else {
            /*
             * In SampleType.FIRST_K_ARRIVAL mode, when the sampleBuffer is full,
             * it will finish constructing the buffer tuples and return.
             */
            if (sampleType == SampleType.FIRST_K_ARRIVAL) {
                break;
            }
            /*
             * In SampleType.RANDOM_SAMPLE mode, the reservoir sampling algorithm
             * is used to sample tuples.
             * When the buffer is full, the i-th incoming tuple (1-based, so
             * i == count + 1) replaces a tuple in the buffer with probability
             * bufferSize / i.
             */
            if (sampleType == SampleType.RANDOM_SAMPLE) {
                // The bound must be count + 1 so the draw covers [0, count].
                // Drawing from [0, count - 1] would always replace an entry for
                // the first overflowing tuple and would throw for a sample size
                // of 0 (nextInt(0) is illegal).
                int randomPos = random.nextInt(count + 1);
                if (randomPos < sampleSize) {
                    sampleBuffer.set(randomPos, tuple);
                }
            }
        }
        count++;
    }
}
use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.
the class OneToNBroadcastConnectorTest method testThreeOutputsWithItself.
/*
 * Broadcasts a scan of the people table through a connector with three
 * outputs, drains the outputs one after another, and checks that every
 * output matches the sample people tuples.
 */
@Test
public void testThreeOutputsWithItself() throws Exception {
    IOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));
    OneToNBroadcastConnector broadcast = new OneToNBroadcastConnector(3);
    broadcast.setInputOperator(scanSource);

    IOperator firstOutput = broadcast.getOutputOperator(0);
    IOperator secondOutput = broadcast.getOutputOperator(1);
    IOperator thirdOutput = broadcast.getOutputOperator(2);

    // all outputs are opened before any of them is read
    firstOutput.open();
    secondOutput.open();
    thirdOutput.open();

    List<Tuple> firstResults = new ArrayList<>();
    for (Tuple current = firstOutput.getNextTuple(); current != null; current = firstOutput.getNextTuple()) {
        firstResults.add(current);
    }

    List<Tuple> secondResults = new ArrayList<>();
    for (Tuple current = secondOutput.getNextTuple(); current != null; current = secondOutput.getNextTuple()) {
        secondResults.add(current);
    }

    List<Tuple> thirdResults = new ArrayList<>();
    for (Tuple current = thirdOutput.getNextTuple(); current != null; current = thirdOutput.getNextTuple()) {
        thirdResults.add(current);
    }

    firstOutput.close();
    secondOutput.close();
    thirdOutput.close();

    List<Tuple> samplePeople = TestConstants.getSamplePeopleTuples();
    Assert.assertTrue(TestUtils.equals(samplePeople, firstResults));
    Assert.assertTrue(TestUtils.equals(samplePeople, secondResults));
    Assert.assertTrue(TestUtils.equals(samplePeople, thirdResults));
}
use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.
the class DictionaryMatcherTest method testSingleWordQueryInStringFieldUsingPhraseChinese.
/**
 * Scenario: runs a dictionary holding the two Chinese entries "长孙" and "无忌"
 * against the Chinese table with PHRASE_INDEXBASED matching over the first
 * name, last name and description attributes. The returned tuples are compared
 * with two hand-built tuples, one carrying a span on "lastName" and one
 * carrying a span on "firstName".
 */
@Test
public void testSingleWordQueryInStringFieldUsingPhraseChinese() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<String>(Arrays.asList("长孙", "无忌"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // expected span lists, one per expected tuple
    List<Span> lastNameSpans = new ArrayList<Span>();
    lastNameSpans.add(new Span("lastName", 0, 2, "长孙", "长孙"));
    List<Span> firstNameSpans = new ArrayList<Span>();
    firstNameSpans.add(new Span("firstName", 0, 2, "无忌", "无忌"));

    // expected schema: the people attributes followed by the results attribute
    int peopleAttributeCount = TestConstantsChinese.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstantsChinese.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    // the two expected tuples differ only in their span list
    IField[] lastNameMatchFields = {
            new StringField("无忌"),
            new StringField("长孙"),
            new IntegerField(46),
            new DoubleField(5.50),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")),
            new TextField("北京大学电气工程学院"),
            new ListField<Span>(lastNameSpans) };
    IField[] firstNameMatchFields = {
            new StringField("无忌"),
            new StringField("长孙"),
            new IntegerField(46),
            new DoubleField(5.50),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")),
            new TextField("北京大学电气工程学院"),
            new ListField<Span>(firstNameSpans) };

    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(new Tuple(new Schema(schemaAttributes), lastNameMatchFields));
    expectedResults.add(new Tuple(new Schema(schemaAttributes), firstNameMatchFields));

    List<String> searchedAttributes = Arrays.asList(
            TestConstantsChinese.FIRST_NAME,
            TestConstantsChinese.LAST_NAME,
            TestConstantsChinese.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(
            CHINESE_TABLE, dictionary, searchedAttributes, KeywordMatchingType.PHRASE_INDEXBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.
the class ComparableMatcherTest method getQueryResults.
/**
 * Collects every tuple the given matcher produces, in the order returned,
 * until {@code getNextTuple()} yields {@code null}. This method does not
 * open or close the matcher itself.
 *
 * @param comparableMatcher the matcher to pull tuples from
 * @return the tuples produced by the matcher; empty if it produced none
 * @throws TextDBException if the matcher fails while producing a tuple
 */
public List<Tuple> getQueryResults(ComparableMatcher comparableMatcher) throws TextDBException {
    List<Tuple> collected = new ArrayList<>();
    for (Tuple current = comparableMatcher.getNextTuple();
            current != null;
            current = comparableMatcher.getNextTuple()) {
        collected.add(current);
    }
    return collected;
}
use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.
the class ComparableMatcherTest method testIntegerMatching3.
/**
 * Verifies integer matching with comparison type GREATER_THAN_OR_EQUAL_TO:
 * querying the age attribute with a threshold of 45 must return exactly the
 * two expected people tuples (ages 46 and 45).
 *
 * @throws Exception if the query or the construction of the expected tuples fails
 */
@Test
public void testIntegerMatching3() throws Exception {
    // Prepare the query
    int ageThreshold = 45;
    String ageAttributeName = TestConstants.AGE_ATTR.getAttributeName();
    ComparisonType comparison = ComparisonType.GREATER_THAN_OR_EQUAL_TO;

    // Perform the query
    List<Tuple> returnedResults = getIntegerQueryResults(ageAttributeName, comparison, ageThreshold);

    // Build the tuples the query is expected to return
    IField[] bruceFields = {
            new StringField("bruce"),
            new StringField("john Lee"),
            new IntegerField(46),
            new DoubleField(5.50),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")),
            new TextField("Tall Angry") };
    IField[] tomFields = {
            new StringField("tom hanks"),
            new StringField("cruise"),
            new IntegerField(45),
            new DoubleField(5.95),
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1971")),
            new TextField("Short Brown") };
    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(TestConstants.SCHEMA_PEOPLE, bruceFields));
    expectedResults.add(new Tuple(TestConstants.SCHEMA_PEOPLE, tomFields));

    // Check the results
    Assert.assertEquals(2, returnedResults.size());
    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Aggregations