Use of org.apache.jena.atlas.data.DistinctDataNet in the Apache Jena project.
Class TestDistinctDataNet, method testDistinct2.
/**
 * Exercises the incremental API of {@link DistinctDataNet}: each binding is offered through
 * {@code netAdd} and collected when that call returns {@code true}; afterwards everything
 * produced by {@code netIterator} is appended. The combined list is compared, by size and by
 * {@code ResultSetCompare.equalsByTest} with {@code NodeUtils.sameTerm}, against the result
 * of {@code Iter.distinct} over the same input.
 */
@Test
public void testDistinct2() {
    // Input with repeats: b12 three times, b19 and b02 twice each, x10 once.
    List<Binding> input = new ArrayList<>();
    input.add(b12);
    input.add(b19);
    input.add(b02);
    input.add(b12);
    input.add(b19);
    input.add(b12);
    input.add(b02);
    input.add(x10);

    // Reference answer computed independently of the class under test.
    List<Binding> expected = Iter.toList(Iter.distinct(input.iterator()));
    List<Binding> actual = new ArrayList<>();

    // NOTE(review): a count threshold of 2 is presumably chosen so this small input
    // exceeds the in-memory limit -- confirm against ThresholdPolicyCount.
    ThresholdPolicyCount<Binding> policy = new ThresholdPolicyCount<Binding>(2);
    List<SortCondition> noConditions = new ArrayList<SortCondition>();
    DistinctDataNet<Binding> net = new DistinctDataNet<>(policy, SerializationFactoryFinder.bindingSerializationFactory(), new BindingComparator(noConditions));
    try {
        for (Binding candidate : input) {
            boolean accepted = net.netAdd(candidate);
            if (accepted) {
                actual.add(candidate);
            }
        }

        // Append whatever the net hands back after all additions.
        Iterator<Binding> rest = net.netIterator();
        while (rest.hasNext()) {
            Binding next = rest.next();
            actual.add(next);
        }
        Iter.close(rest);
    } finally {
        net.close();
    }

    assertEquals(expected.size(), actual.size());
    assertTrue(ResultSetCompare.equalsByTest(expected, actual, NodeUtils.sameTerm));
}
Use of org.apache.jena.atlas.data.DistinctDataNet in the Apache Jena project.
Class TestDistinctDataNet, method testDistinct.
/**
 * Exercises the bulk API of {@link DistinctDataNet}: the whole input is handed over with
 * {@code addAll} and the contents are then read back through {@code iterator}. The collected
 * list is compared, by size and by {@code ResultSetCompare.equalsByTest} with
 * {@code NodeUtils.sameTerm}, against the result of {@code Iter.distinct} over the same input.
 */
@Test
public void testDistinct() {
    // Input with repeats: b12 three times, b19 and b02 twice each, x10 once.
    List<Binding> input = new ArrayList<>();
    input.add(b12);
    input.add(b19);
    input.add(b02);
    input.add(b12);
    input.add(b19);
    input.add(b12);
    input.add(b02);
    input.add(x10);

    // Reference answer computed independently of the class under test.
    List<Binding> expected = Iter.toList(Iter.distinct(input.iterator()));
    List<Binding> actual = new ArrayList<>();

    // NOTE(review): a count threshold of 2 is presumably chosen so this small input
    // exceeds the in-memory limit -- confirm against ThresholdPolicyCount.
    ThresholdPolicyCount<Binding> policy = new ThresholdPolicyCount<Binding>(2);
    List<SortCondition> noConditions = new ArrayList<SortCondition>();
    DistinctDataNet<Binding> net = new DistinctDataNet<>(policy, SerializationFactoryFinder.bindingSerializationFactory(), new BindingComparator(noConditions));
    try {
        net.addAll(input);

        Iterator<Binding> contents = net.iterator();
        while (contents.hasNext()) {
            Binding next = contents.next();
            actual.add(next);
        }
        Iter.close(contents);
    } finally {
        net.close();
    }

    assertEquals(expected.size(), actual.size());
    assertTrue(ResultSetCompare.equalsByTest(expected, actual, NodeUtils.sameTerm));
}
Use of org.apache.jena.atlas.data.DistinctDataNet in the Apache Jena project.
Class TestDistinctDataNet, method testTemporaryFilesAreCleanedUpAfterCompletion.
/**
 * Verifies that the spill files reported by {@link DistinctDataNet} are gone after
 * {@code close()}. The test adds 500 generated bindings using a count threshold of 10,
 * asserts that 49 of the files returned by {@code getSpillFiles()} exist on disk, drains
 * the iterator, closes the net, and finally asserts that none of those files still exist.
 */
@Test
public void testTemporaryFilesAreCleanedUpAfterCompletion() {
    List<Binding> input = new ArrayList<>();

    // `random` is a field declared outside this method; randomBinding presumably
    // draws from it -- confirm against the helper.
    random = new Random();

    // Variables named "1", "2", ..., "9", "0", in that order.
    Var[] vars = new Var[10];
    for (int slot = 0; slot < vars.length; slot++) {
        vars[slot] = Var.alloc(Integer.toString((slot + 1) % 10));
    }

    for (int n = 0; n < 500; n++) {
        input.add(randomBinding(vars));
    }

    ThresholdPolicyCount<Binding> policy = new ThresholdPolicyCount<Binding>(10);
    List<SortCondition> noConditions = new ArrayList<SortCondition>();
    DistinctDataNet<Binding> net = new DistinctDataNet<>(policy, SerializationFactoryFinder.bindingSerializationFactory(), new BindingComparator(noConditions));

    // Snapshot of the spill files, kept so their existence can be re-checked after close().
    List<File> spillFiles = new ArrayList<>();
    try {
        net.addAll(input);
        spillFiles.addAll(net.getSpillFiles());

        int presentBeforeClose = 0;
        for (File spill : spillFiles) {
            presentBeforeClose += spill.exists() ? 1 : 0;
        }
        // 500 bindings split into 50 chunks of 10: 49 written to files, 1 still in memory.
        assertEquals(49, presentBeforeClose);

        // Walk the full contents; the returned elements themselves are not inspected.
        Iterator<Binding> contents = net.iterator();
        while (contents.hasNext()) {
            contents.next();
        }
        Iter.close(contents);
    } finally {
        net.close();
    }

    int presentAfterClose = 0;
    for (File spill : spillFiles) {
        presentAfterClose += spill.exists() ? 1 : 0;
    }
    assertEquals(0, presentAfterClose);
}
Aggregations