use of org.apache.lucene.facet.LabelAndValue in project lucene-solr by apache.
the class TestSortedSetDocValuesFacets method testRandom.
/**
 * Randomized check of SortedSetDocValues facet counting against a brute-force tally.
 *
 * <p>The documents returned by {@code getRandomDocs} are indexed with a {@code content} string
 * field plus one {@code SortedSetDocValuesFacetField} per non-null dimension value. Each
 * iteration then searches {@code content} for a randomly chosen token, computes facets (through
 * {@code ConcurrentSortedSetDocValuesFacetCounts} when an executor was handed out, otherwise
 * through {@code SortedSetDocValuesFacetCounts}) and compares {@code getAllDims(10)} with counts
 * derived directly from the in-memory {@code TestDoc} list.
 *
 * @throws Exception if indexing, searching or closing any resource fails
 */
public void testRandom() throws Exception {
  String[] tokens = getRandomTokens(10);
  Directory indexDir = newDirectory();
  // NOTE: taxoDir is only created and closed in this test; nothing is written to it.
  Directory taxoDir = newDirectory();

  RandomIndexWriter w = new RandomIndexWriter(random(), indexDir);
  FacetsConfig config = new FacetsConfig();
  int numDocs = atLeast(1000);
  int numDims = TestUtil.nextInt(random(), 1, 7);
  List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);
  for (TestDoc testDoc : testDocs) {
    Document doc = new Document();
    doc.add(newStringField("content", testDoc.content, Field.Store.NO));
    for (int j = 0; j < numDims; j++) {
      if (testDoc.dims[j] != null) {
        doc.add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j]));
      }
    }
    w.addDocument(config.build(doc));
  }

  // NRT open
  IndexSearcher searcher = newSearcher(w.getReader());

  // Per-top-reader state:
  SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());

  ExecutorService exec = randomExecutorServiceOrNull();
  try {
    int iters = atLeast(100);
    for (int iter = 0; iter < iters; iter++) {
      String searchToken = tokens[random().nextInt(tokens.length)];
      if (VERBOSE) {
        System.out.println("\nTEST: iter content=" + searchToken);
      }
      FacetsCollector fc = new FacetsCollector();
      FacetsCollector.search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
      Facets facets;
      if (exec != null) {
        facets = new ConcurrentSortedSetDocValuesFacetCounts(state, fc, exec);
      } else {
        facets = new SortedSetDocValuesFacetCounts(state, fc);
      }

      // Slow, yet hopefully bug-free, faceting: one label -> count map per dimension.
      List<Map<String, Integer>> expectedCounts = new ArrayList<>(numDims);
      for (int i = 0; i < numDims; i++) {
        expectedCounts.add(new HashMap<String, Integer>());
      }
      for (TestDoc doc : testDocs) {
        if (doc.content.equals(searchToken)) {
          for (int j = 0; j < numDims; j++) {
            if (doc.dims[j] != null) {
              expectedCounts.get(j).merge(doc.dims[j], 1, Integer::sum);
            }
          }
        }
      }

      List<FacetResult> expected = new ArrayList<>();
      for (int i = 0; i < numDims; i++) {
        List<LabelAndValue> labelValues = new ArrayList<>();
        int totCount = 0;
        for (Map.Entry<String, Integer> ent : expectedCounts.get(i).entrySet()) {
          labelValues.add(new LabelAndValue(ent.getKey(), ent.getValue()));
          totCount += ent.getValue();
        }
        sortLabelValues(labelValues);
        // Dimensions without any matching document are left out of the expected list.
        if (totCount > 0) {
          expected.add(new FacetResult("dim" + i, new String[0], totCount, labelValues.toArray(new LabelAndValue[labelValues.size()]), labelValues.size()));
        }
      }

      // Sort by highest value, tie break by value:
      sortFacetResults(expected);

      List<FacetResult> actual = facets.getAllDims(10);
      // The actual results are compared as returned; no tie fix-up (sortTies) is applied.
      assertEquals(expected, actual);
    }
  } finally {
    // Stop the worker threads even when an assertion above fails, so a failing
    // iteration does not leave executor threads running after the test.
    if (exec != null) {
      exec.shutdownNow();
    }
  }

  w.close();
  IOUtils.close(searcher.getIndexReader(), indexDir, taxoDir);
}
use of org.apache.lucene.facet.LabelAndValue in project lucene-solr by apache.
the class TestTaxonomyFacetCounts method testRandom.
/**
 * Randomized comparison of taxonomy facet counts with a brute-force tally.
 *
 * <p>Documents from {@code getRandomDocs} are indexed with a {@code content} string field and
 * one {@code FacetField} per non-null dimension value. Every iteration searches {@code content}
 * for a randomly picked token, obtains facets via {@code getTaxonomyFacetCounts} and checks
 * {@code getAllDims(10)} (after {@code sortTies}) against counts computed directly from the
 * {@code TestDoc} list.
 */
public void testRandom() throws Exception {
  String[] tokens = getRandomTokens(10);
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir);
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();

  int numDocs = atLeast(1000);
  int numDims = TestUtil.nextInt(random(), 1, 7);
  List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);

  // Index each test document: its token plus one facet field per populated dimension.
  for (TestDoc source : testDocs) {
    Document luceneDoc = new Document();
    luceneDoc.add(newStringField("content", source.content, Field.Store.NO));
    for (int dim = 0; dim < numDims; dim++) {
      String label = source.dims[dim];
      if (label == null) {
        continue;
      }
      luceneDoc.add(new FacetField("dim" + dim, label));
    }
    indexWriter.addDocument(config.build(taxoWriter, luceneDoc));
  }

  // NRT open of the index ...
  IndexSearcher searcher = newSearcher(indexWriter.getReader());
  // ... and NRT open of the taxonomy.
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

  int iters = atLeast(100);
  for (int round = 0; round < iters; round++) {
    String searchToken = tokens[random().nextInt(tokens.length)];
    if (VERBOSE) {
      System.out.println("\nTEST: iter content=" + searchToken);
    }

    FacetsCollector collector = new FacetsCollector();
    TermQuery query = new TermQuery(new Term("content", searchToken));
    FacetsCollector.search(searcher, query, 10, collector);
    Facets facets = getTaxonomyFacetCounts(taxoReader, config, collector);

    // Slow, yet hopefully bug-free, faceting: one label -> count map per dimension.
    List<Map<String, Integer>> tallies = new ArrayList<>(numDims);
    for (int dim = 0; dim < numDims; dim++) {
      tallies.add(new HashMap<String, Integer>());
    }
    for (TestDoc candidate : testDocs) {
      if (!candidate.content.equals(searchToken)) {
        continue;
      }
      for (int dim = 0; dim < numDims; dim++) {
        String label = candidate.dims[dim];
        if (label != null) {
          Map<String, Integer> tally = tallies.get(dim);
          Integer seen = tally.get(label);
          tally.put(label, seen == null ? 1 : seen + 1);
        }
      }
    }

    // Turn the tallies into the FacetResult list the facets implementation should return.
    List<FacetResult> expected = new ArrayList<>();
    for (int dim = 0; dim < numDims; dim++) {
      List<LabelAndValue> children = new ArrayList<>();
      int total = 0;
      for (Map.Entry<String, Integer> entry : tallies.get(dim).entrySet()) {
        children.add(new LabelAndValue(entry.getKey(), entry.getValue()));
        total += entry.getValue();
      }
      sortLabelValues(children);
      // Dimensions with no matching document are not part of the expected result.
      if (total > 0) {
        LabelAndValue[] childArray = children.toArray(new LabelAndValue[children.size()]);
        expected.add(new FacetResult("dim" + dim, new String[0], total, childArray, children.size()));
      }
    }

    // Sort by highest value, tie break by value:
    sortFacetResults(expected);

    List<FacetResult> actual = facets.getAllDims(10);
    // Messy: fixup ties
    sortTies(actual);

    assertEquals(expected, actual);
  }

  indexWriter.close();
  IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, indexDir, taxoDir);
}
use of org.apache.lucene.facet.LabelAndValue in project lucene-solr by apache.
the class TestTaxonomyFacetSumValueSource method testRandom.
/**
 * Randomized comparison of {@code TaxonomyFacetSumValueSource} with a brute-force sum.
 *
 * <p>Each document from {@code getRandomDocs} is given a random float stored in the
 * {@code value} doc-values field, together with a {@code content} string field and one
 * {@code FacetField} per non-null dimension value. Every iteration searches {@code content} for
 * a randomly picked token, sums {@code value} per facet label through the facets implementation,
 * and compares {@code getAllDims(10)} (after {@code sortTies}) with sums computed directly from
 * the {@code TestDoc} list using {@code assertFloatValuesEquals}.
 */
public void testRandom() throws Exception {
  String[] tokens = getRandomTokens(10);
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir);
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();

  int numDocs = atLeast(1000);
  int numDims = TestUtil.nextInt(random(), 1, 7);
  List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);

  // Index each test document: token, random float value, and one facet field per populated dimension.
  for (TestDoc source : testDocs) {
    Document luceneDoc = new Document();
    luceneDoc.add(newStringField("content", source.content, Field.Store.NO));
    // The value is remembered on the TestDoc so the brute-force pass below can sum it.
    source.value = random().nextFloat();
    luceneDoc.add(new FloatDocValuesField("value", source.value));
    for (int dim = 0; dim < numDims; dim++) {
      String label = source.dims[dim];
      if (label == null) {
        continue;
      }
      luceneDoc.add(new FacetField("dim" + dim, label));
    }
    indexWriter.addDocument(config.build(taxoWriter, luceneDoc));
  }

  // NRT open of the index ...
  IndexSearcher searcher = newSearcher(indexWriter.getReader());
  // ... and NRT open of the taxonomy.
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

  int iters = atLeast(100);
  for (int round = 0; round < iters; round++) {
    String searchToken = tokens[random().nextInt(tokens.length)];
    if (VERBOSE) {
      System.out.println("\nTEST: iter content=" + searchToken);
    }

    FacetsCollector collector = new FacetsCollector();
    TermQuery query = new TermQuery(new Term("content", searchToken));
    FacetsCollector.search(searcher, query, 10, collector);
    Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, collector, DoubleValuesSource.fromFloatField("value"));

    // Slow, yet hopefully bug-free, faceting: one label -> summed value map per dimension.
    List<Map<String, Float>> sumsPerDim = new ArrayList<>(numDims);
    for (int dim = 0; dim < numDims; dim++) {
      sumsPerDim.add(new HashMap<String, Float>());
    }
    for (TestDoc candidate : testDocs) {
      if (!candidate.content.equals(searchToken)) {
        continue;
      }
      for (int dim = 0; dim < numDims; dim++) {
        String label = candidate.dims[dim];
        if (label != null) {
          Map<String, Float> sums = sumsPerDim.get(dim);
          Float soFar = sums.get(label);
          sums.put(label, soFar == null ? candidate.value : soFar + candidate.value);
        }
      }
    }

    // Turn the sums into the FacetResult list the facets implementation should return.
    List<FacetResult> expected = new ArrayList<>();
    for (int dim = 0; dim < numDims; dim++) {
      List<LabelAndValue> children = new ArrayList<>();
      double total = 0;
      for (Map.Entry<String, Float> entry : sumsPerDim.get(dim).entrySet()) {
        children.add(new LabelAndValue(entry.getKey(), entry.getValue()));
        total += entry.getValue();
      }
      sortLabelValues(children);
      // Dimensions whose summed value is not positive are not part of the expected result.
      if (total > 0) {
        LabelAndValue[] childArray = children.toArray(new LabelAndValue[children.size()]);
        expected.add(new FacetResult("dim" + dim, new String[0], total, childArray, children.size()));
      }
    }

    // Sort by highest value, tie break by value:
    sortFacetResults(expected);

    List<FacetResult> actual = facets.getAllDims(10);
    // Messy: fixup ties
    sortTies(actual);

    if (VERBOSE) {
      System.out.println("expected=\n" + expected.toString());
      System.out.println("actual=\n" + actual.toString());
    }

    assertFloatValuesEquals(expected, actual);
  }

  indexWriter.close();
  IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, indexDir, taxoDir);
}
use of org.apache.lucene.facet.LabelAndValue in project lucene-solr by apache.
the class TestTaxonomyFacetCounts2 method testAllCounts.
/**
 * Collects facets over all documents and verifies the top children of {@code CP_A} and
 * {@code CP_B}: each child count must match {@code allExpectedCounts}, and children must be
 * returned in non-increasing count order. The dimension-level value is expected to be
 * {@code -1} for {@code CP_A} and the {@code allExpectedCounts} entry for {@code CP_B}.
 */
@Test
public void testAllCounts() throws Exception {
  DirectoryReader reader = DirectoryReader.open(indexDir);
  TaxonomyReader taxonomy = new DirectoryTaxonomyReader(taxoDir);
  IndexSearcher searcher = newSearcher(reader);

  FacetsCollector collector = new FacetsCollector();
  searcher.search(new MatchAllDocsQuery(), collector);

  Facets facets = getTaxonomyFacetCounts(taxonomy, getConfig(), collector);

  // Dimension CP_A
  FacetResult cpA = facets.getTopChildren(NUM_CHILDREN_CP_A, CP_A);
  assertEquals(-1, cpA.value.intValue());
  int ceiling = Integer.MAX_VALUE;
  for (LabelAndValue child : cpA.labelValues) {
    String key = CP_A + "/" + child.label;
    assertEquals(allExpectedCounts.get(key), child.value);
    String message = "wrong sort order of sub results: labelValue.value=" + child.value + " prevValue=" + ceiling;
    assertTrue(message, child.value.intValue() <= ceiling);
    // The current count becomes the upper bound for the next child.
    ceiling = child.value.intValue();
  }

  // Dimension CP_B
  FacetResult cpB = facets.getTopChildren(NUM_CHILDREN_CP_B, CP_B);
  assertEquals(allExpectedCounts.get(CP_B), cpB.value);
  ceiling = Integer.MAX_VALUE;
  for (LabelAndValue child : cpB.labelValues) {
    String key = CP_B + "/" + child.label;
    assertEquals(allExpectedCounts.get(key), child.value);
    String message = "wrong sort order of sub results: labelValue.value=" + child.value + " prevValue=" + ceiling;
    assertTrue(message, child.value.intValue() <= ceiling);
    ceiling = child.value.intValue();
  }

  IOUtils.close(reader, taxonomy);
}
use of org.apache.lucene.facet.LabelAndValue in project lucene-solr by apache.
the class TestTaxonomyFacetCounts2 method testBigNumResults.
/**
 * Requests {@code Integer.MAX_VALUE} top children for {@code CP_A} and {@code CP_B} over all
 * documents and verifies that every returned child count matches {@code allExpectedCounts}.
 * The dimension-level value is expected to be {@code -1} for {@code CP_A} and the
 * {@code allExpectedCounts} entry for {@code CP_B}.
 */
@Test
public void testBigNumResults() throws Exception {
  DirectoryReader reader = DirectoryReader.open(indexDir);
  TaxonomyReader taxonomy = new DirectoryTaxonomyReader(taxoDir);
  IndexSearcher searcher = newSearcher(reader);

  FacetsCollector collector = new FacetsCollector();
  searcher.search(new MatchAllDocsQuery(), collector);

  Facets facets = getTaxonomyFacetCounts(taxonomy, getConfig(), collector);

  // Dimension CP_A
  FacetResult cpA = facets.getTopChildren(Integer.MAX_VALUE, CP_A);
  assertEquals(-1, cpA.value.intValue());
  for (LabelAndValue child : cpA.labelValues) {
    String key = CP_A + "/" + child.label;
    assertEquals(allExpectedCounts.get(key), child.value);
  }

  // Dimension CP_B
  FacetResult cpB = facets.getTopChildren(Integer.MAX_VALUE, CP_B);
  assertEquals(allExpectedCounts.get(CP_B), cpB.value);
  for (LabelAndValue child : cpB.labelValues) {
    String key = CP_B + "/" + child.label;
    assertEquals(allExpectedCounts.get(key), child.value);
  }

  IOUtils.close(reader, taxonomy);
}
Aggregations