Search in sources :

Example 1 with IntAssociationFacetField

Use of org.apache.lucene.facet.taxonomy.IntAssociationFacetField in the Apache lucene-solr project.

From the class FacetsConfig, method build:

/**
 * Translates any added {@link FacetField}s into normal fields for indexing.
 *
 * <p>
 * <b>NOTE:</b> you should add the returned document to IndexWriter, not the
 * input one!
 *
 * <p>
 * The input document is scanned once. Every {@link FacetField},
 * {@link SortedSetDocValuesFacetField} and {@link AssociationFacetField} is
 * grouped by the {@code indexFieldName} of its dimension's configuration and
 * handed to the matching {@code process*} helper together with a fresh
 * result document. All remaining (non-facet) fields of the input are then
 * appended to that result document unchanged.
 *
 * @param taxoWriter taxonomy writer forwarded to the helpers that process
 *        {@link FacetField}s and {@link AssociationFacetField}s
 * @param doc the document whose facet fields should be translated; it is
 *        only read by this method
 * @return a new document holding whatever the helpers added plus every
 *         non-facet field of {@code doc}
 * @throws IllegalArgumentException if an {@link AssociationFacetField}
 *         belongs to a dimension configured as hierarchical or as requiring
 *         a dimension count, or if association fields of different value
 *         types (int / float / bytes) end up in the same indexed field;
 *         {@code checkSeen} presumably also rejects a repeated
 *         single-valued dimension -- confirm against its definition
 * @throws IOException declared by this method; presumably propagated from
 *         the {@code process*} helpers -- confirm against their definitions
 */
public Document build(TaxonomyWriter taxoWriter, Document doc) throws IOException {
    // Find all FacetFields, collated by the actual field:
    Map<String, List<FacetField>> byField = new HashMap<>();
    // ... and also all SortedSetDocValuesFacetFields:
    Map<String, List<SortedSetDocValuesFacetField>> dvByField = new HashMap<>();
    // ... and also all AssociationFacetFields
    Map<String, List<AssociationFacetField>> assocByField = new HashMap<>();
    // Dimensions already encountered in this document; only consulted for
    // dimensions that are not configured as multi-valued.
    Set<String> seenDims = new HashSet<>();
    for (IndexableField field : doc) {
        // Facet fields are recognised by identity of their shared TYPE constant.
        if (field.fieldType() == FacetField.TYPE) {
            FacetField facetField = (FacetField) field;
            FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
            if (dimConfig.multiValued == false) {
                checkSeen(seenDims, facetField.dim);
            }
            String indexFieldName = dimConfig.indexFieldName;
            List<FacetField> fields = byField.get(indexFieldName);
            if (fields == null) {
                fields = new ArrayList<>();
                byField.put(indexFieldName, fields);
            }
            fields.add(facetField);
        }
        if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) {
            SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field;
            FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
            if (dimConfig.multiValued == false) {
                checkSeen(seenDims, facetField.dim);
            }
            String indexFieldName = dimConfig.indexFieldName;
            List<SortedSetDocValuesFacetField> fields = dvByField.get(indexFieldName);
            if (fields == null) {
                fields = new ArrayList<>();
                dvByField.put(indexFieldName, fields);
            }
            fields.add(facetField);
        }
        if (field.fieldType() == AssociationFacetField.TYPE) {
            AssociationFacetField facetField = (AssociationFacetField) field;
            FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
            if (dimConfig.multiValued == false) {
                checkSeen(seenDims, facetField.dim);
            }
            // Association fields support neither hierarchies nor dimension counts.
            if (dimConfig.hierarchical) {
                throw new IllegalArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facetField.dim + "\")");
            }
            if (dimConfig.requireDimCount) {
                throw new IllegalArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facetField.dim + "\")");
            }
            String indexFieldName = dimConfig.indexFieldName;
            List<AssociationFacetField> fields = assocByField.get(indexFieldName);
            if (fields == null) {
                fields = new ArrayList<>();
                assocByField.put(indexFieldName, fields);
            }
            fields.add(facetField);
            // Best effort: detect mis-matched types in same
            // indexed field:
            String type;
            if (facetField instanceof IntAssociationFacetField) {
                type = "int";
            } else if (facetField instanceof FloatAssociationFacetField) {
                type = "float";
            } else {
                type = "bytes";
            }
            // NOTE: not thread safe, but this is just best effort:
            // assocDimTypes is declared outside this method, so the first type
            // recorded for an indexed field is remembered across calls.
            String curType = assocDimTypes.get(indexFieldName);
            if (curType == null) {
                assocDimTypes.put(indexFieldName, type);
            } else if (!curType.equals(type)) {
                throw new IllegalArgumentException("mixing incompatible types of AssociationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
            }
        }
    }
    Document result = new Document();
    processFacetFields(taxoWriter, byField, result);
    processSSDVFacetFields(dvByField, result);
    processAssocFacetFields(taxoWriter, assocByField, result);
    // Carry over everything that is not one of the three facet field kinds;
    // the facet fields themselves were handed to the helpers above.
    for (IndexableField field : doc.getFields()) {
        IndexableFieldType ft = field.fieldType();
        if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE) {
            result.add(field);
        }
    }
    return result;
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) AssociationFacetField(org.apache.lucene.facet.taxonomy.AssociationFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) AssociationFacetField(org.apache.lucene.facet.taxonomy.AssociationFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) ArrayList(java.util.ArrayList) List(java.util.List) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) HashSet(java.util.HashSet)

Example 2 with IntAssociationFacetField

Use of org.apache.lucene.facet.taxonomy.IntAssociationFacetField in the Apache lucene-solr project.

From the class AssociationsFacetsExample, method index:

/**
 * Build the example index.
 *
 * <p>
 * Opens an index writer on {@code indexDir} (in {@code CREATE} mode) and a
 * taxonomy writer on {@code taxoDir}, then adds two documents carrying int
 * associations on the "tags" dimension and float associations on the
 * "genre" dimension. Each document is passed through {@code config.build}
 * before being added.
 *
 * <p>
 * Both writers are managed by try-with-resources, so they are closed even
 * when building or adding a document fails.
 *
 * @throws IOException declared by this method; presumably raised by the
 *         writers or by {@code config.build} -- confirm against those APIs
 */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    // Resources are closed in reverse declaration order: the taxonomy writer
    // first, then the index writer.
    try (IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
        // Writes facet ords to a separate directory from the main index
        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir)) {
        Document doc = new Document();
        // 3 occurrences for tag 'lucene'
        doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
        // 87% confidence level of genre 'computing'
        doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
        indexWriter.addDocument(config.build(taxoWriter, doc));
        doc = new Document();
        // 1 occurrence for tag 'lucene'
        doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
        // 2 occurrences for tag 'solr'
        doc.add(new IntAssociationFacetField(2, "tags", "solr"));
        // 75% confidence level of genre 'computing'
        doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
        // 34% confidence level of genre 'software'
        doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
        indexWriter.addDocument(config.build(taxoWriter, doc));
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) IndexWriter(org.apache.lucene.index.IndexWriter) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) Document(org.apache.lucene.document.Document) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

Document (org.apache.lucene.document.Document)2 FloatAssociationFacetField (org.apache.lucene.facet.taxonomy.FloatAssociationFacetField)2 IntAssociationFacetField (org.apache.lucene.facet.taxonomy.IntAssociationFacetField)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer)1 SortedSetDocValuesFacetField (org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField)1 AssociationFacetField (org.apache.lucene.facet.taxonomy.AssociationFacetField)1 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 IndexableField (org.apache.lucene.index.IndexableField)1 IndexableFieldType (org.apache.lucene.index.IndexableFieldType)1