Search in sources :

Example 1 with IndexableFieldType

use of org.apache.lucene.index.IndexableFieldType in project elasticsearch by elastic.

the class KeywordFieldMapperTests method testDefaults.

/**
 * Checks what a {@code keyword} mapping with no extra options produces: the mapping source
 * round-trips unchanged, and one value yields two Lucene fields named "field" - an indexed
 * one followed by a doc-values one.
 */
public void testDefaults() throws Exception {
    String mappingJson = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field")
                        .field("type", "keyword")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    DocumentMapper docMapper = parser.parse("type", new CompressedXContent(mappingJson));
    // The parsed mapping must serialise back to exactly what was fed in.
    assertEquals(mappingJson, docMapper.mappingSource().toString());

    ParsedDocument parsed = docMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
        .startObject()
            .field("field", "1234")
        .endObject()
        .bytes());

    IndexableField[] produced = parsed.rootDoc().getFields("field");
    assertEquals(2, produced.length);
    IndexableField indexedField = produced[0];
    IndexableField docValuesField = produced[1];

    // First field: indexed with DOCS only, no norms, not tokenized, not stored, no term vectors,
    // no doc values.
    assertEquals(new BytesRef("1234"), indexedField.binaryValue());
    IndexableFieldType indexedType = indexedField.fieldType();
    assertThat(indexedType.omitNorms(), equalTo(true));
    assertFalse(indexedType.tokenized());
    assertFalse(indexedType.stored());
    assertThat(indexedType.indexOptions(), equalTo(IndexOptions.DOCS));
    assertThat(indexedType.storeTermVectors(), equalTo(false));
    assertThat(indexedType.storeTermVectorOffsets(), equalTo(false));
    assertThat(indexedType.storeTermVectorPositions(), equalTo(false));
    assertThat(indexedType.storeTermVectorPayloads(), equalTo(false));
    assertEquals(DocValuesType.NONE, indexedType.docValuesType());

    // Second field: same value, not indexed, carries SORTED_SET doc values.
    assertEquals(new BytesRef("1234"), docValuesField.binaryValue());
    IndexableFieldType docValuesFieldType = docValuesField.fieldType();
    assertThat(docValuesFieldType.indexOptions(), equalTo(IndexOptions.NONE));
    assertEquals(DocValuesType.SORTED_SET, docValuesFieldType.docValuesType());
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) Matchers.containsString(org.hamcrest.Matchers.containsString) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with IndexableFieldType

use of org.apache.lucene.index.IndexableFieldType in project elasticsearch by elastic.

the class TextFieldMapperTests method testDefaults.

/**
 * Checks what a {@code text} mapping with no extra options produces: the mapping source
 * round-trips unchanged, and one value yields a single tokenized Lucene field named "field".
 */
public void testDefaults() throws Exception {
    String mappingJson = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field")
                        .field("type", "text")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    DocumentMapper docMapper = parser.parse("type", new CompressedXContent(mappingJson));
    // The parsed mapping must serialise back to exactly what was fed in.
    assertEquals(mappingJson, docMapper.mappingSource().toString());

    ParsedDocument parsed = docMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
        .startObject()
            .field("field", "1234")
        .endObject()
        .bytes());

    IndexableField[] produced = parsed.rootDoc().getFields("field");
    assertEquals(1, produced.length);
    IndexableField textField = produced[0];
    assertEquals("1234", textField.stringValue());

    // Expected type: norms kept, tokenized, not stored, positions indexed, no term vectors,
    // no doc values.
    IndexableFieldType textType = textField.fieldType();
    assertThat(textType.omitNorms(), equalTo(false));
    assertTrue(textType.tokenized());
    assertFalse(textType.stored());
    assertThat(textType.indexOptions(), equalTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS));
    assertThat(textType.storeTermVectors(), equalTo(false));
    assertThat(textType.storeTermVectorOffsets(), equalTo(false));
    assertThat(textType.storeTermVectorPositions(), equalTo(false));
    assertThat(textType.storeTermVectorPayloads(), equalTo(false));
    assertEquals(DocValuesType.NONE, textType.docValuesType());
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) Matchers.containsString(org.hamcrest.Matchers.containsString)

Example 3 with IndexableFieldType

use of org.apache.lucene.index.IndexableFieldType in project elasticsearch by elastic.

the class KeywordFieldMapperTests method testNormalizer.

/**
 * Checks a {@code keyword} mapping configured with the "my_lowercase" normalizer: the input
 * "AbC" must come out as "abc" in both the indexed field and the doc-values field.
 */
public void testNormalizer() throws IOException {
    String mappingJson = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field")
                        .field("type", "keyword")
                        .field("normalizer", "my_lowercase")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    DocumentMapper docMapper = parser.parse("type", new CompressedXContent(mappingJson));
    // The parsed mapping must serialise back to exactly what was fed in.
    assertEquals(mappingJson, docMapper.mappingSource().toString());

    ParsedDocument parsed = docMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
        .startObject()
            .field("field", "AbC")
        .endObject()
        .bytes());

    IndexableField[] produced = parsed.rootDoc().getFields("field");
    assertEquals(2, produced.length);
    IndexableField indexedField = produced[0];
    IndexableField docValuesField = produced[1];

    // First field: normalized value, indexed with DOCS only, no norms, not tokenized,
    // not stored, no term vectors, no doc values.
    assertEquals(new BytesRef("abc"), indexedField.binaryValue());
    IndexableFieldType indexedType = indexedField.fieldType();
    assertThat(indexedType.omitNorms(), equalTo(true));
    assertFalse(indexedType.tokenized());
    assertFalse(indexedType.stored());
    assertThat(indexedType.indexOptions(), equalTo(IndexOptions.DOCS));
    assertThat(indexedType.storeTermVectors(), equalTo(false));
    assertThat(indexedType.storeTermVectorOffsets(), equalTo(false));
    assertThat(indexedType.storeTermVectorPositions(), equalTo(false));
    assertThat(indexedType.storeTermVectorPayloads(), equalTo(false));
    assertEquals(DocValuesType.NONE, indexedType.docValuesType());

    // Second field: same normalized value, not indexed, carries SORTED_SET doc values.
    assertEquals(new BytesRef("abc"), docValuesField.binaryValue());
    IndexableFieldType docValuesFieldType = docValuesField.fieldType();
    assertThat(docValuesFieldType.indexOptions(), equalTo(IndexOptions.NONE));
    assertEquals(DocValuesType.SORTED_SET, docValuesFieldType.docValuesType());
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) Matchers.containsString(org.hamcrest.Matchers.containsString) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with IndexableFieldType

use of org.apache.lucene.index.IndexableFieldType in project lucene-solr by apache.

the class FacetsConfig method build.

/**
 * Translates the facet fields ({@link FacetField}, {@link SortedSetDocValuesFacetField} and
 * {@link AssociationFacetField}) found in {@code doc} into normal fields for indexing, and
 * copies every other field across unchanged.
 *
 * <p>
 * <b>NOTE:</b> add the document returned here to IndexWriter, not the one passed in!
 *
 * @param taxoWriter passed to the helpers that process {@link FacetField}s and
 *        {@link AssociationFacetField}s
 * @param doc the document whose fields are inspected
 * @return a new {@link Document} holding the processed facet fields followed by the
 *         non-facet fields of {@code doc}
 * @throws IllegalArgumentException if an {@link AssociationFacetField}'s dimension is
 *         configured as hierarchical or as requiring a dim count, or if different association
 *         types are mixed in one indexed field
 * @throws IOException declared on behalf of the processing helpers
 */
public Document build(TaxonomyWriter taxoWriter, Document doc) throws IOException {
    // One map per facet kind, each keyed by the name of the index field the facets go to.
    Map<String, List<FacetField>> taxoFacets = new HashMap<>();
    Map<String, List<SortedSetDocValuesFacetField>> ssdvFacets = new HashMap<>();
    Map<String, List<AssociationFacetField>> assocFacets = new HashMap<>();
    // Dimensions encountered so far; handed to checkSeen for dimensions that are not multi-valued.
    Set<String> seenDims = new HashSet<>();

    for (IndexableField field : doc) {
        // Facet fields are recognised by identity of their field type constant.
        IndexableFieldType kind = field.fieldType();

        if (kind == FacetField.TYPE) {
            FacetField taxoFacet = (FacetField) field;
            FacetsConfig.DimConfig dimConfig = getDimConfig(taxoFacet.dim);
            if (!dimConfig.multiValued) {
                checkSeen(seenDims, taxoFacet.dim);
            }
            String indexFieldName = dimConfig.indexFieldName;
            List<FacetField> group = taxoFacets.get(indexFieldName);
            if (group == null) {
                group = new ArrayList<>();
                taxoFacets.put(indexFieldName, group);
            }
            group.add(taxoFacet);
        }

        if (kind == SortedSetDocValuesFacetField.TYPE) {
            SortedSetDocValuesFacetField ssdvFacet = (SortedSetDocValuesFacetField) field;
            FacetsConfig.DimConfig dimConfig = getDimConfig(ssdvFacet.dim);
            if (!dimConfig.multiValued) {
                checkSeen(seenDims, ssdvFacet.dim);
            }
            String indexFieldName = dimConfig.indexFieldName;
            List<SortedSetDocValuesFacetField> group = ssdvFacets.get(indexFieldName);
            if (group == null) {
                group = new ArrayList<>();
                ssdvFacets.put(indexFieldName, group);
            }
            group.add(ssdvFacet);
        }

        if (kind == AssociationFacetField.TYPE) {
            AssociationFacetField assocFacet = (AssociationFacetField) field;
            FacetsConfig.DimConfig dimConfig = getDimConfig(assocFacet.dim);
            if (!dimConfig.multiValued) {
                checkSeen(seenDims, assocFacet.dim);
            }
            if (dimConfig.hierarchical) {
                throw new IllegalArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + assocFacet.dim + "\")");
            }
            if (dimConfig.requireDimCount) {
                throw new IllegalArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + assocFacet.dim + "\")");
            }
            String indexFieldName = dimConfig.indexFieldName;
            List<AssociationFacetField> group = assocFacets.get(indexFieldName);
            if (group == null) {
                group = new ArrayList<>();
                assocFacets.put(indexFieldName, group);
            }
            group.add(assocFacet);

            // Best effort: detect mis-matched association types within the same indexed field.
            String type = assocFacet instanceof IntAssociationFacetField ? "int"
                : assocFacet instanceof FloatAssociationFacetField ? "float"
                : "bytes";
            // NOTE: the get-then-put below is not thread safe, but this is just best effort.
            String knownType = assocDimTypes.get(indexFieldName);
            if (knownType == null) {
                assocDimTypes.put(indexFieldName, type);
            } else if (!knownType.equals(type)) {
                throw new IllegalArgumentException("mixing incompatible types of AssocationFacetField (" + knownType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
            }
        }
    }

    // Processed facet fields go into the result first, in this fixed order.
    Document result = new Document();
    processFacetFields(taxoWriter, taxoFacets, result);
    processSSDVFacetFields(ssdvFacets, result);
    processAssocFacetFields(taxoWriter, assocFacets, result);

    // Then every field that is not one of the three facet kinds is carried over as-is.
    for (IndexableField field : doc.getFields()) {
        IndexableFieldType kind = field.fieldType();
        boolean isFacetField = kind == FacetField.TYPE
            || kind == SortedSetDocValuesFacetField.TYPE
            || kind == AssociationFacetField.TYPE;
        if (!isFacetField) {
            result.add(field);
        }
    }
    return result;
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) AssociationFacetField(org.apache.lucene.facet.taxonomy.AssociationFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet)

Example 5 with IndexableFieldType

use of org.apache.lucene.index.IndexableFieldType in project lucene-solr by apache.

the class BaseTokenStreamTestCase method checkRandomData.

/**
 * Runs {@code iterations} rounds of {@code checkAnalysisConsistency} against analyzer
 * {@code a}, each round on a freshly chosen text: roughly one round in ten uses a (possibly
 * sliced) "body" value from {@link LineFileDocs}, the others use
 * {@code TestUtil.randomAnalysisString}. When {@code iw} is non-null, a document made of one
 * or more "dummy" fields with a randomized field type is also added to it along the way.
 *
 * @param random source of every random decision made here; the order of draws matters for
 *        reproducing a failing seed
 * @param a analyzer under test, passed to {@code checkAnalysisConsistency}
 * @param iterations number of rounds to run
 * @param maxWordLength maximum length of the slice taken from a line-docs body, and the
 *        length argument passed to {@code TestUtil.randomAnalysisString}
 * @param useCharFilter passed through to {@code checkAnalysisConsistency}
 * @param simple passed through to {@code TestUtil.randomAnalysisString}
 * @param offsetsAreCorrect passed through to {@code checkAnalysisConsistency}; also decides
 *        whether offsets may be chosen as the index option of the "dummy" field
 * @param iw optional writer to add documents to; may be {@code null} to skip indexing
 * @throws IOException declared by this method; any {@code Throwable} raised inside a round is
 *         rethrown via {@code Rethrow.rethrow} after the failing text has been printed
 */
private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect, RandomIndexWriter iw) throws IOException {
    final LineFileDocs docs = new LineFileDocs(random);
    Document doc = null;
    // 'field' is the original field instance; 'currentField' is the instance handed to the
    // current round (it may be a later instance of the same multi-valued field).
    Field field = null, currentField = null;
    // Empty placeholder reader used as the initial value of every "dummy" field created here.
    StringReader bogus = new StringReader("");
    if (iw != null) {
        // Indexing requested: build a document with a single "dummy" field whose type is
        // randomized below, starting from TextField.TYPE_NOT_STORED.
        doc = new Document();
        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        if (random.nextBoolean()) {
            ft.setStoreTermVectors(true);
            ft.setStoreTermVectorOffsets(random.nextBoolean());
            ft.setStoreTermVectorPositions(random.nextBoolean());
            // Payloads are only randomized when term vector positions were switched on.
            if (ft.storeTermVectorPositions()) {
                ft.setStoreTermVectorPayloads(random.nextBoolean());
            }
        }
        if (random.nextBoolean()) {
            ft.setOmitNorms(true);
        }
        // Pick one of the four index option levels at random.
        switch(random.nextInt(4)) {
            case 0:
                ft.setIndexOptions(IndexOptions.DOCS);
                break;
            case 1:
                ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
                break;
            case 2:
                ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
                break;
            default:
                // Offsets are only indexed when the caller says they are correct; otherwise
                // fall back to positions.
                if (offsetsAreCorrect) {
                    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                } else {
                    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
                }
        }
        currentField = field = new Field("dummy", bogus, ft);
        doc.add(currentField);
    }
    try {
        for (int i = 0; i < iterations; i++) {
            String text;
            // One specific value out of ten selects real data; all others select synthetic text.
            if (random.nextInt(10) == 7) {
                // real data from linedocs
                text = docs.nextDoc().get("body");
                if (text.length() > maxWordLength) {
                    // Take a random slice from the text...:
                    int startPos = random.nextInt(text.length() - maxWordLength);
                    if (startPos > 0 && Character.isLowSurrogate(text.charAt(startPos))) {
                        // Take care not to split up a surrogate pair:
                        startPos--;
                        assert Character.isHighSurrogate(text.charAt(startPos));
                    }
                    // endPos is inclusive (see the substring call below), so the slice spans
                    // at most maxWordLength chars.
                    int endPos = startPos + maxWordLength - 1;
                    if (Character.isHighSurrogate(text.charAt(endPos))) {
                        // Take care not to split up a surrogate pair:
                        endPos--;
                    }
                    text = text.substring(startPos, 1 + endPos);
                }
            } else {
                // synthetic
                text = TestUtil.randomAnalysisString(random, maxWordLength, simple);
            }
            try {
                checkAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField);
                if (iw != null) {
                    if (random.nextInt(7) == 0) {
                        // pile up a multivalued field
                        // (the document is not added this round; a further "dummy" instance
                        // sharing the original field's type becomes the current field)
                        IndexableFieldType ft = field.fieldType();
                        currentField = new Field("dummy", bogus, ft);
                        doc.add(currentField);
                    } else {
                        iw.addDocument(doc);
                        if (doc.getFields().size() > 1) {
                            // back to 1 field
                            // (drop every "dummy" instance, then re-add only the original one)
                            currentField = field;
                            doc.removeFields("dummy");
                            doc.add(currentField);
                        }
                    }
                }
            } catch (Throwable t) {
                // TODO: really we should pass a random seed to
                // checkAnalysisConsistency then print it here too:
                // Print the inputs of the failing round before propagating the failure.
                System.err.println("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + escape(text) + "'");
                Rethrow.rethrow(t);
            }
        }
    } finally {
        // Always release the line-docs source, whether or not a round failed.
        IOUtils.closeWhileHandlingException(docs);
    }
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StringReader(java.io.StringReader) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) Document(org.apache.lucene.document.Document) LineFileDocs(org.apache.lucene.util.LineFileDocs) FieldType(org.apache.lucene.document.FieldType)

Aggregations

IndexableFieldType (org.apache.lucene.index.IndexableFieldType): 5; IndexableField (org.apache.lucene.index.IndexableField): 4; CompressedXContent (org.elasticsearch.common.compress.CompressedXContent): 3; Matchers.containsString (org.hamcrest.Matchers.containsString): 3; Document (org.apache.lucene.document.Document): 2; BytesRef (org.apache.lucene.util.BytesRef): 2; StringReader (java.io.StringReader): 1; ArrayList (java.util.ArrayList): 1; HashMap (java.util.HashMap): 1; HashSet (java.util.HashSet): 1; List (java.util.List): 1; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1; Field (org.apache.lucene.document.Field): 1; FieldType (org.apache.lucene.document.FieldType): 1; TextField (org.apache.lucene.document.TextField): 1; SortedSetDocValuesFacetField (org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField): 1; AssociationFacetField (org.apache.lucene.facet.taxonomy.AssociationFacetField): 1; FloatAssociationFacetField (org.apache.lucene.facet.taxonomy.FloatAssociationFacetField): 1; IntAssociationFacetField (org.apache.lucene.facet.taxonomy.IntAssociationFacetField): 1; LineFileDocs (org.apache.lucene.util.LineFileDocs): 1