Use of org.apache.lucene.index.IndexableFieldType in project elasticsearch by elastic.
The class KeywordFieldMapperTests, method testDefaults.
/**
 * Checks the Lucene fields produced for a {@code keyword} field that is mapped without any
 * extra options: one indexed, untokenized, unstored term field followed by one
 * SORTED_SET doc-values field, both carrying the raw value.
 */
public void testDefaults() throws Exception {
    // Smallest possible mapping: a single property "field" of type keyword.
    String mapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field")
                        .field("type", "keyword")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
    // The parsed mapping has to serialize back to exactly what was fed in.
    assertEquals(mapping, mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse(
        "test",
        "type",
        "1",
        XContentFactory.jsonBuilder().startObject().field("field", "1234").endObject().bytes());

    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);

    BytesRef expectedValue = new BytesRef("1234");

    // First field: the inverted-index entry.
    IndexableField indexedField = fields[0];
    assertEquals(expectedValue, indexedField.binaryValue());
    IndexableFieldType fieldType = indexedField.fieldType();
    assertThat(fieldType.omitNorms(), equalTo(true));
    assertFalse(fieldType.tokenized());
    assertFalse(fieldType.stored());
    assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
    assertFalse(fieldType.storeTermVectors());
    assertFalse(fieldType.storeTermVectorOffsets());
    assertFalse(fieldType.storeTermVectorPositions());
    assertFalse(fieldType.storeTermVectorPayloads());
    assertEquals(DocValuesType.NONE, fieldType.docValuesType());

    // Second field: the doc-values entry, which is not indexed.
    IndexableField docValuesField = fields[1];
    assertEquals(expectedValue, docValuesField.binaryValue());
    fieldType = docValuesField.fieldType();
    assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
    assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
}
Use of org.apache.lucene.index.IndexableFieldType in project elasticsearch by elastic.
The class TextFieldMapperTests, method testDefaults.
/**
 * Checks the single Lucene field produced for a {@code text} field that is mapped without
 * any extra options: tokenized, with norms, indexed with positions, not stored, no term
 * vectors and no doc values.
 */
public void testDefaults() throws Exception {
    // Smallest possible mapping: a single property "field" of type text.
    String mapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field")
                        .field("type", "text")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
    // The parsed mapping has to serialize back to exactly what was fed in.
    assertEquals(mapping, mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse(
        "test",
        "type",
        "1",
        XContentFactory.jsonBuilder().startObject().field("field", "1234").endObject().bytes());

    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(1, fields.length);

    IndexableField textField = fields[0];
    assertEquals("1234", textField.stringValue());

    IndexableFieldType fieldType = textField.fieldType();
    assertFalse(fieldType.omitNorms());
    assertTrue(fieldType.tokenized());
    assertFalse(fieldType.stored());
    assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS));
    assertFalse(fieldType.storeTermVectors());
    assertFalse(fieldType.storeTermVectorOffsets());
    assertFalse(fieldType.storeTermVectorPositions());
    assertFalse(fieldType.storeTermVectorPayloads());
    assertEquals(DocValuesType.NONE, fieldType.docValuesType());
}
Use of org.apache.lucene.index.IndexableFieldType in project elasticsearch by elastic.
The class KeywordFieldMapperTests, method testNormalizer.
/**
 * Checks that a {@code keyword} field configured with the {@code my_lowercase} normalizer
 * emits the normalized value ("AbC" becomes "abc") in both the indexed field and the
 * doc-values field, with the same field-type settings as an un-normalized keyword.
 */
public void testNormalizer() throws IOException {
    // Keyword mapping with a normalizer attached.
    String mapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field")
                        .field("type", "keyword")
                        .field("normalizer", "my_lowercase")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
    // The parsed mapping has to serialize back to exactly what was fed in.
    assertEquals(mapping, mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse(
        "test",
        "type",
        "1",
        XContentFactory.jsonBuilder().startObject().field("field", "AbC").endObject().bytes());

    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);

    // Mixed-case input is expected to come out lower-cased.
    BytesRef expectedValue = new BytesRef("abc");

    // First field: the inverted-index entry.
    IndexableField indexedField = fields[0];
    assertEquals(expectedValue, indexedField.binaryValue());
    IndexableFieldType fieldType = indexedField.fieldType();
    assertThat(fieldType.omitNorms(), equalTo(true));
    assertFalse(fieldType.tokenized());
    assertFalse(fieldType.stored());
    assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
    assertFalse(fieldType.storeTermVectors());
    assertFalse(fieldType.storeTermVectorOffsets());
    assertFalse(fieldType.storeTermVectorPositions());
    assertFalse(fieldType.storeTermVectorPayloads());
    assertEquals(DocValuesType.NONE, fieldType.docValuesType());

    // Second field: the doc-values entry, which is not indexed.
    IndexableField docValuesField = fields[1];
    assertEquals(expectedValue, docValuesField.binaryValue());
    fieldType = docValuesField.fieldType();
    assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
    assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
}
Use of org.apache.lucene.index.IndexableFieldType in project lucene-solr by apache.
The class FacetsConfig, method build.
/**
 * Builds a new document in which every facet field found in {@code doc}
 * ({@link FacetField}, {@link SortedSetDocValuesFacetField} and
 * {@link AssociationFacetField}) has been handed to the matching
 * {@code process*} step, and every other field is copied over as-is.
 *
 * <p>
 * <b>NOTE:</b> index the document returned here with IndexWriter, never the
 * one that was passed in!
 *
 * @param taxoWriter taxonomy writer forwarded to the facet and association processing steps
 * @param doc        input document containing facet fields and regular fields
 * @return a fresh document holding the processed facet data plus all non-facet fields
 * @throws IllegalArgumentException if an association facet dimension is configured as
 *         hierarchical or as requiring dim counts, or if association facets of different
 *         value types end up in the same index field
 */
public Document build(TaxonomyWriter taxoWriter, Document doc) throws IOException {
  // Facet fields grouped by the index field they are written to, one map per facet flavor.
  Map<String, List<FacetField>> byField = new HashMap<>();
  Map<String, List<SortedSetDocValuesFacetField>> dvByField = new HashMap<>();
  Map<String, List<AssociationFacetField>> assocByField = new HashMap<>();

  // Dimensions encountered so far; passed to checkSeen for dims that are not multi-valued.
  Set<String> seenDims = new HashSet<>();

  for (IndexableField field : doc) {
    IndexableFieldType fieldType = field.fieldType();

    if (fieldType == FacetField.TYPE) {
      FacetField facet = (FacetField) field;
      FacetsConfig.DimConfig config = getDimConfig(facet.dim);
      if (!config.multiValued) {
        checkSeen(seenDims, facet.dim);
      }
      String target = config.indexFieldName;
      List<FacetField> bucket = byField.get(target);
      if (bucket == null) {
        bucket = new ArrayList<>();
        byField.put(target, bucket);
      }
      bucket.add(facet);
    }

    if (fieldType == SortedSetDocValuesFacetField.TYPE) {
      SortedSetDocValuesFacetField facet = (SortedSetDocValuesFacetField) field;
      FacetsConfig.DimConfig config = getDimConfig(facet.dim);
      if (!config.multiValued) {
        checkSeen(seenDims, facet.dim);
      }
      String target = config.indexFieldName;
      List<SortedSetDocValuesFacetField> bucket = dvByField.get(target);
      if (bucket == null) {
        bucket = new ArrayList<>();
        dvByField.put(target, bucket);
      }
      bucket.add(facet);
    }

    if (fieldType == AssociationFacetField.TYPE) {
      AssociationFacetField facet = (AssociationFacetField) field;
      FacetsConfig.DimConfig config = getDimConfig(facet.dim);
      if (!config.multiValued) {
        checkSeen(seenDims, facet.dim);
      }
      // Association facets support neither hierarchies nor required dim counts.
      if (config.hierarchical) {
        throw new IllegalArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facet.dim + "\")");
      }
      if (config.requireDimCount) {
        throw new IllegalArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facet.dim + "\")");
      }

      String target = config.indexFieldName;
      List<AssociationFacetField> bucket = assocByField.get(target);
      if (bucket == null) {
        bucket = new ArrayList<>();
        assocByField.put(target, bucket);
      }
      bucket.add(facet);

      // Best effort: catch different association value types sharing one indexed field.
      String type = facet instanceof IntAssociationFacetField
          ? "int"
          : (facet instanceof FloatAssociationFacetField ? "float" : "bytes");

      // NOTE: this get-then-put is not thread safe, which is acceptable for a best-effort check.
      String knownType = assocDimTypes.get(target);
      if (knownType != null) {
        if (!knownType.equals(type)) {
          throw new IllegalArgumentException("mixing incompatible types of AssocationFacetField (" + knownType + " and " + type + ") in indexed field \"" + target + "\"; use FacetsConfig to change the indexFieldName for each dimension");
        }
      } else {
        assocDimTypes.put(target, type);
      }
    }
  }

  Document result = new Document();

  // Let each facet flavor add its processed output to the result.
  processFacetFields(taxoWriter, byField, result);
  processSSDVFacetFields(dvByField, result);
  processAssocFacetFields(taxoWriter, assocByField, result);

  // Finally carry over everything that is not a facet field.
  for (IndexableField field : doc.getFields()) {
    IndexableFieldType ft = field.fieldType();
    boolean isFacetField = ft == FacetField.TYPE
        || ft == SortedSetDocValuesFacetField.TYPE
        || ft == AssociationFacetField.TYPE;
    if (!isFacetField) {
      result.add(field);
    }
  }
  return result;
}
Use of org.apache.lucene.index.IndexableFieldType in project lucene-solr by apache.
The class BaseTokenStreamTestCase, method checkRandomData.
/**
 * Runs {@code iterations} rounds of {@code checkAnalysisConsistency} against randomly chosen
 * text and, when a writer is supplied, also feeds the analyzed "dummy" field to that writer.
 *
 * <p>Each round uses either a slice (at most {@code maxWordLength} chars) of the "body" of a
 * {@link LineFileDocs} document or a synthetic string from
 * {@code TestUtil.randomAnalysisString}. All random decisions are drawn from {@code random},
 * so the order of calls on it determines what a given seed produces.
 *
 * @param random            source of every random decision made here
 * @param a                 analyzer forwarded to {@code checkAnalysisConsistency}
 * @param iterations        number of texts to check
 * @param maxWordLength     length limit for synthetic text and for slices of real text
 * @param useCharFilter     forwarded to {@code checkAnalysisConsistency}; also printed on failure
 * @param simple            forwarded to {@code TestUtil.randomAnalysisString}
 * @param offsetsAreCorrect forwarded to {@code checkAnalysisConsistency}; when false, the
 *                          random field type never indexes offsets
 * @param iw                writer to add documents to, or {@code null} to skip indexing
 * @throws IOException declared by this method; presumably raised by the analysis or indexing
 *                     calls it makes (their signatures are not visible here)
 */
private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect, RandomIndexWriter iw) throws IOException {
final LineFileDocs docs = new LineFileDocs(random);
Document doc = null;
Field field = null, currentField = null;
// Placeholder reader handed to every "dummy" Field constructed in this method.
StringReader bogus = new StringReader("");
// Only build a document when a writer was supplied; otherwise doc, field and currentField stay null.
if (iw != null) {
doc = new Document();
// Start from TextField.TYPE_NOT_STORED and randomize term vectors, norms and index options.
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
if (random.nextBoolean()) {
ft.setStoreTermVectors(true);
ft.setStoreTermVectorOffsets(random.nextBoolean());
ft.setStoreTermVectorPositions(random.nextBoolean());
// Payloads are only randomized when term vector positions were switched on.
if (ft.storeTermVectorPositions()) {
ft.setStoreTermVectorPayloads(random.nextBoolean());
}
}
if (random.nextBoolean()) {
ft.setOmitNorms(true);
}
switch(random.nextInt(4)) {
case 0:
ft.setIndexOptions(IndexOptions.DOCS);
break;
case 1:
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
break;
case 2:
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
break;
default:
// Offsets are only indexed when the caller says they are correct; otherwise use positions only.
if (offsetsAreCorrect) {
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
} else {
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
}
}
// 'field' keeps the first Field instance; 'currentField' is the one passed to the next consistency check.
currentField = field = new Field("dummy", bogus, ft);
doc.add(currentField);
}
try {
for (int i = 0; i < iterations; i++) {
String text;
// nextInt(10) == 7: one of ten possible values selects real data, the rest synthetic text.
if (random.nextInt(10) == 7) {
// real data from linedocs
text = docs.nextDoc().get("body");
if (text.length() > maxWordLength) {
// Take a random slice from the text...:
int startPos = random.nextInt(text.length() - maxWordLength);
if (startPos > 0 && Character.isLowSurrogate(text.charAt(startPos))) {
// Take care not to split up a surrogate pair:
startPos--;
assert Character.isHighSurrogate(text.charAt(startPos));
}
// endPos is inclusive, hence the "1 + endPos" passed to substring below.
int endPos = startPos + maxWordLength - 1;
if (Character.isHighSurrogate(text.charAt(endPos))) {
// Take care not to split up a surrogate pair:
endPos--;
}
text = text.substring(startPos, 1 + endPos);
}
} else {
// synthetic
text = TestUtil.randomAnalysisString(random, maxWordLength, simple);
}
try {
checkAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField);
if (iw != null) {
// nextInt(7) == 0: do not index yet; append another "dummy" value to the same document instead.
if (random.nextInt(7) == 0) {
// pile up a multivalued field
IndexableFieldType ft = field.fieldType();
currentField = new Field("dummy", bogus, ft);
doc.add(currentField);
} else {
// Index what has accumulated, then shrink the document back to the original single field.
iw.addDocument(doc);
if (doc.getFields().size() > 1) {
// back to 1 field
currentField = field;
doc.removeFields("dummy");
doc.add(currentField);
}
}
}
} catch (Throwable t) {
// Print the char-filter flag and the escaped failing text to stderr, then rethrow the same throwable.
// TODO: really we should pass a random seed to
// checkAnalysisConsistency then print it here too:
System.err.println("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + escape(text) + "'");
Rethrow.rethrow(t);
}
}
} finally {
// Close the line-docs source whether or not a check failed.
IOUtils.closeWhileHandlingException(docs);
}
}
Aggregations