use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.
the class LindenSchemaBuilder method parseFiled.
/**
 * Builds a {@link LindenFieldSchema} from one column element of the schema XML.
 *
 * <p>Only attributes that are actually present on the element are applied; every other
 * property keeps whatever value a freshly constructed {@code LindenFieldSchema} has.
 * Attribute values are compared case-insensitively, using {@code Locale.ROOT} so that the
 * result does not depend on the JVM's default locale (e.g. the Turkish dotless "i").
 *
 * @param element the XML element describing a single field
 * @return the populated field schema
 * @throws IllegalArgumentException presumably, when the type attribute is neither the
 *     text/int alias nor the name of a {@code LindenType} constant (raised by
 *     {@code LindenType.valueOf})
 */
private static LindenFieldSchema parseFiled(Element element) {
  LindenFieldSchema field = new LindenFieldSchema();

  if (element.hasAttribute(NAME)) {
    field.setName(element.getAttribute(NAME));
  }

  // LindenType has STRING and INTEGER but no "text"/"int" constants, so map those aliases.
  if (element.hasAttribute(TYPE)) {
    String rawType = element.getAttribute(TYPE);
    String lowerType = rawType.toLowerCase(java.util.Locale.ROOT);
    if (lowerType.equals(TEXT)) {
      field.setType(LindenType.STRING);
    } else if (lowerType.equals(INT)) {
      field.setType(LindenType.INTEGER);
    } else {
      field.setType(LindenType.valueOf(rawType.toUpperCase(java.util.Locale.ROOT)));
    }
  }

  if (element.hasAttribute(STORE)) {
    String store = element.getAttribute(STORE);
    field.setStored(store.toLowerCase(java.util.Locale.ROOT).equals(YES));
  }

  // Tokenized, omitNorms and omitFreqs together describe how the field is indexed.
  if (element.hasAttribute(TOKENIZED)) {
    String tokenized = element.getAttribute(TOKENIZED);
    field.setTokenized(tokenized.toLowerCase(java.util.Locale.ROOT).equals(YES));
  }
  if (element.hasAttribute(OMITNORMS)) {
    String omitNorms = element.getAttribute(OMITNORMS);
    field.setOmitNorms(omitNorms.toLowerCase(java.util.Locale.ROOT).equals(YES));
  }
  if (element.hasAttribute(OMITFREQS)) {
    String omitFreqs = element.getAttribute(OMITFREQS);
    field.setOmitFreqs(omitFreqs.toLowerCase(java.util.Locale.ROOT).equals(YES));
  }

  if (element.hasAttribute(SNIPPET)) {
    String snippet = element.getAttribute(SNIPPET);
    field.setSnippet(snippet.toLowerCase(java.util.Locale.ROOT).equals(YES));
  }
  if (element.hasAttribute(DOCVALUES)) {
    String docValues = element.getAttribute(DOCVALUES);
    field.setDocValues(docValues.toLowerCase(java.util.Locale.ROOT).equals(YES));
  }
  if (element.hasAttribute(MULTI)) {
    String multi = element.getAttribute(MULTI);
    field.setMulti(multi.toLowerCase(java.util.Locale.ROOT).equals(YES));
  }

  // The indexed flag is only touched for an explicit yes/no; any other value leaves it unset.
  if (element.hasAttribute(INDEX)) {
    String index = element.getAttribute(INDEX).toLowerCase(java.util.Locale.ROOT);
    boolean indexed = index.equals(YES);
    if (indexed || index.equals(NO)) {
      field.setIndexed(indexed);
    }
  }

  return field;
}
use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.
the class LindenSchemaBuilder method build.
/**
 * Parses a Linden schema XML file into a {@link LindenSchema}.
 *
 * <p>The schema id is taken from the id attribute of the first table element; every
 * column element in the document is converted with {@code parseFiled}, checked with
 * {@code verifyFieldSchema} and appended to the schema in document order.
 *
 * @param schemaFile the schema XML file; must not be null
 * @return the schema described by the file
 * @throws NullPointerException if {@code schemaFile} is null, or if the document has no
 *     table element or the table element has no id attribute
 * @throws Exception if the file cannot be read or parsed, or a field fails verification
 */
public static LindenSchema build(File schemaFile) throws Exception {
  Preconditions.checkNotNull(schemaFile);

  // NOTE(review): the parser is created with default factory settings (DTDs and external
  // entities are not disabled). Harden the factory if schema files may be untrusted.
  DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
  Document dom = db.parse(schemaFile);
  dom.getDocumentElement().normalize();

  // Fail with a descriptive message instead of a bare NPE on a malformed schema file.
  Node tableNode = dom.getElementsByTagName(TABLE).item(0);
  Preconditions.checkNotNull(tableNode,
      "Schema file " + schemaFile + " contains no <" + TABLE + "> element");
  Node idAttribute = tableNode.getAttributes().getNamedItem(TABLE_ID);
  Preconditions.checkNotNull(idAttribute,
      "Element <" + TABLE + "> in schema file " + schemaFile
          + " has no '" + TABLE_ID + "' attribute");

  LindenSchema lindenSchema = new LindenSchema();
  lindenSchema.setId(idAttribute.getNodeValue());

  NodeList columns = dom.getElementsByTagName(TABLE_COLUMN);
  for (int i = 0; i < columns.getLength(); ++i) {
    LindenFieldSchema fieldSchema = parseFiled((Element) columns.item(i));
    verifyFieldSchema(fieldSchema);
    lindenSchema.addToFields(fieldSchema);
  }
  return lindenSchema;
}
use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.
the class LindenUtil method getSource.
/**
* Builds the JSON "source" of one document: the id field plus the requested schema fields.
*
* <p>Each field value is read through {@code FieldCache} when the schema allows it (multi
* fields, doc-values fields, and indexed fields that are not possibly tokenized strings);
* the remaining stored fields are loaded from the stored document in a single call.
*
* @param indexSearcher The IndexSearcher
* @param docId Doc ID (global, i.e. relative to the whole index reader).
* @param id Id field value; when null it is read from the field cache instead.
* @param sourceFields Specify the fields, if null or empty get all schema fields values.
* @param config the lindenConfig for search
* @return JSON String which contains field values.
* @throws IOException declared for the index read operations used here
* @throws IllegalStateException if a field read through the field cache has a type other
* than STRING, FACET, INTEGER, LONG, FLOAT or DOUBLE
*/
public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields, LindenConfig config) throws IOException {
// Locate the leaf (segment) reader that holds docId and convert docId to a leaf-local id.
List<AtomicReaderContext> leaves = indexSearcher.getIndexReader().leaves();
int idx = ReaderUtil.subIndex(docId, leaves);
AtomicReaderContext atomicReaderContext = leaves.get(idx);
AtomicReader reader = atomicReaderContext.reader();
int locDocId = docId - atomicReaderContext.docBase;
JSONObject src = new JSONObject();
// The id field is always emitted: use the caller-supplied value, else look it up.
String idFieldName = config.getSchema().getId();
if (id != null) {
src.put(idFieldName, id);
} else {
src.put(idFieldName, FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(locDocId).utf8ToString());
}
// Resolve the schemas of the fields to return; the id field is skipped because it was
// already written above.
List<LindenFieldSchema> fields = new ArrayList<>();
if (sourceFields != null && !sourceFields.isEmpty()) {
for (String sourceField : sourceFields) {
if (sourceField.equals(idFieldName)) {
continue;
}
// NOTE(review): the lookup result is not null-checked; a null here would fail at
// fieldSchema.getName() below. Confirm getFieldSchema never returns null.
LindenFieldSchema fieldSchema = config.getFieldSchema(sourceField);
fields.add(fieldSchema);
}
} else {
fields.addAll(config.getSchema().getFields());
}
// Fields that have to be loaded from the stored document, keyed by field name.
Map<String, LindenFieldSchema> storedFields = new HashMap<>();
for (LindenFieldSchema fieldSchema : fields) {
String name = fieldSchema.getName();
// Used in two phases: first "read this field via the field cache", then (after the
// switch below) "the value read should actually be emitted".
boolean fieldCache = false;
if (fieldSchema.isMulti()) {
/**
* multi-field has multiple values, each value is indexed to the document according to field type
* multi-field source value is in JSONArray format, something like "["MI4","MI Note","RedMI3"]"
* multi-field source value is stored in BinaryDocValues
*/
String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
if (StringUtils.isNotEmpty(blob)) {
src.put(name, JSON.parseArray(blob));
}
} else if (fieldSchema.isDocValues()) {
fieldCache = true;
} else if (fieldSchema.isIndexed() && fieldSchema.isStored()) {
// field cache doesn't support tokenized string field
if (config.isEnableSourceFieldCache() && !possibleTokenizedString(fieldSchema)) {
fieldCache = true;
} else {
storedFields.put(name, fieldSchema);
}
} else if (fieldSchema.isIndexed()) {
// Indexed but not stored: the field cache is the only source, so a possibly
// tokenized string field yields no value at all.
if (!possibleTokenizedString(fieldSchema)) {
fieldCache = true;
}
} else if (fieldSchema.isStored()) {
storedFields.put(name, fieldSchema);
}
if (fieldCache) {
Object val;
// In every case an empty/zero value is emitted only when actualContain(...) is true;
// presumably this tells a real empty/zero value from a missing one - verify against
// actualContain.
switch(fieldSchema.getType()) {
case STRING:
case FACET:
val = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
String v = (String) val;
fieldCache = !v.isEmpty() || actualContain(reader, name, locDocId);
break;
case INTEGER:
val = FieldCache.DEFAULT.getInts(reader, name, false).get(locDocId);
fieldCache = ((int) val) != 0 || actualContain(reader, name, locDocId);
break;
case LONG:
val = FieldCache.DEFAULT.getLongs(reader, name, false).get(locDocId);
fieldCache = ((long) val != 0) || actualContain(reader, name, locDocId);
break;
case FLOAT:
val = FieldCache.DEFAULT.getFloats(reader, name, false).get(locDocId);
fieldCache = ((float) val != 0) || actualContain(reader, name, locDocId);
break;
case DOUBLE:
val = FieldCache.DEFAULT.getDoubles(reader, name, false).get(locDocId);
fieldCache = ((double) val != 0) || actualContain(reader, name, locDocId);
break;
default:
throw new IllegalStateException("Unsupported linden type");
}
if (fieldCache) {
src.put(name, val);
}
}
}
// Load everything that could not come from the field cache from the stored document,
// restricted to the collected field names. Note this uses the global docId.
if (!storedFields.isEmpty()) {
Document doc = indexSearcher.doc(docId, storedFields.keySet());
for (IndexableField field : doc.getFields()) {
String name = field.name();
LindenFieldSchema schema = storedFields.get(name);
Object obj = src.get(name);
Object val = parseLindenValue(field.stringValue(), storedFields.get(name).getType());
if (obj == null) {
// First value for this name: multi fields are wrapped in an array right away.
if (schema.isMulti()) {
JSONArray array = new JSONArray();
array.add(val);
src.put(name, array);
} else {
src.put(name, val);
}
} else if (obj instanceof JSONArray) {
// Further value for a name that already holds an array: append.
((JSONArray) obj).add(val);
} else {
// Second value for a name that held a single value: promote it to an array.
JSONArray array = new JSONArray();
array.add(obj);
array.add(val);
src.put(name, array);
}
}
}
return src.toJSONString();
}
use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.
the class TestLindenWordDelimiterAnalyzer method init.
/**
 * Sets up a RAM index whose only schema field is a tokenized, multi-valued "title" with
 * snippets enabled, and uses LindenWordDelimiterAnalyzerFactory for both index and search.
 */
@Override
public void init() {
  final String analyzerFactory =
      "com.xiaomi.linden.lucene.analyzer.LindenWordDelimiterAnalyzerFactory";
  final String luceneVersion = "LUCENE_4_10_0";

  // Describe the single "title" field step by step instead of through a fluent chain.
  LindenFieldSchema titleField = new LindenFieldSchema();
  titleField.setName("title");
  titleField.setIndexed(true);
  titleField.setTokenized(true);
  titleField.setSnippet(true);
  titleField.setMulti(true);

  LindenSchema schema = new LindenSchema();
  schema.setId("id");
  schema.addToFields(titleField);

  lindenConfig.setIndexType(LindenConfig.IndexType.RAM);
  lindenConfig.setClusterUrl("127.0.0.1:2181/mock");
  lindenConfig.setSchema(schema);

  // The same analyzer factory and Lucene match version apply to search and indexing.
  lindenConfig.putToProperties("search.analyzer.class", analyzerFactory);
  lindenConfig.putToProperties("index.analyzer.class", analyzerFactory);
  lindenConfig.putToProperties("index.analyzer.luceneMatchVersion", luceneVersion);
  lindenConfig.putToProperties("search.analyzer.luceneMatchVersion", luceneVersion);
}
use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.
the class TestFlexibleQuery method init.
/**
 * Installs a schema with id field "id" and two indexed, stored, tokenized fields:
 * "text" and "title" (added in that order).
 */
@Override
public void init() throws Exception {
  LindenSchema schema = new LindenSchema();
  schema.setId("id");

  // Both fields share the same flags, so build them from their names.
  for (String fieldName : new String[] {"text", "title"}) {
    LindenFieldSchema tokenizedField = new LindenFieldSchema();
    tokenizedField.setName(fieldName);
    tokenizedField.setIndexed(true);
    tokenizedField.setStored(true);
    tokenizedField.setTokenized(true);
    schema.addToFields(tokenizedField);
  }

  lindenConfig.setSchema(schema);
}
Aggregations