Search in sources :

Example 11 with LindenFieldSchema

use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.

the class LindenSchemaBuilder method parseFiled.

/**
 * Builds a {@link LindenFieldSchema} from the attributes of one schema XML element.
 *
 * <p>Only attributes that are actually present on the element are applied; everything
 * else keeps whatever default a freshly constructed {@code LindenFieldSchema} has.
 * All case conversions use {@code Locale.ROOT} so that parsing does not depend on the
 * JVM's default locale (for example, under a Turkish locale "INT" would otherwise
 * lower-case to "ınt" and never match).
 *
 * @param element the XML element describing a single field
 * @return the populated field schema
 * @throws IllegalArgumentException if the type attribute is neither one of the
 *         aliases handled here nor a constant name accepted by {@code LindenType.valueOf}
 */
private static LindenFieldSchema parseFiled(Element element) {
    LindenFieldSchema field = new LindenFieldSchema();
    if (element.hasAttribute(NAME)) {
        field.setName(element.getAttribute(NAME));
    }
    // LindenType has STRING but no TEXT (and INTEGER but no INT), so those two
    // spellings are mapped by hand; anything else must be a LindenType constant name.
    if (element.hasAttribute(TYPE)) {
        String type = element.getAttribute(TYPE).toUpperCase(java.util.Locale.ROOT);
        String lowerType = type.toLowerCase(java.util.Locale.ROOT);
        if (lowerType.equals(TEXT)) {
            field.setType(LindenType.STRING);
        } else if (lowerType.equals(INT)) {
            field.setType(LindenType.INTEGER);
        } else {
            field.setType(LindenType.valueOf(type));
        }
    }
    if (element.hasAttribute(STORE)) {
        field.setStored(attributeIsYes(element, STORE));
    }
    // tokenized / omitNorms / omitFreqs together describe how the field is indexed
    if (element.hasAttribute(TOKENIZED)) {
        field.setTokenized(attributeIsYes(element, TOKENIZED));
    }
    if (element.hasAttribute(OMITNORMS)) {
        field.setOmitNorms(attributeIsYes(element, OMITNORMS));
    }
    if (element.hasAttribute(OMITFREQS)) {
        field.setOmitFreqs(attributeIsYes(element, OMITFREQS));
    }
    if (element.hasAttribute(SNIPPET)) {
        field.setSnippet(attributeIsYes(element, SNIPPET));
    }
    if (element.hasAttribute(DOCVALUES)) {
        field.setDocValues(attributeIsYes(element, DOCVALUES));
    }
    if (element.hasAttribute(MULTI)) {
        field.setMulti(attributeIsYes(element, MULTI));
    }
    // Unlike the flags above, the indexed flag is only touched for an explicit
    // yes/no value; any other value leaves the schema's default in place.
    if (element.hasAttribute(INDEX)) {
        String index = element.getAttribute(INDEX).toLowerCase(java.util.Locale.ROOT);
        if (index.equals(YES) || index.equals(NO)) {
            field.setIndexed(index.equals(YES));
        }
    }
    return field;
}

/** Returns whether the named attribute's value, lower-cased locale-independently, equals YES. */
private static boolean attributeIsYes(Element element, String attribute) {
    return element.getAttribute(attribute).toLowerCase(java.util.Locale.ROOT).equals(YES);
}
Also used : LindenFieldSchema(com.xiaomi.linden.thrift.common.LindenFieldSchema)

Example 12 with LindenFieldSchema

use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.

the class LindenSchemaBuilder method build.

/**
 * Parses a Linden schema XML file into a {@link LindenSchema}.
 *
 * <p>The schema id is read from the TABLE_ID attribute of the first TABLE element, and
 * every TABLE_COLUMN element in the document is converted with {@code parseFiled},
 * checked with {@code verifyFieldSchema} and appended to the schema's fields.
 *
 * @param schemaFile the XML schema file; must not be null
 * @return the schema described by the file
 * @throws IllegalArgumentException if the file has no TABLE element or that element
 *         has no TABLE_ID attribute
 * @throws Exception if the file cannot be read or parsed, or a field fails verification
 */
public static LindenSchema build(File schemaFile) throws Exception {
    Preconditions.checkNotNull(schemaFile);
    // NOTE(review): the parser uses the factory's default settings. If schema files can
    // ever come from an untrusted source, consider disabling DTDs/external entities here.
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = dbf.newDocumentBuilder();
    Document dom = db.parse(schemaFile);
    dom.getDocumentElement().normalize();

    // NodeList.item returns null when the index is out of range, i.e. no TABLE element.
    Node tableNode = dom.getElementsByTagName(TABLE).item(0);
    if (tableNode == null) {
        throw new IllegalArgumentException(
            "Schema file " + schemaFile + " contains no <" + TABLE + "> element");
    }
    // getNamedItem returns null when the attribute is absent.
    Node idAttribute = tableNode.getAttributes().getNamedItem(TABLE_ID);
    if (idAttribute == null) {
        throw new IllegalArgumentException(
            "Schema file " + schemaFile + ": <" + TABLE + "> element has no '" + TABLE_ID + "' attribute");
    }

    LindenSchema lindenSchema = new LindenSchema();
    lindenSchema.setId(idAttribute.getNodeValue());
    NodeList colList = dom.getElementsByTagName(TABLE_COLUMN);
    for (int i = 0; i < colList.getLength(); ++i) {
        Element element = (Element) colList.item(i);
        LindenFieldSchema fieldSchema = parseFiled(element);
        verifyFieldSchema(fieldSchema);
        lindenSchema.addToFields(fieldSchema);
    }
    return lindenSchema;
}
Also used : LindenFieldSchema(com.xiaomi.linden.thrift.common.LindenFieldSchema) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) LindenSchema(com.xiaomi.linden.thrift.common.LindenSchema) DocumentBuilder(javax.xml.parsers.DocumentBuilder) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) Document(org.w3c.dom.Document)

Example 13 with LindenFieldSchema

use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.

the class LindenUtil method getSource.

/**
 * Assembles the source of one document as a JSON string.
 *
 * <p>The id field is always included. Each remaining field is read either from the
 * field cache or from the document's stored fields, depending on the multi, docValues,
 * indexed and stored flags of its schema.
 *
 * @param indexSearcher searcher used to locate the document's segment and to load stored fields
 * @param docId         top-level document id
 * @param id            value of the id field; when null it is read from the field cache
 * @param sourceFields  names of the fields to return; null or empty means every schema field
 * @param config        Linden configuration supplying the schema and field-cache setting
 * @return JSON string holding the collected field values
 * @throws IOException if reading from the index fails
 */
public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields, LindenConfig config) throws IOException {
    // Resolve the segment holding the document and its segment-local doc id.
    List<AtomicReaderContext> readerContexts = indexSearcher.getIndexReader().leaves();
    AtomicReaderContext leaf = readerContexts.get(ReaderUtil.subIndex(docId, readerContexts));
    AtomicReader reader = leaf.reader();
    int locDocId = docId - leaf.docBase;

    JSONObject src = new JSONObject();
    String idFieldName = config.getSchema().getId();
    String idValue = (id != null)
        ? id
        : FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(locDocId).utf8ToString();
    src.put(idFieldName, idValue);

    // Decide which field schemas to resolve; the id field has already been handled.
    List<LindenFieldSchema> requested = new ArrayList<>();
    if (sourceFields == null || sourceFields.isEmpty()) {
        requested.addAll(config.getSchema().getFields());
    } else {
        for (String sourceField : sourceFields) {
            if (!sourceField.equals(idFieldName)) {
                requested.add(config.getFieldSchema(sourceField));
            }
        }
    }

    Map<String, LindenFieldSchema> storedFields = new HashMap<>();
    for (LindenFieldSchema fieldSchema : requested) {
        String name = fieldSchema.getName();
        boolean readFromCache = false;
        if (fieldSchema.isMulti()) {
            // A multi field carries several values, each indexed according to the field type.
            // Its source value is a JSON array such as ["MI4","MI Note","RedMI3"]
            // (per the original note, kept in BinaryDocValues) and is read back as one blob.
            String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
            if (StringUtils.isNotEmpty(blob)) {
                src.put(name, JSON.parseArray(blob));
            }
        } else if (fieldSchema.isDocValues()) {
            readFromCache = true;
        } else if (fieldSchema.isIndexed()) {
            // The field cache doesn't support tokenized string fields.
            boolean cacheable = !possibleTokenizedString(fieldSchema);
            if (fieldSchema.isStored()) {
                if (config.isEnableSourceFieldCache() && cacheable) {
                    readFromCache = true;
                } else {
                    storedFields.put(name, fieldSchema);
                }
            } else if (cacheable) {
                readFromCache = true;
            }
        } else if (fieldSchema.isStored()) {
            storedFields.put(name, fieldSchema);
        }

        if (!readFromCache) {
            continue;
        }

        // An empty/zero cached value is only emitted when actualContain confirms it.
        Object val;
        boolean present;
        switch (fieldSchema.getType()) {
            case STRING:
            case FACET:
                String text = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
                val = text;
                present = !text.isEmpty() || actualContain(reader, name, locDocId);
                break;
            case INTEGER:
                val = FieldCache.DEFAULT.getInts(reader, name, false).get(locDocId);
                present = ((int) val) != 0 || actualContain(reader, name, locDocId);
                break;
            case LONG:
                val = FieldCache.DEFAULT.getLongs(reader, name, false).get(locDocId);
                present = ((long) val) != 0 || actualContain(reader, name, locDocId);
                break;
            case FLOAT:
                val = FieldCache.DEFAULT.getFloats(reader, name, false).get(locDocId);
                present = ((float) val) != 0 || actualContain(reader, name, locDocId);
                break;
            case DOUBLE:
                val = FieldCache.DEFAULT.getDoubles(reader, name, false).get(locDocId);
                present = ((double) val) != 0 || actualContain(reader, name, locDocId);
                break;
            default:
                throw new IllegalStateException("Unsupported linden type");
        }
        if (present) {
            src.put(name, val);
        }
    }

    if (!storedFields.isEmpty()) {
        Document doc = indexSearcher.doc(docId, storedFields.keySet());
        for (IndexableField field : doc.getFields()) {
            String name = field.name();
            LindenFieldSchema schema = storedFields.get(name);
            Object existing = src.get(name);
            Object val = parseLindenValue(field.stringValue(), schema.getType());
            if (existing instanceof JSONArray) {
                // Further occurrence of a field that is already an array.
                ((JSONArray) existing).add(val);
            } else if (existing != null) {
                // Second occurrence of a field first seen as a scalar: promote to an array.
                JSONArray array = new JSONArray();
                array.add(existing);
                array.add(val);
                src.put(name, array);
            } else if (schema.isMulti()) {
                JSONArray array = new JSONArray();
                array.add(val);
                src.put(name, array);
            } else {
                src.put(name, val);
            }
        }
    }
    return src.toJSONString();
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JSONArray(com.alibaba.fastjson.JSONArray) AtomicReader(org.apache.lucene.index.AtomicReader) Document(org.apache.lucene.document.Document) LindenFieldSchema(com.xiaomi.linden.thrift.common.LindenFieldSchema) IndexableField(org.apache.lucene.index.IndexableField) JSONObject(com.alibaba.fastjson.JSONObject) JSONObject(com.alibaba.fastjson.JSONObject) AtomicReaderContext(org.apache.lucene.index.AtomicReaderContext)

Example 14 with LindenFieldSchema

use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.

the class TestLindenWordDelimiterAnalyzer method init.

/**
 * Configures an in-memory index with a single tokenized, multi-valued, snippet-enabled
 * "title" field and registers LindenWordDelimiterAnalyzerFactory as both the index-time
 * and the search-time analyzer.
 */
@Override
public void init() {
    final String analyzerFactory = "com.xiaomi.linden.lucene.analyzer.LindenWordDelimiterAnalyzerFactory";
    final String matchVersion = "LUCENE_4_10_0";

    lindenConfig.setIndexType(LindenConfig.IndexType.RAM);
    lindenConfig.setClusterUrl("127.0.0.1:2181/mock");

    LindenFieldSchema titleField = new LindenFieldSchema()
        .setName("title")
        .setIndexed(true)
        .setTokenized(true)
        .setSnippet(true)
        .setMulti(true);
    LindenSchema schema = new LindenSchema().setId("id");
    schema.addToFields(titleField);
    lindenConfig.setSchema(schema);

    lindenConfig.putToProperties("search.analyzer.class", analyzerFactory);
    lindenConfig.putToProperties("index.analyzer.class", analyzerFactory);
    lindenConfig.putToProperties("index.analyzer.luceneMatchVersion", matchVersion);
    lindenConfig.putToProperties("search.analyzer.luceneMatchVersion", matchVersion);
}
Also used : LindenFieldSchema(com.xiaomi.linden.thrift.common.LindenFieldSchema) LindenSchema(com.xiaomi.linden.thrift.common.LindenSchema)

Example 15 with LindenFieldSchema

use of com.xiaomi.linden.thrift.common.LindenFieldSchema in project linden by XiaoMi.

the class TestFlexibleQuery method init.

/**
 * Installs a schema with id field "id" and two identically configured fields,
 * "text" and "title", each indexed, stored and tokenized.
 */
@Override
public void init() throws Exception {
    LindenSchema schema = new LindenSchema().setId("id");
    for (String fieldName : new String[] {"text", "title"}) {
        LindenFieldSchema fieldSchema = new LindenFieldSchema()
            .setName(fieldName)
            .setIndexed(true)
            .setStored(true)
            .setTokenized(true);
        schema.addToFields(fieldSchema);
    }
    lindenConfig.setSchema(schema);
}
Also used : LindenFieldSchema(com.xiaomi.linden.thrift.common.LindenFieldSchema) LindenSchema(com.xiaomi.linden.thrift.common.LindenSchema)

Aggregations

LindenFieldSchema (com.xiaomi.linden.thrift.common.LindenFieldSchema)31 LindenSchema (com.xiaomi.linden.thrift.common.LindenSchema)17 LindenField (com.xiaomi.linden.thrift.common.LindenField)4 JSONObject (com.alibaba.fastjson.JSONObject)3 LindenDocument (com.xiaomi.linden.thrift.common.LindenDocument)3 IOException (java.io.IOException)3 JSONArray (com.alibaba.fastjson.JSONArray)2 Coordinate (com.xiaomi.linden.thrift.common.Coordinate)2 LindenType (com.xiaomi.linden.thrift.common.LindenType)2 Document (org.apache.lucene.document.Document)2 IndexableField (org.apache.lucene.index.IndexableField)2 Shape (com.spatial4j.core.shape.Shape)1 LindenValue (com.xiaomi.linden.thrift.common.LindenValue)1 File (java.io.File)1 PrintWriter (java.io.PrintWriter)1 StringWriter (java.io.StringWriter)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1