Examples with IndexSchema - org.apache.solr.schema.IndexSchema

Example 51 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project SearchServices by Alfresco.

the class AlfrescoLukeRequestHandler method handleRequestBody.

/**
 * Handles a Luke-style inspection request.
 * <p>
 * Index-level information is always added to the response. Depending on the
 * {@code show} parameter and on whether a document was selected (through the
 * {@code DOC_ID} or {@code ID} parameter), the response additionally carries
 * a single document, the schema, or the indexed fields.
 *
 * @param req the request; supplies the schema, the searcher and the parameters
 * @param rsp the response the report sections are added to
 * @throws SolrException with {@code NOT_FOUND} if the requested document
 *         cannot be found or read, or with {@code BAD_REQUEST} if a document
 *         was selected together with a {@code show} style other than DOC
 * @throws Exception if one of the helper calls fails
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    IndexSchema schema = req.getSchema();
    SolrIndexSearcher searcher = req.getSearcher();
    DirectoryReader reader = searcher.getIndexReader();
    SolrParams params = req.getParams();
    ShowStyle style = ShowStyle.get(params.get("show"));
    // The index section is part of every response.
    rsp.add("index", getIndexInfo(reader));
    if (ShowStyle.INDEX == style) {
        // that's all we need
        return;
    }
    Integer docId = params.getInt(DOC_ID);
    if (docId == null && params.get(ID) != null) {
        // No Lucene doc id given: resolve the document through its unique key.
        SchemaField uniqueKey = schema.getUniqueKeyField();
        String v = uniqueKey.getType().toInternal(params.get(ID));
        Term t = new Term(uniqueKey.getName(), v);
        docId = searcher.getFirstMatch(t);
        if (docId < 0) {
            throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID));
        }
    }
    if (docId != null) {
        // A document was selected; that is only compatible with the DOC style (or no style).
        if (style != null && style != ShowStyle.DOC) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "missing doc param for doc style");
        }
        // Read the document from the index. A read failure is still reported
        // as NOT_FOUND, but the underlying exception is kept as the cause
        // instead of being silently discarded.
        Document doc = null;
        Exception readFailure = null;
        try {
            doc = reader.document(docId);
        } catch (Exception ex) {
            readFailure = ex;
        }
        if (doc == null) {
            throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + docId, readFailure);
        }
        SimpleOrderedMap<Object> info = getDocumentFieldsInfo(doc, docId, reader, schema);
        SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<>();
        docinfo.add("docId", docId);
        docinfo.add("lucene", info);
        docinfo.add("solr", doc);
        rsp.add("doc", docinfo);
    } else if (ShowStyle.SCHEMA == style) {
        rsp.add("schema", getSchemaInfo(schema));
    } else {
        // If no doc is given, show all fields and top terms
        rsp.add("fields", getIndexedFieldsInfo(req));
    }
    // Add some generally helpful information
    NamedList<Object> info = new SimpleOrderedMap<>();
    info.add("key", getFieldFlagsKey());
    info.add("NOTE", "Document Frequency (df) is not updated when a document is marked for deletion.  df values include deleted documents.");
    rsp.add("info", info);
    rsp.setHttpCaching(false);
}

Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) SolrException(org.apache.solr.common.SolrException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) SolrParams(org.apache.solr.common.params.SolrParams) IndexSchema(org.apache.solr.schema.IndexSchema)

Example 52 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project SearchServices by Alfresco.

the class AlfrescoLukeRequestHandler method getIndexedFieldsInfo.

/**
 * Collects per-field information for the fields present in the index.
 * <p>
 * Field names are processed in alphabetical order. When the {@code fl}
 * parameter is given, only the listed fields (or all fields, if the list
 * contains {@code *}) are reported, and detailed information is added for
 * each of them. Every reported field is registered twice: once under its
 * index field name and once under the name returned by
 * {@code AlfrescoSolrDataModel.getAlfrescoPropertyFromSchemaField}.
 *
 * @param req the current request; supplies the searcher and the parameters
 * @return a map holding the entries of the first collection followed by the
 *         entries of the second one
 * @throws Exception propagated from the index access and the helper calls
 */
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req) throws Exception {
    SolrIndexSearcher searcher = req.getSearcher();
    SolrParams params = req.getParams();
    Set<String> fields = null;
    String fl = params.get(CommonParams.FL);
    if (fl != null) {
        fields = new TreeSet<>(Arrays.asList(fl.split("[,\\s]+")));
    }
    LeafReader reader = searcher.getSlowAtomicReader();
    IndexSchema schema = searcher.getSchema();
    // Don't be tempted to put this in the loop below, the whole point here
    // is to alphabetize the fields!
    Set<String> fieldNames = new TreeSet<>();
    for (FieldInfo fieldInfo : reader.getFieldInfos()) {
        fieldNames.add(fieldInfo.name);
    }
    // Two parallel collections: both receive every reported field, keyed by
    // the index field name in one and by the Alfresco property name in the other.
    SimpleOrderedMap<Object> vInfo = new SimpleOrderedMap<>();
    SimpleOrderedMap<Object> aInfo = new SimpleOrderedMap<>();
    for (String fieldName : fieldNames) {
        if (fields != null && !fields.contains(fieldName) && !fields.contains("*")) {
            // we're not interested in this field
            continue;
        }
        SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>();
        SchemaField sfield = schema.getFieldOrNull(fieldName);
        FieldType ftype = (sfield == null) ? null : sfield.getType();
        fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName());
        fieldMap.add("schema", getFieldFlags(sfield));
        if (sfield != null && schema.isDynamicField(sfield.getName())) {
            // Look the pattern up once and reuse it for both the check and the value.
            String dynamicBase = schema.getDynamicPattern(sfield.getName());
            if (dynamicBase != null) {
                fieldMap.add("dynamicBase", dynamicBase);
            }
        }
        Terms terms = reader.fields().terms(fieldName);
        if (terms == null) {
            // Not indexed, so we need to report what we
            // can (it made it through the fl param if
            // specified)
            // NOTE(review): here the Alfresco property name goes into vInfo and
            // the raw field name into aInfo, the reverse of the indexed case
            // below. Both collections end up in the result, so only the order
            // of the entries differs - confirm this is intended.
            vInfo.add(AlfrescoSolrDataModel.getInstance().getAlfrescoPropertyFromSchemaField(fieldName), fieldMap);
            aInfo.add(fieldName, fieldMap);
            continue;
        }
        if (sfield != null && sfield.indexed()) {
            if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS, true)) {
                Document doc = getFirstLiveDoc(terms, reader);
                if (doc != null) {
                    // Found a document with this field
                    try {
                        IndexableField fld = doc.getField(fieldName);
                        if (fld != null) {
                            fieldMap.add("index", getFieldFlags(fld));
                        } else {
                            // it is a non-stored field...
                            fieldMap.add("index", "(unstored field)");
                        }
                    } catch (Exception ex) {
                        // Best effort: the field is still reported, but keep the
                        // cause in the log instead of dropping it.
                        log.warn("error reading field: " + fieldName, ex);
                    }
                }
            }
            fieldMap.add("docs", terms.getDocCount());
        }
        // Any field that reaches this point passed the fl filter above, so an
        // explicit field list always means "include the detailed info".
        if (fields != null) {
            getDetailedFieldInfo(req, fieldName, fieldMap);
        }
        // Add the field
        vInfo.add(fieldName, fieldMap);
        aInfo.add(AlfrescoSolrDataModel.getInstance().getAlfrescoPropertyFromSchemaField(fieldName), fieldMap);
    }
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
    finfo.addAll(vInfo);
    // finfo.add("mimetype()", finfo.get("cm:content.mimetype"));
    // finfo.add("contentSize()", finfo.get("cm:content.size"));
    finfo.addAll(aInfo);
    return finfo;
}

Also used : LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Document(org.apache.lucene.document.Document) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) SolrException(org.apache.solr.common.SolrException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) IndexableField(org.apache.lucene.index.IndexableField) TreeSet(java.util.TreeSet) SolrParams(org.apache.solr.common.params.SolrParams) IndexSchema(org.apache.solr.schema.IndexSchema) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 53 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project Solbase by Photobucket.

the class SolbaseCoreContainer method readSchema.

/**
 * Returns the core registered for {@code indexName}, creating and caching it
 * on first use.
 * <p>
 * On a cache miss the schema XML is fetched through
 * {@code readSchemaXMLBytes}, parsed into an {@link IndexSchema}, and wrapped
 * in a new {@link SolrCore} that is stored in the cache before being returned.
 *
 * @param indexName  name of the index; used as cache key and as core name
 * @param schemaName name passed to the {@link IndexSchema} constructor
 * @return the cached or newly created core
 * @throws IOException if no schema bytes are available for {@code indexName}
 * @throws ParserConfigurationException declared by the configuration/schema setup
 * @throws SAXException declared by the configuration/schema setup
 */
public synchronized SolrCore readSchema(String indexName, String schemaName) throws IOException, ParserConfigurationException, SAXException {
    SolrCore cachedCore = cache.get(indexName);
    if (cachedCore != null) {
        // Already built for this index: nothing to load.
        return cachedCore;
    }
    logger.debug("loading indexInfo for: " + indexName);
    byte[] schemaXml = readSchemaXMLBytes(indexName);
    if (schemaXml == null) {
        throw new IOException("schema doesn't exist for indexName: " + indexName);
    }
    // The whole array is the schema document, so it can be streamed directly.
    InputStream schemaStream = new ByteArrayInputStream(schemaXml);
    SolrConfig solrConfig = new SolrConfig(solrConfigFile);
    IndexSchema indexSchema = new IndexSchema(solrConfig, schemaName, schemaStream);
    SolrCore loadedCore = new SolrCore(indexName, "/tmp/search/solr-hbase/data", solrConfig, indexSchema, null);
    logger.debug("Loaded core from hbase: " + indexName);
    cache.put(indexName, loadedCore);
    return loadedCore;
}

Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) IndexSchema(org.apache.solr.schema.IndexSchema) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer)

Example 54 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project stanbol by apache.

the class IndexConfiguration method processFstConfig.

/**
 * This method combines the {@link #fstConfig} with the data present in the
 * {@link SolrCore}.
 * <p>
 * As information about fields is only available when a field was actually
 * used by a document stored in the index, one needs to inspect the index
 * after every change.
 * <p>
 * An empty Solr index will result in an empty {@link #corpusInfos} map. The
 * first document with a value for the English field will cause a
 * {@link CorpusInfo} for the English language to be created. As soon as the
 * last document with a label for a given language is deleted, the
 * {@link CorpusInfo} for that language will also disappear.
 * <p>
 * If the parsed index version equals the version recorded by the previous
 * run, the index is not inspected again and only the read lock of
 * {@link #corpusInfoLock} is taken. Otherwise this method acquires the write
 * lock on {@link #corpusInfoLock} while it inspects the Solr index.
 * <p>
 * The inspection has two phases: (1) if wildcards are enabled in the
 * {@link #fstConfig}, all fields of the index are matched against the
 * default index field; (2) the explicitly included languages are processed
 * with their language specific configuration.
 * @param indexVersion the current version of the {@link #index} to process
 * the FST config for.
 * @param indexReader The {@link AtomicReader} has access to the actual
 * fields present in the {@link SolrCore}. It is used to compare field
 * configurations in the {@link #fstConfig} with fields present in the Solr
 * {@link #index}.
 * @return <code>true</code> if any {@link CorpusInfo FST configuration} was
 * found while inspecting the Solr {@link #index}. If the index version did
 * not change, whether {@link #corpusInfos} is non-empty.
 */
private boolean processFstConfig(long indexVersion, AtomicReader indexReader) {
    // first check if the Solr index was updated
    corpusInfoLock.readLock().lock();
    try {
        if (indexVersion == this.indexVersion) {
            // nothing to do
            // NOTE(review): corpusInfos is treated as possibly null further down
            // ("first call"); this dereference relies on the versions never being
            // equal before the first initialization - confirm.
            return !corpusInfos.isEmpty();
        }
    } finally {
        corpusInfoLock.readLock().unlock();
    }
    // NOTE(review): corpusInfos is read here without holding either lock.
    log.debug("> {} FST config for {} (FST dir: {})", corpusInfos == null ? "create" : "update", index.getName(), fstDirectory.getAbsolutePath());
    boolean foundCorpus = false;
    corpusInfoLock.writeLock().lock();
    try {
        this.indexVersion = indexVersion;
        IndexSchema schema = index.getLatestSchema();
        // snapshot of the previous state, used to reuse unchanged CorpusInfo instances
        Map<String, CorpusInfo> corpusInfosCopy;
        if (corpusInfos == null) {
            // first call
            // init the field
            corpusInfos = new HashMap<String, CorpusInfo>();
            corpusInfosCopy = new HashMap<String, CorpusInfo>();
        } else {
            corpusInfosCopy = new HashMap<String, CorpusInfo>(corpusInfos);
            // clear the old data
            corpusInfos.clear();
        }
        // (0) get basic parameters of the default configuration
        log.debug(" - default config");
        Map<String, String> defaultParams = fstConfig.getDefaultParameters();
        String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
        String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
        String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
        if (storeField == null) {
            // use the configured indexField as default. This happens BEFORE the
            // default for indexField is applied, so storeField stays null if no
            // indexField was configured (null is handled below as
            // "store field == index field")
            storeField = indexField;
        }
        if (indexField == null) {
            // apply the defaults if null
            indexField = IndexConfiguration.DEFAULT_FIELD;
        }
        if (fstName == null) {
            // use default
            fstName = getDefaultFstFileName(indexField);
        }
        // These are all fields actually present in the index (as opposed to
        // those defined in the schema). This also includes actual instances of
        // dynamic field definitions in the schema.
        // we need this twice
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        // NOTE: this only needs to be done if wildcards are enabled in the fstConfig
        if (fstConfig.useWildcard()) {
            // (1.a) search for present FST files in the FST directory
            // NOTE(review): presentFstFiles is filled here but not read anywhere
            // else in this method.
            Map<String, File> presentFstFiles = new HashMap<String, File>();
            WildcardFileFilter fstFilter = new WildcardFileFilter(fstName + ".*.fst");
            Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
            while (fstFiles.hasNext()) {
                File fstFile = fstFiles.next();
                String fstFileName = fstFile.getName();
                // files are named such as "{name}.{lang}.fst"
                String language = FilenameUtils.getExtension(FilenameUtils.getBaseName(fstFileName));
                presentFstFiles.put(language, fstFile);
            }
            // (1.b) iterate over the fields in the Solr index and search for
            // matches against the configured indexField name
            String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField, fieldEncoding, "*");
            for (FieldInfo fieldInfo : fieldInfos) {
                // try to match the field names against the wildcard
                if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
                    // for matches parse the language from the field name
                    String language = FieldEncodingEnum.parseLanguage(fieldInfo.name, fieldEncoding, indexField);
                    if (// successfully parsed language
                    language != null && // is current language is enabled?
                    fstConfig.isLanguage(language) && // is there no explicit configuration for this language?
                    !fstConfig.getExplicitlyIncluded().contains(language)) {
                        // generate the FST file name
                        StringBuilder fstFileName = new StringBuilder(fstName);
                        if (!language.isEmpty()) {
                            fstFileName.append('.').append(language);
                        }
                        fstFileName.append(".fst");
                        File fstFile = new File(fstDirectory, fstFileName.toString());
                        // get the FieldType of the field from the Solr schema
                        FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
                        if (fieldType != null) {
                            // if the fieldType is present
                            if (runtimeGeneration || fstFile.isFile()) {
                                // and FST is present or can be created
                                // we need also to check if the stored field with
                                // the labels is present
                                // get the stored Field and check if it is present!
                                String storeFieldName;
                                if (storeField == null) {
                                    // storeField == indexField
                                    storeFieldName = fieldInfo.name;
                                } else {
                                    // check that the storeField is present in the index
                                    storeFieldName = FieldEncodingEnum.encodeLanguage(storeField, fieldEncoding, language);
                                    FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
                                    if (storedFieldInfos == null) {
                                        log.debug(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
                                        // null marks the configuration as invalid for the check below
                                        storeFieldName = null;
                                    }
                                }
                                if (storeFieldName != null) {
                                    // == valid configuration
                                    CorpusInfo fstInfo = corpusInfosCopy.get(language);
                                    if (// new one
                                    fstInfo == null || // index field compatible
                                    !fstInfo.indexedField.equals(fieldInfo.name) || !fstInfo.storedField.equals(storeFieldName)) {
                                        // no previous CorpusInfo, or its index/store field
                                        // no longer matches: create a new one
                                        CorpusInfo newFstInfo = new CorpusInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile, runtimeGeneration);
                                        log.debug(" ... {} {} ", fstInfo == null ? "create" : "update", newFstInfo);
                                        addCorpusInfo(newFstInfo);
                                        corpusInfosCopy.put(language, newFstInfo);
                                    } else {
                                        // no change in the SolrIndex ... use the existing CorpusInfo
                                        addCorpusInfo(fstInfo);
                                    }
                                    foundCorpus = true;
                                }
                            } else {
                                log.debug(" ... ignore language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, fieldInfo.name, fstFile.getAbsolutePath() });
                            }
                        } else {
                            log.debug(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
                        }
                    }
                // else the field matched the wildcard, but has not passed the
                // encoding test.
                }
            // Solr field does not match the field definition in the config
            }
        // end iterate over all fields in the SolrIndex
        }
        // (2) process explicit configuration for configured languages
        for (String language : fstConfig.getExplicitlyIncluded()) {
            // (2.a) get the language specific config (with fallback to default)
            Map<String, String> config = fstConfig.getParameters(language);
            String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
            String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
            String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
            final boolean langAllowCreation;
            final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
            if (langIndexField != null) {
                // also consider explicit field names as default for the fst name
                if (langFstFileName == null) {
                    StringBuilder fileName = new StringBuilder(getDefaultFstFileName(langIndexField));
                    if (!language.isEmpty()) {
                        fileName.append('.').append(language);
                    }
                    fileName.append(".fst");
                    langFstFileName = fileName.toString();
                }
            } else {
                langIndexField = indexField;
            }
            if (langStoreField == null) {
                // fallbacks
                if (storeField != null) {
                    // first to default store field
                    langStoreField = storeField;
                } else {
                    // else to the lang index field
                    langStoreField = langIndexField;
                }
            }
            if (langFstFileName == null) {
                // no fstFileName config
                // ... use the default
                // NOTE(review): unlike the other places that build an FST file name,
                // this one does not skip the '.{lang}' part for an empty language.
                langFstFileName = new StringBuilder(fstName).append('.').append(language).append(".fst").toString();
            }
            if (langAllowCreationString != null) {
                langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
            } else {
                // fall back to the instance-wide setting
                langAllowCreation = runtimeGeneration;
            }
            // (2.b) check if the Solr field is present
            String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(langIndexField, fieldEncoding, language);
            String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(langStoreField, fieldEncoding, language);
            FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
            if (langIndexFieldInfo != null) {
                FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
                if (langStoreFieldInfo != null) {
                    FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
                    if (fieldType != null) {
                        // (2.c) check the FST file
                        File langFstFile = new File(fstDirectory, langFstFileName);
                        if (langFstFile.isFile() || langAllowCreation) {
                            CorpusInfo langFstInfo = corpusInfosCopy.get(language);
                            if (// new one
                            langFstInfo == null || // index field compatible
                            !langFstInfo.indexedField.equals(encodedLangIndexField) || !langFstInfo.storedField.equals(encodedLangStoreField)) {
                                // no previous CorpusInfo, or its index/store field
                                // no longer matches: create a new one
                                CorpusInfo newLangFstInfo = new CorpusInfo(language, encodedLangIndexField, encodedLangStoreField, fieldType, langFstFile, langAllowCreation);
                                log.debug("   ... {} {} for explicitly configured language", langFstInfo == null ? "create" : "update", newLangFstInfo);
                                addCorpusInfo(newLangFstInfo);
                            } else {
                                // we can use the existing instance
                                addCorpusInfo(langFstInfo);
                            }
                            foundCorpus = true;
                        } else {
                            log.debug(" ... ignore explicitly configured language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, langIndexFieldInfo.name, langFstFile.getAbsolutePath() });
                        }
                    } else {
                        log.debug(" ... ignore explicitly configured language {} becuase unknown fieldtype " + "for SolrFied {}", language, langIndexFieldInfo.name);
                    }
                } else {
                    // NOTE(review): this logs the configured (unencoded) store field
                    // name, while the lookup above used encodedLangStoreField.
                    log.debug(" ... ignore explicitly configured language {} because configured stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, langStoreField, langIndexFieldInfo.name });
                }
            } else {
                log.debug(" ... ignore explicitly configured language {} because configured field {} (encoded: {}) " + "is not present in the SolrIndex!", new Object[] { language, langIndexField, encodedLangIndexField });
            }
        }
    } finally {
        corpusInfoLock.writeLock().unlock();
    }
    return foundCorpus;
}

Also used : HashMap(java.util.HashMap) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) FieldType(org.apache.solr.schema.FieldType) FieldInfos(org.apache.lucene.index.FieldInfos) IndexSchema(org.apache.solr.schema.IndexSchema) File(java.io.File) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 55 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project lucene-solr by apache.

the class TestCodecSupport method testCompressionModeDefault.

/**
 * Checks that a core whose configuration declares the schema codec factory
 * without any settings ends up using the default compression mode.
 * <p>
 * A temporary core is built from {@code solrconfig_codec2.xml} and
 * {@code schema_codec.xml}, registered with the harness container, fed one
 * document, and then inspected. The harness is switched back to its previous
 * core and the temporary core is unloaded afterwards.
 *
 * @throws IOException declared by the test signature
 */
public void testCompressionModeDefault() throws IOException {
    assertEquals("Default Solr compression mode changed. Is this expected?", SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE, Mode.valueOf("BEST_SPEED"));
    final String originalCoreName = h.coreName;
    final String tempCoreName = "core_with_default_compression";
    SolrConfig solrConfig = TestHarness.createConfig(testSolrHome, originalCoreName, "solrconfig_codec2.xml");
    // Sanity checks on the fixture: schema codec factory, declared as an empty element.
    assertEquals("Unexpected codec factory for this test.", "solr.SchemaCodecFactory", solrConfig.get("codecFactory/@class"));
    assertNull("Unexpected configuration of codec factory for this test. Expecting empty element", solrConfig.getNode("codecFactory", false).getFirstChild());
    IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema_codec.xml", solrConfig);
    CoreContainer container = h.getCoreContainer();
    try {
        CoreDescriptor descriptor = new CoreDescriptor(tempCoreName, testSolrHome.resolve(tempCoreName), container.getContainerProperties(), container.isZooKeeperAware());
        ConfigSet configSet = new ConfigSet("fakeConfigset", solrConfig, indexSchema, null, true);
        SolrCore tempCore = new SolrCore(container, descriptor, configSet);
        assertNull(container.registerCore(descriptor, tempCore, false, false));
        // Point the harness at the temporary core for the remaining assertions.
        h.coreName = tempCoreName;
        assertEquals("We are not using the correct core", "solrconfig_codec2.xml", h.getCore().getConfigResource());
        assertU(add(doc("string_f", "foo")));
        assertU(commit());
        assertCompressionMode(SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE.name(), h.getCore());
    } finally {
        // Restore the harness before dropping the temporary core.
        h.coreName = originalCoreName;
        container.unload(tempCoreName);
    }
}

Also used : IndexSchema(org.apache.solr.schema.IndexSchema)

Aggregations

IndexSchema (org.apache.solr.schema.IndexSchema)116 SolrInputDocument (org.apache.solr.common.SolrInputDocument)42 SchemaField (org.apache.solr.schema.SchemaField)34 HashMap (java.util.HashMap)16 SolrException (org.apache.solr.common.SolrException)15 IOException (java.io.IOException)14 FieldType (org.apache.solr.schema.FieldType)14 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)13 Date (java.util.Date)12 LinkedHashMap (java.util.LinkedHashMap)12 NamedList (org.apache.solr.common.util.NamedList)12 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)12 ArrayList (java.util.ArrayList)11 Document (org.apache.lucene.document.Document)11 SolrParams (org.apache.solr.common.params.SolrParams)11 DateTime (org.joda.time.DateTime)10 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)9 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)9 SolrConfig (org.apache.solr.core.SolrConfig)8 Test (org.junit.Test)7