Examples with Analyzer - org.apache.lucene.analysis.Analyzer

Example 31 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the project ansj_seg by NLPchina.

From the class IndexTest, method indexTest.

/**
 * Indexes a single sample string into an in-memory Lucene index with the
 * Ansj index analyzer, then runs a quoted phrase query against that index
 * with the Ansj query analyzer.
 */
@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    MyStaticValue.DIC.put(MyStaticValue.DIC_DEFAULT, "../../library/default.dic");

    // The same stop-word set is handed to both the index-time and the query-time analyzer.
    HashSet<String> stopWords = new HashSet<String>();
    stopWords.add("的");
    Analyzer indexAnalyzer = new AnsjIndexAnalysis(stopWords, false);

    String sample = "季德胜蛇药片 10片*6板 ";
    UserDefineLibrary.insertWord("蛇药片", "n", 1000);
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_44, indexAnalyzer);

    // Build the in-memory index.
    Directory ramIndex = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramIndex, writerConfig);
    addContent(indexWriter, sample);
    indexWriter.commit();
    indexWriter.close();
    System.out.println("索引建立完毕");

    // Search the freshly written index.
    Analyzer queryAnalyzer = new AnsjAnalysis(stopWords, false);
    System.out.println("index ok to search!");
    search(queryAnalyzer, ramIndex, "\"季德胜蛇药片\"");
}

Also used : IndexWriter(org.apache.lucene.index.IndexWriter) Analyzer(org.apache.lucene.analysis.Analyzer) AnsjIndexAnalysis(org.ansj.lucene4.AnsjIndexAnalysis) RAMDirectory(org.apache.lucene.store.RAMDirectory) AnsjAnalysis(org.ansj.lucene4.AnsjAnalysis) HashSet(java.util.HashSet) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 32 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the project OpenGrok by OpenGrok.

From the class IndexDatabase, method optimize.

/**
 * Optimize the index database by force-merging the index down to at most
 * one segment.
 *
 * If another update or optimize run is already in progress (the
 * {@code running} flag is set), a warning is logged and the method returns
 * without touching the index. I/O failures are logged, not rethrown.
 */
public void optimize() {
    // Claim the "running" flag; bail out if somebody else already holds it.
    synchronized (lock) {
        if (running) {
            LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
            return;
        }
        running = true;
    }
    IndexWriter wrt = null;
    try {
        LOGGER.info("Optimizing the index ... ");
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        wrt = new IndexWriter(indexDirectory, conf);
        // Ask Lucene to merge segments until at most one remains.
        wrt.forceMerge(1);
        LOGGER.info("done");
        synchronized (lock) {
            // The index is clean again: remove the on-disk marker and reset the flag.
            if (dirtyFile.exists() && !dirtyFile.delete()) {
                LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
            }
            dirty = false;
        }
    } catch (IOException e) {
        // Passing a Throwable selects log(Level, String, Throwable), which performs
        // no {0} substitution, so the message must not contain a placeholder.
        LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
    } finally {
        if (wrt != null) {
            try {
                wrt.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        // Always release the "running" flag, even when optimization failed.
        synchronized (lock) {
            running = false;
        }
    }
}

Also used : IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IOException(java.io.IOException) FileAnalyzer(org.opensolaris.opengrok.analysis.FileAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 33 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the project OpenGrok by OpenGrok.

From the class IndexDatabase, method update.

/**
 * Update the content of this index database.
 *
 * For every configured directory the source tree is walked via
 * {@code indexDown}, and afterwards the uid terms still left under that
 * directory's prefix are handed to {@code removeFile}. When the run was not
 * interrupted and the database is dirty, the index is optionally optimized
 * and the "timestamp" file in the data root is touched or created.
 *
 * @throws IOException if an indexer is already running on this database,
 *         or if an I/O error occurs
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    // Claim the "running" flag; unlike optimize(), a concurrent run is an error here.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }
    // A fresh Ctags wrapper is created only when a ctags binary is configured.
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    // ctags is a field, so it may still be non-null here from elsewhere even if ctgs was null.
    if (ctags == null) {
        LOGGER.severe("Unable to run ctags! searching definitions will not work!");
    }
    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        // Commit right away to make sure the index exists on disk before the
        // DirectoryReader below tries to open it.
        writer.commit();
        // No explicit directories: fall back to "" (handled below as the whole
        // source root) or to the project's own path.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }
        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }
            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
            // uid prefix for this directory; used both to seek and to bound the cleanup loop.
            String startuid = Util.path2uid(dir, "");
            // open existing index
            IndexReader reader = DirectoryReader.open(indexDirectory);
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                //reader.getTermVectors(0);
                Fields uFields = MultiFields.getFields(reader);
                terms = uFields.terms(QueryBuilder.U);
            }
            try {
                if (numDocs > 0) {
                    // NOTE(review): terms is dereferenced without a null check; confirm
                    // the QueryBuilder.U field always exists when numDocs > 0.
                    uidIter = terms.iterator();
                    // Position the shared uid iterator at the first term for this directory.
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid));
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }
                // When progress printing is enabled, first traverse the tree once
                // (third argument true) just to get the total file count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    LOGGER.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    LOGGER.log(Level.INFO, "Need to process: {0} files for {1}", new Object[] { file_cnt, dir });
                }
                // The real indexing pass.
                indexDown(sourceRoot, dir, false, 0, file_cnt);
                // Whatever uid terms are still ahead of the iterator under this prefix
                // get removed - presumably entries whose files no longer exist in the
                // source tree (verify against indexDown/removeFile).
                while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // Flush and close the writer; a failure here is only logged, not rethrown.
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }
        // Always release the "running" flag, even when indexing failed.
        synchronized (lock) {
            running = false;
        }
    }
    // Post-processing happens only after a complete (non-interrupted) run that changed the index.
    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        // Touch (or create) the "timestamp" file in the data root.
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                LOGGER.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                LOGGER.log(Level.WARNING, "Failed to create file ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}

Also used : RuntimeEnvironment(org.opensolaris.opengrok.configuration.RuntimeEnvironment) Terms(org.apache.lucene.index.Terms) IOException(java.io.IOException) FileAnalyzer(org.opensolaris.opengrok.analysis.FileAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) TermsEnum(org.apache.lucene.index.TermsEnum) Fields(org.apache.lucene.index.Fields) MultiFields(org.apache.lucene.index.MultiFields) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Ctags(org.opensolaris.opengrok.analysis.Ctags) File(java.io.File) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 34 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the project Openfire by igniterealtime.

From the class ChatSearchManager, method loadAnalyzer.

/**
 * Resolves the analyzer used for indexing and stores it in
 * {@code indexerAnalyzer}.
 *
 * The workgroup's own "search.analyzer.className" property wins; otherwise
 * the global analyzer class is used. The stop-word list is read from the
 * same scope as the class name. If the configured class cannot be
 * instantiated, a StandardAnalyzer is used instead.
 */
private void loadAnalyzer() {
    // Workgroup-specific settings take precedence over the global ones.
    String className = workgroup.getProperties().getProperty("search.analyzer.className");
    String stopWordCsv;
    if (className == null) {
        className = getAnalyzerClass();
        stopWordCsv = JiveGlobals.getProperty("workgroup.search.analyzer.stopWordList");
    } else {
        stopWordCsv = workgroup.getProperties().getProperty("search.analyzer.stopWordList");
    }

    // Split the comma-separated stop-word list, if one was configured.
    List<String> stopWords = new ArrayList<String>();
    if (stopWordCsv != null) {
        for (StringTokenizer tokens = new StringTokenizer(stopWordCsv, ","); tokens.hasMoreTokens();) {
            stopWords.add(tokens.nextToken().trim());
        }
    }

    Analyzer resolved = null;
    try {
        resolved = getAnalyzerInstance(className, stopWords);
    } catch (Exception e) {
        Log.error("Error loading custom " + "search analyzer: " + className, e);
    }

    // Fall back to the standard analyzer, passing on the stop words when there are any.
    if (resolved == null) {
        if (stopWords.isEmpty()) {
            resolved = new StandardAnalyzer();
        } else {
            resolved = new StandardAnalyzer(stopWords.toArray(new String[stopWords.size()]));
        }
    }
    indexerAnalyzer = resolved;
}

Also used : StringTokenizer(java.util.StringTokenizer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) SQLException(java.sql.SQLException) DocumentException(org.dom4j.DocumentException) IOException(java.io.IOException)

Example 35 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the project Openfire by igniterealtime.

From the class ChatSearchManager, method getAnalyzerInstance.

/**
 * Loads the named analyzer class and creates an instance of it.
 *
 * When stop words are supplied, a public constructor taking a single
 * {@code String[]} is preferred; if the class has no such constructor, or
 * no stop words were supplied, the no-argument constructor is used.
 *
 * @param analyzerClass fully qualified name of the analyzer class
 * @param stopWords stop words to pass to the analyzer; may be empty
 * @return the new analyzer instance
 * @throws Exception if the class cannot be found, is not an Analyzer
 *         (ClassCastException), or cannot be instantiated
 */
private Analyzer getAnalyzerInstance(String analyzerClass, List<String> stopWords) throws Exception {
    // Load the class, falling back to this class's own loader.
    Class<?> loaded;
    try {
        loaded = ClassUtils.forName(analyzerClass);
    } catch (ClassNotFoundException e) {
        loaded = getClass().getClassLoader().loadClass(analyzerClass);
    }
    // Reject non-Analyzer classes before running any of their constructors.
    Class<? extends Analyzer> analyzerType = loaded.asSubclass(Analyzer.class);

    if (!stopWords.isEmpty()) {
        try {
            Constructor<? extends Analyzer> withStopWords = analyzerType.getConstructor(String[].class);
            // Typed as Object so the array is passed as ONE argument instead of
            // being spread across the varargs parameter of newInstance.
            Object stopWordArray = stopWords.toArray(new String[stopWords.size()]);
            return withStopWords.newInstance(stopWordArray);
        } catch (NoSuchMethodException ignored) {
            // No String[] constructor: fall through to the no-argument constructor.
        }
    }
    // Replaces the deprecated Class.newInstance(), which rethrows constructor
    // exceptions unwrapped; this path now wraps them like the one above.
    return analyzerType.getDeclaredConstructor().newInstance();
}

Also used : Constructor(java.lang.reflect.Constructor) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer)

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer): 1020, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 396, Tokenizer (org.apache.lucene.analysis.Tokenizer): 265, MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 228, Document (org.apache.lucene.document.Document): 207, Directory (org.apache.lucene.store.Directory): 192, KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer): 176, BytesRef (org.apache.lucene.util.BytesRef): 122, Test (org.junit.Test): 119, TokenStream (org.apache.lucene.analysis.TokenStream): 107, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 92, Term (org.apache.lucene.index.Term): 92, IndexReader (org.apache.lucene.index.IndexReader): 67, InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 65, StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 64, Input (org.apache.lucene.search.suggest.Input): 63, CharArraySet (org.apache.lucene.analysis.CharArraySet): 58, ArrayList (java.util.ArrayList): 57, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 57, TextField (org.apache.lucene.document.TextField): 55