Examples with IndexWriter - org.apache.lucene.index.IndexWriter

Example 56 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.

the class IndexTest method indexTest.

@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    MyStaticValue.DIC.put(MyStaticValue.DIC_DEFAULT, "../../library/default.dic");
    HashSet<String> hs = new HashSet<String>();
    hs.add("的");
    Analyzer analyzer = new AnsjIndexAnalysis(hs, false);
    Directory directory = null;
    IndexWriter iwriter = null;
    String text = "季德胜蛇药片 10片*6板 ";
    UserDefineLibrary.insertWord("蛇药片", "n", 1000);
    IndexWriterConfig ic = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // 建立内存索引对象
    directory = new RAMDirectory();
    iwriter = new IndexWriter(directory, ic);
    addContent(iwriter, text);
    iwriter.commit();
    iwriter.close();
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalysis(hs, false);
    ;
    System.out.println("index ok to search!");
    search(queryAnalyzer, directory, "\"季德胜蛇药片\"");
}

Also used : IndexWriter(org.apache.lucene.index.IndexWriter) Analyzer(org.apache.lucene.analysis.Analyzer) AnsjIndexAnalysis(org.ansj.lucene4.AnsjIndexAnalysis) RAMDirectory(org.apache.lucene.store.RAMDirectory) AnsjAnalysis(org.ansj.lucene4.AnsjAnalysis) HashSet(java.util.HashSet) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 57 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.

the class NearTest method createIndex.

public static void createIndex() throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, new AnsjAnalysis());
    Directory directory = FSDirectory.open(new File("c:/index"));
    IndexWriter writer = new IndexWriter(directory, conf);
    String str = "文化人;文化人谈文化";
    String[] values = str.split(";");
    for (String value : values) {
        Document doc = new Document();
        Field field = new Field("test", value, Store.YES, Index.ANALYZED_NO_NORMS, TermVector.WITH_POSITIONS_OFFSETS);
        //			field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        doc.add(field);
        writer.addDocument(doc);
        writer.commit();
    }
    writer.close();
}

Also used : Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) Document(org.apache.lucene.document.Document) File(java.io.File) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 58 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project OpenGrok by OpenGrok.

the class IndexDatabase method optimize.

/**
     * Optimize the index database
     */
public void optimize() {
    synchronized (lock) {
        if (running) {
            LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
            return;
        }
        running = true;
    }
    IndexWriter wrt = null;
    try {
        LOGGER.info("Optimizing the index ... ");
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        wrt = new IndexWriter(indexDirectory, conf);
        // this is deprecated and not needed anymore            
        wrt.forceMerge(1);
        LOGGER.info("done");
        synchronized (lock) {
            if (dirtyFile.exists() && !dirtyFile.delete()) {
                LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
            }
            dirty = false;
        }
    } catch (IOException e) {
        LOGGER.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
    } finally {
        if (wrt != null) {
            try {
                wrt.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        synchronized (lock) {
            running = false;
        }
    }
}

Also used : IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IOException(java.io.IOException) FileAnalyzer(org.opensolaris.opengrok.analysis.FileAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 59 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project OpenGrok by OpenGrok.

the class IndexDatabase method update.

/**
     * Update the content of this index database
     *
     * @throws IOException if an error occurs
     * @throws HistoryException if an error occurs when accessing the history
     */
public void update() throws IOException, HistoryException {
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        LOGGER.severe("Unable to run ctags! searching definitions will not work!");
    }
    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        // to make sure index exists on the disk            
        writer.commit();
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }
        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }
            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
            String startuid = Util.path2uid(dir, "");
            // open existing index
            IndexReader reader = DirectoryReader.open(indexDirectory);
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                //reader.getTermVectors(0);
                Fields uFields = MultiFields.getFields(reader);
                terms = uFields.terms(QueryBuilder.U);
            }
            try {
                if (numDocs > 0) {
                    uidIter = terms.iterator();
                    //init uid                        
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid));
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }
                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    LOGGER.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    LOGGER.log(Level.INFO, "Need to process: {0} files for {1}", new Object[] { file_cnt, dir });
                }
                indexDown(sourceRoot, dir, false, 0, file_cnt);
                while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }
        synchronized (lock) {
            running = false;
        }
    }
    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                LOGGER.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                LOGGER.log(Level.WARNING, "Failed to create file ''{0}'', {1}", new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}

Also used : RuntimeEnvironment(org.opensolaris.opengrok.configuration.RuntimeEnvironment) Terms(org.apache.lucene.index.Terms) IOException(java.io.IOException) FileAnalyzer(org.opensolaris.opengrok.analysis.FileAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) TermsEnum(org.apache.lucene.index.TermsEnum) Fields(org.apache.lucene.index.Fields) MultiFields(org.apache.lucene.index.MultiFields) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Ctags(org.opensolaris.opengrok.analysis.Ctags) File(java.io.File) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 60 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project Openfire by igniterealtime.

the class ChatSearchManager method updateIndex.

/**
     * Updates the index file with new chats that took place since the last added chat to the
     * index. If the index file is missing or a chat was never added to the index file then
     * {@link #rebuildIndex} will be used instead.
     *
     * @param forceUpdate true if the index should be updated despite of the execution frequency.
     * @throws IOException if the directory cannot be read/written to, or it does not exist, or
     *                     there is a problem adding a document to the index.
     */
public synchronized void updateIndex(boolean forceUpdate) throws IOException {
    // Check that the index files exist
    File dir = new File(searchDirectory);
    boolean create = !dir.exists() || !dir.isDirectory();
    if (lastUpdated == null || create) {
        // Recreate the index since it was never created or the index files disappeared
        rebuildIndex();
    } else {
        if (forceUpdate || (System.currentTimeMillis() - lastExecution.getTime()) / 60000 > getExecutionFrequency()) {
            List<ChatInformation> chatsInformation = getChatsInformation(lastUpdated);
            if (!chatsInformation.isEmpty()) {
                // Reset the number of transcripts pending to be added to the index
                pendingTranscripts.set(0);
                Date lastDate = null;
                IndexWriter writer = getWriter(false);
                for (ChatInformation chat : chatsInformation) {
                    addTranscriptToIndex(chat, writer);
                    lastDate = chat.getCreationDate();
                }
                // Check if we need to optimize the index. The index is optimized once a day
                if ((System.currentTimeMillis() - lastOptimization.getTime()) / ONE_HOUR > getOptimizationFrequency()) {
                    writer.optimize();
                    // Update the optimized date
                    lastOptimization = new Date();
                }
                writer.close();
                closeSearcherReader();
                // Reset the filters cache
                cachedFilters.clear();
                // Update the last updated date
                lastUpdated = lastDate;
                // Save the last updated and optimized dates to the database
                saveDates();
            }
            // Update the last time the update process was executed
            lastExecution = new Date();
        }
    }
}

Also used : IndexWriter(org.apache.lucene.index.IndexWriter) File(java.io.File) Date(java.util.Date)

Aggregations

IndexWriter (org.apache.lucene.index.IndexWriter)529 Document (org.apache.lucene.document.Document)311 Directory (org.apache.lucene.store.Directory)306 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)293 IndexReader (org.apache.lucene.index.IndexReader)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)136 DirectoryReader (org.apache.lucene.index.DirectoryReader)127 Term (org.apache.lucene.index.Term)125 IndexSearcher (org.apache.lucene.search.IndexSearcher)110 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)107 TextField (org.apache.lucene.document.TextField)104 RAMDirectory (org.apache.lucene.store.RAMDirectory)88 IOException (java.io.IOException)86 Field (org.apache.lucene.document.Field)86 TermQuery (org.apache.lucene.search.TermQuery)56 StringField (org.apache.lucene.document.StringField)52 BytesRef (org.apache.lucene.util.BytesRef)52 FieldType (org.apache.lucene.document.FieldType)50 Test (org.junit.Test)49 Query (org.apache.lucene.search.Query)45