Search in sources :

Example 1 with IndexWriter

use of gate.creole.annic.apache.lucene.index.IndexWriter in project gate-core by GateNLP.

the class LuceneIndexer method add.

/**
 * Adds new documents to the index stored at the location given by the
 * INDEX_LOCATION_URL parameter.
 *
 * @param corpusPersistenceID identifier of the corpus; passed through
 *          unchanged to getLuceneDocuments for every document
 * @param added the GATE documents to index; if null, the index writer is
 *          opened and closed without adding anything
 * @throws IndexException if an I/O error occurs while opening, writing to
 *           or closing the index
 */
@Override
public void add(String corpusPersistenceID, List<gate.Document> added) throws IndexException {
    // Look the URL up once so both the normal path and the fallback use the same value.
    URL indexUrl = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
    String location;
    // TODO should we use the gate util Files method for this
    try {
        location = new File(indexUrl.toURI()).getAbsolutePath();
    } catch (URISyntaxException use) {
        // The URL is not a syntactically valid URI: fall back to its raw file part.
        location = new File(indexUrl.getFile()).getAbsolutePath();
    }
    try {
        // NOTE(review): the 'false' flag presumably opens an existing index
        // rather than creating a new one (createIndex passes 'true') - confirm.
        IndexWriter writer = new IndexWriter(location, new LuceneAnalyzer(), false);
        try {
            if (added != null) {
                for (gate.Document gateDoc : added) {
                    // Prefer the persistence ID for the progress message, else the name.
                    String idToUse = gateDoc.getLRPersistenceId() == null ? gateDoc.getName() : gateDoc.getLRPersistenceId().toString();
                    System.out.print("Indexing : " + idToUse + " ...");
                    List<gate.creole.annic.apache.lucene.document.Document> docs = getLuceneDocuments(corpusPersistenceID, gateDoc, location);
                    if (docs != null) {
                        for (gate.creole.annic.apache.lucene.document.Document luceneDoc : docs) {
                            // Skip null entries, matching the guard used in createIndex.
                            if (luceneDoc != null) {
                                writer.addDocument(luceneDoc);
                            }
                        }
                    }
                    System.out.println("Done");
                }
            }
        } finally {
            // make sure we close the writer, whatever happens
            writer.close();
        }
    } catch (java.io.IOException ioe) {
        throw new IndexException(ioe);
    }
}
Also used : IndexException(gate.creole.annic.IndexException) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) Document(gate.creole.annic.apache.lucene.document.Document) URL(java.net.URL) IndexWriter(gate.creole.annic.apache.lucene.index.IndexWriter) File(java.io.File)

Example 2 with IndexWriter

use of gate.creole.annic.apache.lucene.index.IndexWriter in project gate-core by GateNLP.

the class LuceneIndexer method createIndex.

/**
 * Creates a fresh index at the configured location and indexes every
 * document of the corpus into it, then writes the parameters to disk.
 *
 * @param indexParameters map of values needed to create the index; it is
 *          handed to checkIndexParameters before anything else happens.
 *          For LuceneIndexManager the expected entries are
 *          <P>
 *          INDEX_LOCATION_URL - the URL where the index is to be created
 *          <P>
 *          BASE_TOKEN_ANNOTATION_TYPE
 *          <P>
 *          INDEX_UNIT_ANNOTATION_TYPE
 *          <P>
 *          FEATURES_TO_EXCLUDE
 *          <P>
 *          FEATURES_TO_INCLUDE
 *          <P>
 * @throws IndexException if an I/O error occurs while building the index
 */
@Override
public void createIndex(Map<String, Object> indexParameters) throws IndexException {
    checkIndexParameters(indexParameters);
    // NOTE(review): the location is read from the 'parameters' field, not from
    // the argument - presumably checkIndexParameters populates it; confirm.
    URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
    try {
        File indexDir;
        try {
            indexDir = new File(indexLocation.toURI());
        } catch (URISyntaxException use) {
            // URL is not a valid URI: let the GATE utility resolve it instead.
            indexDir = Files.fileFromURL(indexLocation);
        }
        // 'true' asks the writer to create the index
        IndexWriter writer = new IndexWriter(indexDir.getAbsolutePath(), new LuceneAnalyzer(), true);
        try {
            if (corpus != null) {
                // Index-based loop kept on purpose: documents are fetched one at a
                // time and may be deleted again at the end of each iteration.
                for (int docIndex = 0; docIndex < corpus.size(); docIndex++) {
                    gate.Document gateDoc = corpus.get(docIndex);

                    String idToUse;
                    if (gateDoc.getLRPersistenceId() == null) {
                        idToUse = gateDoc.getName();
                    } else {
                        idToUse = gateDoc.getLRPersistenceId().toString();
                    }
                    System.out.print("Indexing : " + idToUse + " ...");

                    String corpusName;
                    if (corpus.getLRPersistenceId() == null) {
                        corpusName = corpus.getName();
                    } else {
                        corpusName = corpus.getLRPersistenceId().toString();
                    }

                    List<gate.creole.annic.apache.lucene.document.Document> luceneDocs = getLuceneDocuments(corpusName, gateDoc, indexLocation.toString());
                    if (luceneDocs != null) {
                        for (gate.creole.annic.apache.lucene.document.Document luceneDoc : luceneDocs) {
                            if (luceneDoc == null) {
                                continue;
                            }
                            writer.addDocument(luceneDoc);
                        }
                    }

                    // Documents that have a persistence ID are deleted via the Factory
                    // once they have been indexed.
                    if (gateDoc.getLRPersistenceId() != null) {
                        gate.Factory.deleteResource(gateDoc);
                    }
                    System.out.println("Done");
                }
            }
        } finally {
            // always release the writer, even if indexing failed part-way
            writer.close();
        }
        writeParametersToDisk();
    } catch (java.io.IOException ioe) {
        throw new IndexException(ioe);
    }
}
Also used : IndexException(gate.creole.annic.IndexException) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) Document(gate.creole.annic.apache.lucene.document.Document) URL(java.net.URL) IndexWriter(gate.creole.annic.apache.lucene.index.IndexWriter) File(java.io.File)

Example 3 with IndexWriter

use of gate.creole.annic.apache.lucene.index.IndexWriter in project gate-core by GateNLP.

the class LuceneIndexer method optimizeIndex.

/**
 * Optimizes the existing index found at the location given by the
 * INDEX_LOCATION_URL parameter.
 *
 * @throws IndexException if an I/O error occurs while opening, optimizing
 *           or closing the index
 */
@Override
public void optimizeIndex() throws IndexException {
    try {
        URL indexUrl = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
        // Convert the URL to an absolute filesystem path, exactly as add() does.
        // Passing URL.toString() (e.g. "file:/path/index") would hand the writer
        // a URL string where the other methods hand it a filesystem path.
        String location;
        try {
            location = new java.io.File(indexUrl.toURI()).getAbsolutePath();
        } catch (java.net.URISyntaxException use) {
            // The URL is not a syntactically valid URI: fall back to its raw file part.
            location = new java.io.File(indexUrl.getFile()).getAbsolutePath();
        }
        IndexWriter writer = new IndexWriter(location, new gate.creole.annic.lucene.LuceneAnalyzer(), false);
        try {
            writer.optimize();
        } finally {
            // make sure the writer is closed even if optimization fails
            writer.close();
        }
    } catch (java.io.IOException ioe) {
        throw new IndexException(ioe);
    }
}
Also used : IndexException(gate.creole.annic.IndexException) IndexWriter(gate.creole.annic.apache.lucene.index.IndexWriter) IOException(java.io.IOException) URL(java.net.URL)

Aggregations

IndexException (gate.creole.annic.IndexException): 3, IndexWriter (gate.creole.annic.apache.lucene.index.IndexWriter): 3, IOException (java.io.IOException): 3, URL (java.net.URL): 3, Document (gate.creole.annic.apache.lucene.document.Document): 2, File (java.io.File): 2, URISyntaxException (java.net.URISyntaxException): 2