Search in sources :

Example 6 with IndexException

use of gate.creole.annic.IndexException in project gate-core by GateNLP.

the class LuceneDataStoreImpl method setIndexer.

/**
 * Sets the Indexer to be used for indexing the Datastore, asks it to create
 * the index, and then writes the datastore's version file.
 * <P>
 * The version file receives the version number on its first line, followed
 * by the index location expressed relative to the storage directory.
 *
 * @param indexer the indexer to use; its {@code createIndex} method is
 *          invoked with {@code indexParameters}
 * @param indexParameters parameters handed to the indexer; the value stored
 *          under {@code Constants.INDEX_LOCATION_URL} is read and cast to a
 *          {@link URL}
 * @throws IndexException if thrown by the indexer's {@code createIndex}, or
 *           if the version file cannot be written
 */
@Override
public void setIndexer(Indexer indexer, Map<String, Object> indexParameters) throws IndexException {
    this.indexer = indexer;
    this.indexParameters = indexParameters;
    this.indexURL = (URL) this.indexParameters.get(Constants.INDEX_LOCATION_URL);
    this.indexer.createIndex(this.indexParameters);
    // dump the version file
    try {
        File versionFile = getVersionFile();
        OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(versionFile));
        try {
            osw.write(versionNumber + Strings.getNl());
            String indexDirRelativePath = PersistenceManager.getRelativePath(storageDir.toURI().toURL(), indexURL);
            osw.write(indexDirRelativePath);
        } finally {
            // release the file handle even when one of the writes above fails
            osw.close();
        }
    } catch (IOException e) {
        // keep the original message text, but also retain the underlying
        // exception as the cause so its stack trace is not lost
        IndexException failure = new IndexException("couldn't write version file: " + e);
        failure.initCause(e);
        throw failure;
    }
}
Also used : IndexException(gate.creole.annic.IndexException) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) File(java.io.File)

Example 7 with IndexException

use of gate.creole.annic.IndexException in project gate-core by GateNLP.

the class LuceneIndexer method createIndex.

/**
 * Creates the index directory and indexes every document of the corpus,
 * if a corpus is set.
 * <P>
 * The supplied parameters are first passed to
 * {@code checkIndexParameters}. Each corpus document is converted into
 * Lucene documents which are added to the index writer; documents that have
 * a persistence id are released through {@code gate.Factory.deleteResource}
 * once they have been indexed. After the writer is closed the parameters
 * are written to disk.
 *
 * @param indexParameters a map with the values required to create an
 *          index. The following entries are expected:
 *          <ul>
 *          <li>INDEX_LOCATION_URL - the URL where the index should be
 *          created</li>
 *          <li>BASE_TOKEN_ANNOTATION_TYPE</li>
 *          <li>INDEX_UNIT_ANNOTATION_TYPE</li>
 *          <li>FEATURES_TO_EXCLUDE</li>
 *          <li>FEATURES_TO_INCLUDE</li>
 *          </ul>
 * @throws IndexException if an {@link java.io.IOException} occurs while
 *           the index is being written
 */
@Override
public void createIndex(Map<String, Object> indexParameters) throws IndexException {
    checkIndexParameters(indexParameters);
    URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
    try {
        // resolve the index URL to a directory on disk
        File indexDir;
        try {
            indexDir = new File(indexLocation.toURI());
        } catch (URISyntaxException use) {
            indexDir = Files.fileFromURL(indexLocation);
        }

        IndexWriter indexWriter = new IndexWriter(indexDir.getAbsolutePath(), new LuceneAnalyzer(), true);
        try {
            if (corpus != null) {
                // load the documents and index them one at a time
                int docIndex = 0;
                while (docIndex < corpus.size()) {
                    gate.Document currentDoc = corpus.get(docIndex);

                    // prefer the persistence id, fall back to the name
                    Object docPersistenceId = currentDoc.getLRPersistenceId();
                    String idToUse;
                    if (docPersistenceId == null) {
                        idToUse = currentDoc.getName();
                    } else {
                        idToUse = docPersistenceId.toString();
                    }
                    System.out.print("Indexing : " + idToUse + " ...");

                    Object corpusPersistenceId = corpus.getLRPersistenceId();
                    String corpusName;
                    if (corpusPersistenceId == null) {
                        corpusName = corpus.getName();
                    } else {
                        corpusName = corpusPersistenceId.toString();
                    }

                    List<gate.creole.annic.apache.lucene.document.Document> luceneDocs =
                            getLuceneDocuments(corpusName, currentDoc, indexLocation.toString());
                    if (luceneDocs != null) {
                        for (gate.creole.annic.apache.lucene.document.Document luceneDoc : luceneDocs) {
                            if (luceneDoc != null) {
                                indexWriter.addDocument(luceneDoc);
                            }
                        }
                    }

                    // documents with a persistence id are released after indexing
                    if (currentDoc.getLRPersistenceId() != null) {
                        gate.Factory.deleteResource(currentDoc);
                    }
                    System.out.println("Done");
                    docIndex++;
                }
            }
        } finally {
            indexWriter.close();
        }
        writeParametersToDisk();
    } catch (java.io.IOException ioe) {
        throw new IndexException(ioe);
    }
}
Also used : IndexException(gate.creole.annic.IndexException) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) Document(gate.creole.annic.apache.lucene.document.Document) URL(java.net.URL) IndexWriter(gate.creole.annic.apache.lucene.index.IndexWriter) File(java.io.File)

Example 8 with IndexException

use of gate.creole.annic.IndexException in project gate-core by GateNLP.

the class LuceneIndexer method optimizeIndex.

/**
 * Optimizes the existing index found at the location stored under
 * {@code Constants.INDEX_LOCATION_URL} in the indexer parameters.
 * <P>
 * The index writer is opened on the absolute file path of the index
 * directory, as {@code createIndex} does, and is always closed again.
 *
 * @throws IndexException if an {@link java.io.IOException} occurs while
 *           the index is opened, optimized or closed
 */
@Override
public void optimizeIndex() throws IndexException {
    try {
        URL indexLocation = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
        // createIndex hands the writer the directory's absolute file path;
        // the URL's string form (e.g. "file:/...") is not such a path, so
        // resolve the URL to a File before opening the writer
        File indexDir;
        try {
            indexDir = new File(indexLocation.toURI());
        } catch (URISyntaxException use) {
            // the URL is not a well-formed URI (e.g. it contains unescaped
            // characters); fall back to its raw path component
            indexDir = new File(indexLocation.getPath());
        }
        IndexWriter writer = new IndexWriter(indexDir.getAbsolutePath(), new gate.creole.annic.lucene.LuceneAnalyzer(), false);
        try {
            writer.optimize();
        } finally {
            writer.close();
        }
    } catch (java.io.IOException ioe) {
        throw new IndexException(ioe);
    }
}
Also used : IndexException(gate.creole.annic.IndexException) IndexWriter(gate.creole.annic.apache.lucene.index.IndexWriter) IOException(java.io.IOException) URL(java.net.URL)

Aggregations

IndexException (gate.creole.annic.IndexException)8 File (java.io.File)6 IOException (java.io.IOException)6 URL (java.net.URL)6 URISyntaxException (java.net.URISyntaxException)5 Document (gate.creole.annic.apache.lucene.document.Document)3 IndexWriter (gate.creole.annic.apache.lucene.index.IndexWriter)3 Term (gate.creole.annic.apache.lucene.index.Term)2 IndexReader (gate.creole.annic.apache.lucene.index.IndexReader)1 Hits (gate.creole.annic.apache.lucene.search.Hits)1 IndexSearcher (gate.creole.annic.apache.lucene.search.IndexSearcher)1 TermQuery (gate.creole.annic.apache.lucene.search.TermQuery)1 GateRuntimeException (gate.util.GateRuntimeException)1 FileOutputStream (java.io.FileOutputStream)1 OutputStreamWriter (java.io.OutputStreamWriter)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1