use of gate.creole.annic.apache.lucene.search.IndexSearcher in project gate-core by GateNLP.
the class LuceneIndexer method getNamesOfSerializedFiles.
/**
 * Collects, for the given document, the serialized-file identifiers stored in
 * the index.
 * <p>
 * The index is queried for every entry whose {@code Constants.DOCUMENT_ID}
 * field equals {@code documentID}; the value of the
 * {@code Constants.DOCUMENT_ID_FOR_SERIALIZED_FILE} field of each matching
 * entry is added to the result.
 * </p>
 *
 * @param documentID the document identifier to look up in the index
 * @return the distinct serialized-file identifiers of the matching entries;
 *         empty when nothing matches
 * @throws IndexException if opening, searching or closing the index raises an
 *         {@link IOException}
 */
public Set<String> getNamesOfSerializedFiles(String documentID) throws IndexException {
  // Turn the configured index URL into a file-system path. URL#toURI() is
  // tried first; if the URL is not a valid URI, fall back to its raw file part.
  URL indexUrl = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
  String indexPath;
  try {
    indexPath = new File(indexUrl.toURI()).getAbsolutePath();
  } catch (URISyntaxException invalidUri) {
    indexPath = new File(indexUrl.getFile()).getAbsolutePath();
  }

  Set<String> serializedFileIds = new HashSet<String>();
  try {
    TermQuery byDocumentId = new TermQuery(new Term(Constants.DOCUMENT_ID, documentID));
    gate.creole.annic.apache.lucene.search.Searcher indexSearcher = new IndexSearcher(indexPath);
    try {
      // run the query and walk over every hit it produced
      Hits matches = indexSearcher.search(byDocumentId);
      for (int hit = 0; hit < matches.length(); hit++) {
        Document entry = matches.doc(hit);
        serializedFileIds.add(entry.get(Constants.DOCUMENT_ID_FOR_SERIALIZED_FILE));
      }
      return serializedFileIds;
    } finally {
      // always release the searcher, even when the search fails
      indexSearcher.close();
    }
  } catch (IOException ioe) {
    throw new IndexException(ioe);
  }
}
use of gate.creole.annic.apache.lucene.search.IndexSearcher in project gate-core by GateNLP.
the class LuceneSearcher method freq.
/**
 * Computes a frequency figure by delegating to
 * {@code StatsCalculator.freq} with a freshly opened {@link IndexSearcher}
 * on the datastore's index location.
 * <p>
 * The searcher is closed in a {@code finally} block, so it is released even
 * when the calculation throws.
 * </p>
 *
 * @param corpusToSearchIn passed through to {@code StatsCalculator.freq}
 * @param annotationSetToSearchIn passed through to {@code StatsCalculator.freq}
 * @param annotationType passed through to {@code StatsCalculator.freq}
 * @param featureName passed through to {@code StatsCalculator.freq}
 * @param value passed through to {@code StatsCalculator.freq}
 * @return the value returned by {@code StatsCalculator.freq}, or {@code -1}
 *         if the index searcher could not be opened or could not be closed
 * @throws SearchException not thrown directly by this method; presumably
 *         propagated from {@code StatsCalculator.freq} (confirm against that
 *         class)
 */
@Override
public int freq(String corpusToSearchIn, String annotationSetToSearchIn, String annotationType, String featureName, String value) throws SearchException {
  // Turn the configured index URL into a file-system path. URL#toURI() is
  // tried first; if the URL is not a valid URI, fall back to its raw file part.
  URL indexUrl = (URL) datastore.getIndexer().getParameters().get(Constants.INDEX_LOCATION_URL);
  String indexLocation;
  try {
    indexLocation = new File(indexUrl.toURI()).getAbsolutePath();
  } catch (URISyntaxException use) {
    indexLocation = new File(indexUrl.getFile()).getAbsolutePath();
  }

  IndexSearcher indexSearcher;
  try {
    // open the IndexSearcher
    indexSearcher = new IndexSearcher(indexLocation);
  } catch (IOException e) {
    e.printStackTrace();
    return -1;
  }

  int result;
  boolean closeFailed = false;
  try {
    result = StatsCalculator.freq(indexSearcher, corpusToSearchIn, annotationSetToSearchIn, annotationType, featureName, value);
  } finally {
    // Close the searcher on every path; previously an exception from the
    // calculation skipped the close and leaked the open index.
    try {
      indexSearcher.close();
    } catch (IOException ioe) {
      ioe.printStackTrace();
      closeFailed = true;
    }
  }
  // A failed close is still reported as -1, as before.
  return closeFailed ? -1 : result;
}
use of gate.creole.annic.apache.lucene.search.IndexSearcher in project gate-core by GateNLP.
the class LuceneSearcher method search.
/**
 * Runs a search and reports whether any results were found.
 * <p>
 * Two modes are supported:
 * </p>
 * <ul>
 * <li>If {@code parameters} holds exactly two entries and one of them is
 * {@code Constants.INDEX_LOCATION_URL}, and a {@code Constants.CORPUS_ID} is
 * also present, the index at that location is queried for entries of that
 * corpus only; the hits are kept in {@code luceneHits} and
 * {@code wasDeleteQuery} is set.</li>
 * <li>Otherwise {@code query} is run against every location listed under
 * {@code Constants.INDEX_LOCATIONS} (defaulting to the datastore's index
 * location, which is then also written back into {@code parameters}), using
 * one {@code LuceneSearchThread} per location.</li>
 * </ul>
 *
 * @param query the query to execute; must not be {@code null} in the second
 *        mode
 * @param parameters search parameters; must not be {@code null}, and in the
 *        second mode must contain a {@code Constants.CONTEXT_WINDOW} greater
 *        than zero
 * @return {@code true} if at least one hit (first mode) or at least one
 *         successful search thread (second mode) was obtained
 * @throws SearchException if a required parameter is missing or invalid, or
 *         if the index cannot be read
 */
@SuppressWarnings("unchecked")
@Override
public boolean search(String query, Map<String, Object> parameters) throws SearchException {
  // start from a clean slate: forget everything from a previous search
  luceneHits = null;
  annicPatterns = new ArrayList<Pattern>();
  annotationTypesMap = new HashMap<String, List<String>>();
  luceneSearchThreads = new ArrayList<LuceneSearchThread>();
  luceneSearchThreadIndex = 0;
  success = false;
  fwdIterationEnded = false;
  wasDeleteQuery = false;

  if (parameters == null) {
    throw new SearchException("Parameters cannot be null");
  }
  this.parameters = parameters;

  // Special case: only the documents stored under a specific corpus are
  // looked up, identified by corpus id plus index location.
  if (parameters.size() == 2 && parameters.get(Constants.INDEX_LOCATION_URL) != null) {
    String corpusID = (String) parameters.get(Constants.CORPUS_ID);
    URL suppliedUrl = (URL) parameters.get(Constants.INDEX_LOCATION_URL);
    String suppliedIndexPath;
    try {
      suppliedIndexPath = new File(suppliedUrl.toURI()).getAbsolutePath();
    } catch (URISyntaxException invalidUri) {
      // not a valid URI: fall back to the raw file part of the URL
      suppliedIndexPath = new File(suppliedUrl.getFile()).getAbsolutePath();
    }
    if (corpusID != null && suppliedIndexPath != null) {
      wasDeleteQuery = true;
      TermQuery byCorpus = new TermQuery(new Term(Constants.CORPUS_ID, corpusID));
      try {
        // NOTE(review): this searcher is never closed here; presumably it
        // must stay open while luceneHits is still being read - confirm.
        gate.creole.annic.apache.lucene.search.Searcher corpusSearcher = new IndexSearcher(suppliedIndexPath);
        luceneHits = corpusSearcher.search(byCorpus);
        success = luceneHits.length() > 0;
        return success;
      } catch (IOException ioe) {
        ioe.printStackTrace();
        throw new SearchException(ioe);
      }
    }
  }

  // No explicit index locations supplied: use the datastore's own index.
  if (parameters.get(Constants.INDEX_LOCATIONS) == null) {
    URL datastoreUrl = (URL) datastore.getIndexer().getParameters().get(Constants.INDEX_LOCATION_URL);
    String datastoreIndexPath;
    try {
      datastoreIndexPath = new File(datastoreUrl.toURI()).getAbsolutePath();
    } catch (URISyntaxException invalidUri) {
      datastoreIndexPath = new File(datastoreUrl.getFile()).getAbsolutePath();
    }
    ArrayList<String> defaultLocations = new ArrayList<String>();
    defaultLocations.add(datastoreIndexPath);
    parameters.put(Constants.INDEX_LOCATIONS, defaultLocations);
  }

  // work on a private copy of the supplied list
  indexLocations = new ArrayList<String>((List<? extends String>) parameters.get(Constants.INDEX_LOCATIONS));
  if (indexLocations.isEmpty()) {
    throw new SearchException("Corpus is not initialized");
  }

  // the context window is mandatory and must be positive
  if (parameters.get(Constants.CONTEXT_WINDOW) == null) {
    throw new SearchException("Parameter " + Constants.CONTEXT_WINDOW + " is not provided!");
  }
  contextWindow = ((Integer) parameters.get(Constants.CONTEXT_WINDOW)).intValue();
  if (getContextWindow().intValue() <= 0) {
    throw new SearchException("Context Window must be atleast 1 or > 1");
  }

  if (query == null) {
    throw new SearchException("Query is not initialized");
  }
  this.query = query;
  this.corpusToSearchIn = (String) parameters.get(Constants.CORPUS_ID);
  this.annotationSetToSearchIn = (String) parameters.get(Constants.ANNOTATION_SET_ID);

  annicPatterns = new ArrayList<Pattern>();
  annotationTypesMap = new HashMap<String, List<String>>();
  luceneSearchThreads = new ArrayList<LuceneSearchThread>();

  // TODO: is this really useful or used to have several indexLocations ?
  // One LuceneSearchThread per index; only those that report success are kept.
  for (int pos = 0; pos < indexLocations.size(); pos++) {
    String location = indexLocations.get(pos);
    LuceneSearchThread worker = new LuceneSearchThread();
    boolean found = worker.search(query, contextWindow, location, corpusToSearchIn, annotationSetToSearchIn, this);
    if (found) {
      luceneSearchThreads.add(worker);
    }
  }

  success = luceneSearchThreads.size() > 0;
  return success;
}
use of gate.creole.annic.apache.lucene.search.IndexSearcher in project gate-core by GateNLP.
the class LuceneSearcher method getIndexedAnnotationSetNames.
/**
 * This method returns a set of annotation set names that are indexed. Each
 * entry has the following format:
 * <p>
 * corpusName;annotationSetName
 * </p>
 * where, the corpusName is the name of the corpus the annotationSetName
 * belongs to. Index entries without a corpus id yield an empty corpusName.
 * <p>
 * As a side effect, {@code annotationTypesMap} is reset and repopulated:
 * every "Type.feature" item found in the {@code Constants.INDEXED_FEATURES}
 * field of a matching entry is recorded under the key
 * {@code corpusName;annotationSetName;Type}.
 * </p>
 *
 * @return the distinct "corpusName;annotationSetName" entries found in the
 *         index; an empty array if the index exposes no term enumeration
 * @throws SearchException if reading the index raises an {@link IOException}
 */
@Override
public String[] getIndexedAnnotationSetNames() throws SearchException {
  // Turn the configured index URL into a file-system path. URL#toURI() is
  // tried first; if the URL is not a valid URI, fall back to its raw file part.
  URL indexUrl = (URL) datastore.getIndexer().getParameters().get(Constants.INDEX_LOCATION_URL);
  String indexLocation;
  try {
    indexLocation = new File(indexUrl.toURI()).getAbsolutePath();
  } catch (URISyntaxException use) {
    indexLocation = new File(indexUrl.getFile()).getAbsolutePath();
  }
  annotationTypesMap = new HashMap<String, List<String>>();
  Set<String> toReturn = new HashSet<String>();
  try {
    IndexReader reader = IndexReader.open(indexLocation);
    try {
      // position the term enumeration at the annotation-set-id field
      TermEnum terms = reader.terms(new Term(Constants.ANNOTATION_SET_ID, ""));
      if (terms == null) {
        return new String[0];
      }
      // iterating over terms and finding out names of annotation sets indexed
      Set<String> annotSets = new HashSet<String>();
      boolean foundAnnotSet = false;
      do {
        Term t = terms.term();
        if (t == null)
          continue;
        if (t.field().equals(Constants.ANNOTATION_SET_ID)) {
          annotSets.add(t.text());
          foundAnnotSet = true;
        } else {
          // stop at the first other field seen after annotation-set terms
          if (foundAnnotSet)
            break;
        }
      } while (terms.next());
      // for every annotation set, find the corpora it occurs in and the
      // features indexed for it
      for (String annotSet : annotSets) {
        Term term = new Term(Constants.ANNOTATION_SET_ID, annotSet);
        TermQuery tq = new TermQuery(term);
        try {
          gate.creole.annic.apache.lucene.search.Searcher searcher = new IndexSearcher(indexLocation);
          try {
            Hits annotSetHits = searcher.search(tq);
            for (int i = 0; i < annotSetHits.length(); i++) {
              Document luceneDoc = annotSetHits.doc(i);
              String corpusID = luceneDoc.get(Constants.CORPUS_ID);
              // but not all documents belong to corpora
              if (corpusID == null)
                corpusID = "";
              toReturn.add(corpusID + ";" + annotSet);
              // lets create a boolean query
              Term annotSetTerm = new Term(Constants.ANNOTATION_SET_ID, annotSet);
              TermQuery atq = new TermQuery(annotSetTerm);
              BooleanQuery bq = new BooleanQuery();
              bq.add(tq, true, false);
              bq.add(atq, true, false);
              // The feature query runs on the searcher that is already open.
              // A second IndexSearcher used to be opened and closed here for
              // every hit without ever being queried.
              Hits indexFeaturesHits = searcher.search(bq);
              for (int j = 0; j < indexFeaturesHits.length(); j++) {
                Document aDoc = indexFeaturesHits.doc(j);
                String indexedFeatures = aDoc.get(Constants.INDEXED_FEATURES);
                if (indexedFeatures != null) {
                  String[] features = indexedFeatures.split(";");
                  for (String aFeature : features) {
                    // AnnotationType.FeatureName
                    int index = aFeature.indexOf(".");
                    if (index == -1) {
                      continue;
                    }
                    String type = aFeature.substring(0, index);
                    String featureName = aFeature.substring(index + 1);
                    String key = corpusID + ";" + annotSet + ";" + type;
                    List<String> listOfFeatures = annotationTypesMap.get(key);
                    if (listOfFeatures == null) {
                      listOfFeatures = new ArrayList<String>();
                      annotationTypesMap.put(key, listOfFeatures);
                    }
                    if (!listOfFeatures.contains(featureName)) {
                      listOfFeatures.add(featureName);
                    }
                  }
                }
              }
            }
          } finally {
            searcher.close();
          }
        } catch (IOException ioe) {
          ioe.printStackTrace();
          throw new SearchException(ioe);
        }
      }
    } finally {
      reader.close();
    }
  } catch (IOException ioe) {
    throw new SearchException(ioe);
  }
  return toReturn.toArray(new String[0]);
}
Aggregations