use of gate.creole.annic.SearchException in project gate-core by GateNLP.
the class LuceneSearcher method getIndexedAnnotationSetNames.
/**
 * Returns the names of the annotation sets that are indexed. Each entry has
 * the following format:
 * <p>
 * corpusName;annotationSetName
 * </p>
 * where corpusName is the value stored under {@code Constants.CORPUS_ID} in
 * a matching index document (the empty string when no such value is stored)
 * and annotationSetName is a term of the {@code Constants.ANNOTATION_SET_ID}
 * field.
 * <p>
 * As a side effect, {@code annotationTypesMap} is replaced with a fresh map
 * keyed by {@code corpusName;annotationSetName;annotationType} whose values
 * are the distinct feature names read from
 * {@code Constants.INDEXED_FEATURES}.
 * </p>
 *
 * @return the distinct {@code corpusName;annotationSetName} entries, or an
 *         empty array when the index reader supplies no term enumeration
 * @throws SearchException if reading the index fails with an
 *           {@link IOException}
 */
@Override
public String[] getIndexedAnnotationSetNames() throws SearchException {
  // Look the index URL up once; it is needed by both the URI-based
  // conversion and its fallback.
  URL indexUrl = (URL) datastore.getIndexer().getParameters().get(Constants.INDEX_LOCATION_URL);
  String indexLocation;
  try {
    indexLocation = new File(indexUrl.toURI()).getAbsolutePath();
  } catch (URISyntaxException use) {
    // the URL is not a valid URI: fall back to its raw file part
    indexLocation = new File(indexUrl.getFile()).getAbsolutePath();
  }
  annotationTypesMap = new HashMap<String, List<String>>();
  Set<String> toReturn = new HashSet<String>();
  try {
    IndexReader reader = IndexReader.open(indexLocation);
    try {
      // lets first obtain the indexed annotation set ids
      TermEnum terms = reader.terms(new Term(Constants.ANNOTATION_SET_ID, ""));
      if (terms == null) {
        return new String[0];
      }
      // iterating over terms and finding out names of annotation sets indexed
      Set<String> annotSets = new HashSet<String>();
      try {
        boolean foundAnnotSet = false;
        do {
          Term t = terms.term();
          if (t == null) {
            continue;
          }
          if (t.field().equals(Constants.ANNOTATION_SET_ID)) {
            annotSets.add(t.text());
            foundAnnotSet = true;
          } else if (foundAnnotSet) {
            // we have walked past the run of annotation set terms
            break;
          }
        } while (terms.next());
      } finally {
        // release the enumeration as soon as the names are collected
        terms.close();
      }
      // but not all documents belong to corpora
      for (String annotSet : annotSets) {
        TermQuery tq = new TermQuery(new Term(Constants.ANNOTATION_SET_ID, annotSet));
        gate.creole.annic.apache.lucene.search.Searcher searcher = new IndexSearcher(indexLocation);
        try {
          Hits annotSetHits = searcher.search(tq);
          for (int i = 0; i < annotSetHits.length(); i++) {
            Document luceneDoc = annotSetHits.doc(i);
            String corpusID = luceneDoc.get(Constants.CORPUS_ID);
            if (corpusID == null) {
              corpusID = "";
            }
            toReturn.add(corpusID + ";" + annotSet);
            // lets create a boolean query
            // NOTE(review): both required clauses use the same
            // ANNOTATION_SET_ID term, so this matches the same documents as
            // tq alone - confirm whether a corpus clause was intended.
            TermQuery atq = new TermQuery(new Term(Constants.ANNOTATION_SET_ID, annotSet));
            BooleanQuery bq = new BooleanQuery();
            bq.add(tq, true, false);
            bq.add(atq, true, false);
            // The already open searcher is reused here; a second searcher on
            // the same index location is not needed for this query.
            Hits indexFeaturesHits = searcher.search(bq);
            for (int j = 0; j < indexFeaturesHits.length(); j++) {
              Document aDoc = indexFeaturesHits.doc(j);
              String indexedFeatures = aDoc.get(Constants.INDEXED_FEATURES);
              if (indexedFeatures == null) {
                continue;
              }
              for (String aFeature : indexedFeatures.split(";")) {
                // AnnotationType.FeatureName
                int index = aFeature.indexOf(".");
                if (index == -1) {
                  continue;
                }
                String type = aFeature.substring(0, index);
                String featureName = aFeature.substring(index + 1);
                String key = corpusID + ";" + annotSet + ";" + type;
                List<String> listOfFeatures = annotationTypesMap.get(key);
                if (listOfFeatures == null) {
                  listOfFeatures = new ArrayList<String>();
                  annotationTypesMap.put(key, listOfFeatures);
                }
                if (!listOfFeatures.contains(featureName)) {
                  listOfFeatures.add(featureName);
                }
              }
            }
          }
        } finally {
          searcher.close();
        }
      }
    } finally {
      reader.close();
    }
  } catch (IOException ioe) {
    // single wrapping point for every index I/O failure
    throw new SearchException(ioe);
  }
  return toReturn.toArray(new String[0]);
}
use of gate.creole.annic.SearchException in project gate-core by GateNLP.
the class StatsCalculator method freq.
/**
 * Sums, over all supplied patterns, the number of annotations that
 * {@code getPatternAnnotations(List, String)} selects for the given
 * annotation type. The flags choose where each pattern's annotations are
 * taken from: the matched span only, the left and right context only, or
 * (when both are set) everything the pattern reports.
 *
 * @param patternsToSearchIn hits to inspect; each one is cast to
 *          {@code Pattern}
 * @param annotationType annotation type handed to the filtering helper
 * @param inMatchedSpan whether annotations in the matched span count
 * @param inContext whether annotations in the surrounding context count
 * @return the accumulated count, or 0 when the list is null or empty
 * @throws SearchException if both inMatchedSpan and inContext are false
 * @see #freq(List, String, String, String, boolean, boolean)
 */
public static int freq(List<Hit> patternsToSearchIn, String annotationType, boolean inMatchedSpan, boolean inContext) throws SearchException {
  if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
    return 0;
  }
  if (!(inMatchedSpan || inContext)) {
    throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
  }
  int total = 0;
  for (Hit hit : patternsToSearchIn) {
    Pattern pattern = (Pattern) hit;
    List<PatternAnnotation> candidates;
    if (inMatchedSpan && inContext) {
      // matched span and context together: take everything
      candidates = Arrays.asList(pattern.getPatternAnnotations());
    } else if (inMatchedSpan) {
      // matched span only
      candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
    } else {
      // context only: left context first, then right context appended
      candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
      candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
    }
    if (!candidates.isEmpty()) {
      total += getPatternAnnotations(candidates, annotationType).size();
    }
  }
  return total;
}
use of gate.creole.annic.SearchException in project gate-core by GateNLP.
the class LuceneDataStoreSearchGUI method setTarget.
/**
 * Called by the GUI when this viewer/editor has to initialise itself
 * for a specific object.
 * <p>
 * For a {@link LuceneDataStoreImpl} this registers the viewer as a datastore
 * listener, takes the datastore's searcher and fills the corpus combo box
 * with the names of the stored corpora. For a plain {@code Searcher} the
 * corpus combo box is disabled. In both cases the indexed annotation sets,
 * types and features are refreshed through
 * {@code updateSetsTypesAndFeatures()}.
 * </p>
 *
 * @param target the object (be it a {@link gate.Resource},
 *          {@link gate.DataStore}or whatever) this viewer has to
 *          display
 * @throws IllegalArgumentException if target is null or is neither a
 *           LuceneDataStoreImpl nor a Searcher
 * @throws GateRuntimeException if the corpus ids cannot be read from the
 *           datastore
 */
@Override
public void setTarget(Object target) {
  if (!(target instanceof LuceneDataStoreImpl) && !(target instanceof Searcher)) {
    // instanceof is false for null, so guard the getClass() call to report
    // the problem as an IllegalArgumentException instead of an NPE
    String targetType = (target == null) ? "null" : target.getClass().toString();
    throw new IllegalArgumentException("The GATE LuceneDataStoreSearchGUI can only be used with a GATE LuceneDataStores!\n" + targetType + " is not a GATE LuceneDataStore or an object of Searcher!");
  }
  this.target = target;
  // standalone Java application
  if (target instanceof LuceneDataStoreImpl) {
    LuceneDataStoreImpl dataStore = (LuceneDataStoreImpl) target;
    dataStore.addDatastoreListener(this);
    corpusToSearchIn.setEnabled(true);
    searcher = dataStore.getSearcher();
    updateSetsTypesAndFeatures();
    try {
      // get the corpus names from the datastore
      java.util.List<String> corpusPIds = dataStore.getLrIds(SerialCorpusImpl.class.getName());
      // check the list that is actually iterated (and keep the guard on
      // the field the ids are added to)
      if (corpusIds != null && corpusPIds != null) {
        for (Object corpusPId : corpusPIds) {
          String name = dataStore.getLrName(corpusPId);
          this.corpusIds.add(corpusPId);
          // add the corpus name to combobox
          ((DefaultComboBoxModel<String>) corpusToSearchIn.getModel()).addElement(name);
        }
      }
      SwingUtilities.invokeLater(new Runnable() {
        @Override
        public void run() {
          corpusToSearchIn.updateUI();
          corpusToSearchIn.setSelectedItem(Constants.ENTIRE_DATASTORE);
        }
      });
    } catch (PersistenceException e) {
      System.out.println("Couldn't find any available corpusIds.");
      throw new GateRuntimeException(e);
    }
  } else // Java Web Start application
  {
    searcher = (Searcher) target;
    corpusToSearchIn.setEnabled(false);
    // find out all annotation sets that are indexed
    updateSetsTypesAndFeatures();
  }
}
use of gate.creole.annic.SearchException in project gate-core by GateNLP.
the class LuceneDataStoreSearchGUI method updateSetsTypesAndFeatures.
/**
 * Re-reads the indexed annotation set names and the annotation types map
 * from the current searcher, stores both in this viewer and then schedules
 * {@code updateAnnotationSetsList()} through
 * {@link SwingUtilities#invokeLater(Runnable)}.
 *
 * @throws GateRuntimeException wrapping any SearchException raised by the
 *           searcher
 */
protected void updateSetsTypesAndFeatures() {
  final Runnable refreshSetsList = new Runnable() {
    @Override
    public void run() {
      updateAnnotationSetsList();
    }
  };
  try {
    annotationSetIDsFromDataStore = searcher.getIndexedAnnotationSetNames();
    allAnnotTypesAndFeaturesFromDatastore = searcher.getAnnotationTypesMap();
    SwingUtilities.invokeLater(refreshSetsList);
  } catch (SearchException searchFailure) {
    throw new GateRuntimeException(searchFailure);
  }
}
use of gate.creole.annic.SearchException in project gate-core by GateNLP.
the class LuceneDataStoreSearchGUI method updateAnnotationTypesList.
/**
 * Rebuilds the global statistics table for the corpus and annotation set
 * currently selected in the combo boxes. A selection of
 * {@code Constants.ENTIRE_DATASTORE} or {@code Constants.ALL_SETS} is passed
 * on as null. One row (annotation type, count) is added per type, in the
 * order given by {@code stringCollator}. When the counts add up to zero the
 * centre panel is replaced by an explanatory label. If the searcher throws a
 * SearchException the stack trace is printed and the method returns early.
 */
protected void updateAnnotationTypesList() {
  String selectedCorpusId = null;
  if (!corpusToSearchIn.getSelectedItem().equals(Constants.ENTIRE_DATASTORE)) {
    // index 0 is not a corpus entry, hence the shift by one
    selectedCorpusId = (String) corpusIds.get(corpusToSearchIn.getSelectedIndex() - 1);
  }
  String selectedSetName = null;
  if (!annotationSetsToSearchIn.getSelectedItem().equals(Constants.ALL_SETS)) {
    selectedSetName = (String) annotationSetsToSearchIn.getSelectedItem();
  }
  populatedAnnotationTypesAndFeatures = getTypesAndFeatures(selectedCorpusId, selectedSetName);
  int grandTotal = 0;
  try {
    TreeSet<String> sortedTypes = new TreeSet<String>(stringCollator);
    sortedTypes.addAll(populatedAnnotationTypesAndFeatures.keySet());
    globalStatisticsTableModel.setRowCount(0);
    for (String typeName : sortedTypes) {
      // number of occurrences of this annotation type for the chosen
      // corpus and annotation set
      int occurrences = searcher.freq(selectedCorpusId, selectedSetName, typeName);
      globalStatisticsTableModel.addRow(new Object[] { typeName, occurrences });
      grandTotal += occurrences;
    }
  } catch (SearchException se) {
    se.printStackTrace();
    return;
  }
  if (grandTotal != 0) {
    return;
  }
  centerPanel.removeAll();
  JLabel emptyNotice = new JLabel("<html>There is no annotation for the moment " + "for the selected corpus and annotation set.<br><br>" + "Select another corpus or annotation set or wait for the " + "end of the automatic indexation.");
  centerPanel.add(emptyNotice, new GridBagConstraints());
}
Aggregations