Search in sources :

Example 6 with SearchException

use of gate.creole.annic.SearchException in project gate-core by GateNLP.

the class LuceneSearcher method getIndexedAnnotationSetNames.

/**
 * Returns the annotation sets that are indexed. Each entry has the
 * following format:
 * <p>
 * corpusName;annotationSetName
 * </p>
 * where corpusName is the corpus ID stored with an indexed document (the
 * empty string when the document has no corpus ID) and annotationSetName is
 * the annotation set ID found in the index.
 * <p>
 * As a side effect, {@code annotationTypesMap} is replaced with a fresh map
 * keyed by {@code corpusID;annotationSetName;annotationType} whose values are
 * the feature names listed in the documents' indexed-features field.
 * </p>
 *
 * @return the distinct {@code corpusName;annotationSetName} entries; an
 *         empty array when the index has no annotation set terms
 * @throws SearchException if the index cannot be opened or read
 */
@Override
public String[] getIndexedAnnotationSetNames() throws SearchException {
    // Resolve the index location once; fall back to the raw file part of
    // the URL when it is not a syntactically valid URI.
    URL indexUrl = (URL) datastore.getIndexer().getParameters().get(Constants.INDEX_LOCATION_URL);
    String indexLocation;
    try {
        indexLocation = new File(indexUrl.toURI()).getAbsolutePath();
    } catch (URISyntaxException use) {
        indexLocation = new File(indexUrl.getFile()).getAbsolutePath();
    }
    annotationTypesMap = new HashMap<String, List<String>>();
    Set<String> toReturn = new HashSet<String>();
    try {
        Set<String> annotSets;
        IndexReader reader = IndexReader.open(indexLocation);
        try {
            annotSets = readAnnotationSetIds(reader);
            for (String annotSet : annotSets) {
                TermQuery tq = new TermQuery(new Term(Constants.ANNOTATION_SET_ID, annotSet));
                // NOTE(review): both clauses of this query use the same term, so
                // it does not restrict the hits to a corpus - confirm whether a
                // corpus ID clause was intended. Kept as is to preserve results.
                TermQuery atq = new TermQuery(new Term(Constants.ANNOTATION_SET_ID, annotSet));
                BooleanQuery bq = new BooleanQuery();
                bq.add(tq, true, false);
                bq.add(atq, true, false);
                // Recording features is idempotent for a given corpus ID within
                // one annotation set, so each corpus ID is processed only once.
                Set<String> processedCorpusIds = new HashSet<String>();
                gate.creole.annic.apache.lucene.search.Searcher searcher = new IndexSearcher(indexLocation);
                try {
                    Hits annotSetHits = searcher.search(tq);
                    for (int i = 0; i < annotSetHits.length(); i++) {
                        Document luceneDoc = annotSetHits.doc(i);
                        String corpusID = luceneDoc.get(Constants.CORPUS_ID);
                        if (corpusID == null) {
                            // not all documents belong to a corpus
                            corpusID = "";
                        }
                        toReturn.add(corpusID + ";" + annotSet);
                        if (!processedCorpusIds.add(corpusID)) {
                            continue;
                        }
                        recordIndexedFeatures(searcher.search(bq), corpusID, annotSet);
                    }
                } finally {
                    searcher.close();
                }
            }
        } finally {
            reader.close();
        }
    } catch (IOException ioe) {
        throw new SearchException(ioe);
    }
    return toReturn.toArray(new String[0]);
}

/**
 * Collects the text of every term of the annotation set ID field, closing
 * the term enumeration when done.
 */
private Set<String> readAnnotationSetIds(IndexReader reader) throws IOException {
    Set<String> annotSets = new HashSet<String>();
    TermEnum terms = reader.terms(new Term(Constants.ANNOTATION_SET_ID, ""));
    if (terms == null) {
        return annotSets;
    }
    try {
        boolean foundAnnotSet = false;
        do {
            Term t = terms.term();
            if (t == null) {
                continue;
            }
            if (t.field().equals(Constants.ANNOTATION_SET_ID)) {
                annotSets.add(t.text());
                foundAnnotSet = true;
            } else if (foundAnnotSet) {
                // moved past the annotation set terms
                break;
            }
        } while (terms.next());
    } finally {
        terms.close();
    }
    return annotSets;
}

/**
 * Adds the "AnnotationType.FeatureName" entries found in the given hits to
 * annotationTypesMap under the key corpusID;annotSet;AnnotationType.
 */
private void recordIndexedFeatures(Hits indexFeaturesHits, String corpusID, String annotSet) throws IOException {
    for (int j = 0; j < indexFeaturesHits.length(); j++) {
        Document aDoc = indexFeaturesHits.doc(j);
        String indexedFeatures = aDoc.get(Constants.INDEXED_FEATURES);
        if (indexedFeatures == null) {
            continue;
        }
        for (String aFeature : indexedFeatures.split(";")) {
            // AnnotationType.FeatureName
            int index = aFeature.indexOf(".");
            if (index == -1) {
                continue;
            }
            String type = aFeature.substring(0, index);
            String featureName = aFeature.substring(index + 1);
            String key = corpusID + ";" + annotSet + ";" + type;
            List<String> listOfFeatures = annotationTypesMap.get(key);
            if (listOfFeatures == null) {
                listOfFeatures = new ArrayList<String>();
                annotationTypesMap.put(key, listOfFeatures);
            }
            if (!listOfFeatures.contains(featureName)) {
                listOfFeatures.add(featureName);
            }
        }
    }
}
Also used : IndexSearcher(gate.creole.annic.apache.lucene.search.IndexSearcher) BooleanQuery(gate.creole.annic.apache.lucene.search.BooleanQuery) Hits(gate.creole.annic.apache.lucene.search.Hits) SearchException(gate.creole.annic.SearchException) URISyntaxException(java.net.URISyntaxException) TermEnum(gate.creole.annic.apache.lucene.index.TermEnum) Document(gate.creole.annic.apache.lucene.document.Document) URL(java.net.URL) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) TermQuery(gate.creole.annic.apache.lucene.search.TermQuery) Term(gate.creole.annic.apache.lucene.index.Term) IOException(java.io.IOException) IndexReader(gate.creole.annic.apache.lucene.index.IndexReader) File(java.io.File)

Example 7 with SearchException

use of gate.creole.annic.SearchException in project gate-core by GateNLP.

the class StatsCalculator method freq.

/**
 * Counts the annotations of the given type in the given patterns, looking
 * in the matched span, in the surrounding context, or in both.
 *
 * @param patternsToSearchIn the hits to inspect; every element is cast to
 *          {@link Pattern}. A null or empty list yields 0.
 * @param annotationType the annotation type to count
 * @param inMatchedSpan whether to look between the pattern's start and end
 *          offsets
 * @param inContext whether to look in the left and right context of the
 *          pattern
 * @return the total number of matching annotations over all patterns
 * @throws SearchException if both inMatchedSpan and inContext are false
 *           (and the list is not null or empty)
 * @see #freq(List, String, String, String, boolean, boolean)
 */
public static int freq(List<Hit> patternsToSearchIn, String annotationType, boolean inMatchedSpan, boolean inContext) throws SearchException {
    if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
        return 0;
    }
    if (!inMatchedSpan && !inContext) {
        throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
    }
    int count = 0;
    for (Hit hit : patternsToSearchIn) {
        Pattern pattern = (Pattern) hit;
        List<PatternAnnotation> annots;
        if (inMatchedSpan && !inContext) {
            annots = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
        } else if (!inMatchedSpan) {
            // context only: copy into a list owned by this method so that the
            // list returned by the pattern is never modified
            annots = new ArrayList<PatternAnnotation>(pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset()));
            annots.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
        } else {
            // both matchedSpan and context are set to true
            annots = Arrays.asList(pattern.getPatternAnnotations());
        }
        if (annots.isEmpty()) {
            continue;
        }
        count += getPatternAnnotations(annots, annotationType).size();
    }
    return count;
}
Also used : Pattern(gate.creole.annic.Pattern) Hit(gate.creole.annic.Hit) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException)

Example 8 with SearchException

use of gate.creole.annic.SearchException in project gate-core by GateNLP.

the class LuceneDataStoreSearchGUI method setTarget.

/**
 * Called by the GUI when this viewer/editor has to initialise itself
 * for a specific object.
 * <p>
 * For a {@link LuceneDataStoreImpl} this view registers itself as a
 * datastore listener, enables the corpus selector and fills it with the
 * names of the corpora found in the datastore. For any other
 * {@link Searcher} the corpus selector is disabled. In both cases the
 * indexed annotation sets, types and features are reloaded.
 * </p>
 *
 * @param target the object (be it a {@link gate.Resource},
 *          {@link gate.DataStore}or whatever) this viewer has to
 *          display
 * @throws IllegalArgumentException if target is null or is neither a
 *           LuceneDataStoreImpl nor a Searcher
 * @throws GateRuntimeException if the corpus IDs cannot be read from the
 *           datastore or the index cannot be queried
 */
@Override
public void setTarget(Object target) {
    if (!(target instanceof LuceneDataStoreImpl) && !(target instanceof Searcher)) {
        // target may be null here, so do not dereference it unguarded
        String targetType = (target == null) ? "null" : target.getClass().toString();
        throw new IllegalArgumentException("The GATE LuceneDataStoreSearchGUI can only be used with a GATE LuceneDataStores!\n" + targetType + " is not a GATE LuceneDataStore or an object of Searcher!");
    }
    this.target = target;
    // standalone Java application
    if (target instanceof LuceneDataStoreImpl) {
        LuceneDataStoreImpl luceneDataStore = (LuceneDataStoreImpl) target;
        luceneDataStore.addDatastoreListener(this);
        corpusToSearchIn.setEnabled(true);
        searcher = luceneDataStore.getSearcher();
        updateSetsTypesAndFeatures();
        try {
            // get the corpus names from the datastore
            java.util.List<String> corpusPIds = luceneDataStore.getLrIds(SerialCorpusImpl.class.getName());
            // guard the list that is iterated as well as the field that is filled
            if (corpusPIds != null && corpusIds != null) {
                for (Object corpusPId : corpusPIds) {
                    String name = luceneDataStore.getLrName(corpusPId);
                    this.corpusIds.add(corpusPId);
                    // add the corpus name to combobox
                    ((DefaultComboBoxModel<String>) corpusToSearchIn.getModel()).addElement(name);
                }
            }
            SwingUtilities.invokeLater(new Runnable() {

                @Override
                public void run() {
                    corpusToSearchIn.updateUI();
                    corpusToSearchIn.setSelectedItem(Constants.ENTIRE_DATASTORE);
                }
            });
        } catch (PersistenceException e) {
            System.out.println("Couldn't find any available corpusIds.");
            throw new GateRuntimeException(e);
        }
    } else // Java Web Start application
    {
        searcher = (Searcher) target;
        corpusToSearchIn.setEnabled(false);
        // find out all annotation sets that are indexed
        updateSetsTypesAndFeatures();
    }
}
Also used : Searcher(gate.creole.annic.Searcher) SearchException(gate.creole.annic.SearchException) LuceneDataStoreImpl(gate.persist.LuceneDataStoreImpl) DefaultComboBoxModel(javax.swing.DefaultComboBoxModel) SerialCorpusImpl(gate.corpora.SerialCorpusImpl) GateRuntimeException(gate.util.GateRuntimeException) PersistenceException(gate.persist.PersistenceException) EventObject(java.util.EventObject)

Example 9 with SearchException

use of gate.creole.annic.SearchException in project gate-core by GateNLP.

the class LuceneDataStoreSearchGUI method updateSetsTypesAndFeatures.

/**
 * Reloads the indexed annotation set names and the annotation types and
 * features map from the current searcher, then queues a call to
 * {@code updateAnnotationSetsList()} via
 * {@link SwingUtilities#invokeLater(Runnable)}.
 *
 * @throws GateRuntimeException wrapping the SearchException raised while
 *           querying the searcher; nothing is queued in that case
 */
protected void updateSetsTypesAndFeatures() {
    try {
        // the set names are fetched before the types map, in this order
        annotationSetIDsFromDataStore = searcher.getIndexedAnnotationSetNames();
        allAnnotTypesAndFeaturesFromDatastore = searcher.getAnnotationTypesMap();
    } catch (SearchException failure) {
        throw new GateRuntimeException(failure);
    }
    Runnable refreshAnnotationSets = new Runnable() {

        @Override
        public void run() {
            updateAnnotationSetsList();
        }
    };
    SwingUtilities.invokeLater(refreshAnnotationSets);
}
Also used : GateRuntimeException(gate.util.GateRuntimeException) SearchException(gate.creole.annic.SearchException)

Example 10 with SearchException

use of gate.creole.annic.SearchException in project gate-core by GateNLP.

the class LuceneDataStoreSearchGUI method updateAnnotationTypesList.

/**
 * Rebuilds the global statistics table for the corpus and annotation set
 * currently selected in the combo boxes: one row per annotation type with
 * its number of occurrences as reported by the searcher.
 * <p>
 * A null corpus is passed when the "entire datastore" item is selected and
 * a null annotation set when the "all sets" item is selected. If the
 * searcher fails, the stack trace is printed and the method returns with
 * the rows added so far. If the total count is zero, the centre panel's
 * content is replaced by an explanatory label.
 * </p>
 */
protected void updateAnnotationTypesList() {
    String corpusName;
    if (corpusToSearchIn.getSelectedItem().equals(Constants.ENTIRE_DATASTORE)) {
        corpusName = null;
    } else {
        // index 0 of the combo box is the "entire datastore" item
        corpusName = (String) corpusIds.get(corpusToSearchIn.getSelectedIndex() - 1);
    }

    String annotationSetName;
    if (annotationSetsToSearchIn.getSelectedItem().equals(Constants.ALL_SETS)) {
        annotationSetName = null;
    } else {
        annotationSetName = (String) annotationSetsToSearchIn.getSelectedItem();
    }

    populatedAnnotationTypesAndFeatures = getTypesAndFeatures(corpusName, annotationSetName);

    int countTotal = 0;
    try {
        // annotation types ordered with the string collator
        TreeSet<String> sortedTypes = new TreeSet<String>(stringCollator);
        sortedTypes.addAll(populatedAnnotationTypesAndFeatures.keySet());
        globalStatisticsTableModel.setRowCount(0);
        for (String annotationType : sortedTypes) {
            // number of occurrences of this annotation type in the chosen
            // corpus and annotation set
            int occurrences = searcher.freq(corpusName, annotationSetName, annotationType);
            globalStatisticsTableModel.addRow(new Object[] { annotationType, occurrences });
            countTotal += occurrences;
        }
    } catch (SearchException se) {
        se.printStackTrace();
        return;
    }

    if (countTotal != 0) {
        return;
    }
    centerPanel.removeAll();
    JLabel nothingIndexedYet = new JLabel("<html>There is no annotation for the moment "
        + "for the selected corpus and annotation set.<br><br>"
        + "Select another corpus or annotation set or wait for the "
        + "end of the automatic indexation.");
    centerPanel.add(nothingIndexedYet, new GridBagConstraints());
}
Also used : GridBagConstraints(java.awt.GridBagConstraints) TreeSet(java.util.TreeSet) SearchException(gate.creole.annic.SearchException) JLabel(javax.swing.JLabel)

Aggregations

SearchException (gate.creole.annic.SearchException)10 ArrayList (java.util.ArrayList)7 IOException (java.io.IOException)5 Pattern (gate.creole.annic.Pattern)4 List (java.util.List)4 Hit (gate.creole.annic.Hit)3 Term (gate.creole.annic.apache.lucene.index.Term)3 Hits (gate.creole.annic.apache.lucene.search.Hits)3 TermQuery (gate.creole.annic.apache.lucene.search.TermQuery)3 File (java.io.File)3 URISyntaxException (java.net.URISyntaxException)3 PatternAnnotation (gate.creole.annic.PatternAnnotation)2 Document (gate.creole.annic.apache.lucene.document.Document)2 BooleanQuery (gate.creole.annic.apache.lucene.search.BooleanQuery)2 IndexSearcher (gate.creole.annic.apache.lucene.search.IndexSearcher)2 GateRuntimeException (gate.util.GateRuntimeException)2 URL (java.net.URL)2 XStream (com.thoughtworks.xstream.XStream)1 StaxDriver (com.thoughtworks.xstream.io.xml.StaxDriver)1 SerialCorpusImpl (gate.corpora.SerialCorpusImpl)1