Search in sources :

Example 1 with Pattern

use of gate.creole.annic.Pattern in project gate-core by GateNLP.

the class LuceneSearcher method next.

/**
 * Returns the next batch of hits for the most recent search.
 *
 * <p>The internal pattern buffer is emptied on every call. If the last
 * search was unsuccessful, or a previous call already exhausted all search
 * threads, the method returns whatever {@code getHits()} produces for the
 * empty buffer.
 *
 * <p>When the last search was a "delete query", the result instead contains
 * one {@link Hit} per distinct (document ID, annotation set ID) pair found in
 * the lucene hits, created with 0, 0 and an empty string as the remaining
 * constructor arguments.
 *
 * @param numberOfHits number of hits wanted; -1 indicates all
 * @return the hits gathered by this call
 * @throws SearchException wrapping any exception raised while collecting hits
 */
@Override
public Hit[] next(int numberOfHits) throws SearchException {
    // every call starts from an empty buffer, whatever path is taken below
    annicPatterns = new ArrayList<Pattern>();
    if (!success || fwdIterationEnded) {
        return getHits();
    }
    try {
        if (wasDeleteQuery) {
            // parallel lists: docIDs.get(k) belongs with setNames.get(k)
            List<String> docIDs = new ArrayList<String>();
            List<String> setNames = new ArrayList<String>();
            for (int i = 0; i < luceneHits.length(); i++) {
                Document luceneDoc = luceneHits.doc(i);
                String documentID = luceneDoc.get(Constants.DOCUMENT_ID);
                String annotationSetID = luceneDoc.get(Constants.ANNOTATION_SET_ID);
                // Compare against EVERY pair collected so far. Looking only at
                // the first entry of a document would let a pair through again
                // as soon as that document has more than one annotation set.
                boolean alreadyListed = false;
                for (int j = 0; j < docIDs.size() && !alreadyListed; j++) {
                    String knownDoc = docIDs.get(j);
                    String knownSet = setNames.get(j);
                    boolean sameDoc = knownDoc == null ? documentID == null : knownDoc.equals(documentID);
                    boolean sameSet = knownSet == null ? annotationSetID == null : knownSet.equals(annotationSetID);
                    alreadyListed = sameDoc && sameSet;
                }
                if (!alreadyListed) {
                    docIDs.add(documentID);
                    setNames.add(annotationSetID);
                }
            }
            Hit[] toReturn = new Hit[docIDs.size()];
            for (int i = 0; i < toReturn.length; i++) {
                toReturn[i] = new Hit(docIDs.get(i), setNames.get(i), 0, 0, "");
            }
            return toReturn;
        }
        // resume with the search thread the previous call stopped at
        for (; luceneSearchThreadIndex < luceneSearchThreads.size(); luceneSearchThreadIndex++) {
            LuceneSearchThread lst = luceneSearchThreads.get(luceneSearchThreadIndex);
            List<Pattern> results = lst.next(numberOfHits);
            if (results != null) {
                if (numberOfHits != -1) {
                    // only ask the following threads for what is still missing
                    numberOfHits -= results.size();
                }
                this.annicPatterns.addAll(results);
                if (numberOfHits == 0) {
                    // quota reached; the index is NOT advanced, so the next
                    // call asks this same thread again
                    return getHits();
                }
            }
        }
        // All search threads are exhausted without reaching the requested
        // number of hits: remember that, so that later calls return the hits
        // of an empty buffer straight away.
        fwdIterationEnded = true;
        return getHits();
    } catch (Exception e) {
        throw new SearchException(e);
    }
}
Also used : Pattern(gate.creole.annic.Pattern) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException) Document(gate.creole.annic.apache.lucene.document.Document) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) SearchException(gate.creole.annic.SearchException) Hit(gate.creole.annic.Hit)

Example 2 with Pattern

use of gate.creole.annic.Pattern in project gate-core by GateNLP.

the class StatsCalculator method freqForAllValues.

/**
 * Counts, over all supplied hits, how often each distinct value of
 * {@code annotationType.feature} occurs.
 *
 * <p>Depending on the two flags, annotations are taken from the matched span
 * only, from the left and right context only, or from the whole pattern.
 *
 * @param patternsToSearchIn hits to inspect; every element is cast to
 *          {@link Pattern}. A null or empty list yields an empty map.
 * @param annotationType annotation type handed to the annotation filter
 * @param feature feature whose values are counted
 * @param inMatchedSpan whether annotations of the matched span are considered
 * @param inContext whether annotations of the surrounding context are considered
 * @return a map from each feature value to its number of occurrences
 * @throws SearchException if both {@code inMatchedSpan} and {@code inContext}
 *           are false (checked only when there is something to search in)
 */
public static Map<String, Integer> freqForAllValues(List<Hit> patternsToSearchIn, String annotationType, String feature, boolean inMatchedSpan, boolean inContext) throws SearchException {
    Map<String, Integer> frequencies = new HashMap<String, Integer>();
    if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
        return frequencies;
    }
    if (!(inMatchedSpan || inContext)) {
        throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
    }
    for (Hit hit : patternsToSearchIn) {
        Pattern pattern = (Pattern) hit;
        // at least one flag is true here, so exactly three cases remain
        List<PatternAnnotation> candidates;
        if (inMatchedSpan && inContext) {
            // whole pattern: matched span plus context
            candidates = Arrays.asList(pattern.getPatternAnnotations());
        } else if (inMatchedSpan) {
            // matched span only
            candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
        } else {
            // context only: left context first, then the right context appended
            candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
            candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
        }
        if (!candidates.isEmpty()) {
            for (PatternAnnotation annotation : getPatternAnnotations(candidates, annotationType, feature)) {
                String value = annotation.getFeatures().get(feature);
                Integer seenSoFar = frequencies.get(value);
                frequencies.put(value, seenSoFar == null ? 1 : seenSoFar + 1);
            }
        }
    }
    return frequencies;
}
Also used : Pattern(gate.creole.annic.Pattern) Hit(gate.creole.annic.Hit) HashMap(java.util.HashMap) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException)

Example 3 with Pattern

use of gate.creole.annic.Pattern in project gate-core by GateNLP.

the class LuceneDataStoreSearchGUI method updateStackView.

/**
 * Refreshes the annotation stack shown in the central panel.
 *
 * <p>Without a selected row in the results table the panel is emptied and
 * shows a single hint label. Otherwise the stack is rebuilt from the selected
 * result: one row per configured stack row that is not hidden, each with a
 * button that hides the row again, filled with the result's annotations of
 * the row's type.
 */
protected void updateStackView() {
    if (resultTable.getSelectedRow() == -1) {
        // no result is selected: replace the stack by a one-line message
        GridBagConstraints constraints = new GridBagConstraints();
        constraints.gridx = 0;
        constraints.gridy = 0;
        constraints.fill = GridBagConstraints.BOTH;
        centerPanel.removeAll();
        String message;
        if (resultTable.getRowCount() > 0) {
            message = "Select a row in the results table below.";
        } else if (numberOfResultsSlider.getValue() > (numberOfResultsSlider.getMaximum() - 100)) {
            // slider within the last 100 units of its maximum
            message = "Retrieving all results...";
        } else {
            message = "Retrieving " + numberOfResultsSlider.getValue() + " results...";
        }
        centerPanel.add(new JLabel(message), constraints);
        centerPanel.validate();
        centerPanel.repaint();
        return;
    }

    // the result behind the lead selection of the results table
    Pattern result = (Pattern) results.get(resultTable.rowViewToModel(resultTable.getSelectionModel().getLeadSelectionIndex()));

    // set up text, offsets and listeners of the stack, then drop the old rows
    centerPanel.setText(result.getPatternText());
    centerPanel.setExpressionStartOffset(result.getStartOffset());
    centerPanel.setExpressionEndOffset(result.getEndOffset());
    centerPanel.setContextBeforeSize(result.getStartOffset() - result.getLeftContextStartOffset());
    centerPanel.setContextAfterSize(result.getRightContextEndOffset() - result.getEndOffset());
    centerPanel.setLastRowButton(configureStackViewButton);
    centerPanel.setTextMouseListener(new TextMouseListener());
    centerPanel.setHeaderMouseListener(new HeaderMouseListener());
    centerPanel.setAnnotationMouseListener(new AnnotationMouseListener());
    centerPanel.clearAllRows();

    for (int rowIndex = 0; rowIndex < numStackRows; rowIndex++) {
        String[] rowConfig = stackRows[rowIndex];
        if (rowConfig[DISPLAY].equals("false")) {
            // row is configured but currently hidden
            continue;
        }
        final String type = rowConfig[ANNOTATION_TYPE];
        final String feature = rowConfig[FEATURE];
        String shortcut = rowConfig[SHORTCUT];

        // button at the end of the row that hides the row
        JButton hideRowButton = new ButtonBorder(new Color(250, 250, 250), new Insets(0, 3, 0, 3), true);
        hideRowButton.setIcon(MainFrame.getIcon("Remove"));
        hideRowButton.setToolTipText("Hide this row.");
        hideRowButton.addActionListener(new ActionListener() {

            @Override
            public void actionPerformed(ActionEvent event) {
                // look the row up again by type and feature at click time
                int target = findStackRow(ANNOTATION_TYPE, type, FEATURE, feature);
                if (target >= 0) {
                    stackRows[target][DISPLAY] = "false";
                    saveStackViewConfiguration();
                }
                updateStackView();
            }
        });

        // anything other than the two known settings crops in the middle
        String cropSetting = rowConfig[CROP];
        int crop = cropSetting.equals("Crop start") ? AnnotationStack.CROP_START
                : cropSetting.equals("Crop end") ? AnnotationStack.CROP_END
                : AnnotationStack.CROP_MIDDLE;
        centerPanel.addRow(null, type, feature, hideRowButton, shortcut, crop);

        // annotations of this row's type; each gets its own copy of the features
        PatternAnnotation[] rowAnnotations = result.getPatternAnnotations(type);
        if (rowAnnotations != null) {
            for (PatternAnnotation annotation : rowAnnotations) {
                FeatureMap featuresCopy = Factory.newFeatureMap();
                featuresCopy.putAll(annotation.getFeatures());
                centerPanel.addAnnotation(annotation.getStartOffset(), annotation.getEndOffset(), annotation.getType(), featuresCopy);
            }
        }
    }

    // draw the annotation stack
    centerPanel.drawStack();
}
Also used : Pattern(gate.creole.annic.Pattern) GridBagConstraints(java.awt.GridBagConstraints) Insets(java.awt.Insets) ActionEvent(java.awt.event.ActionEvent) PatternAnnotation(gate.creole.annic.PatternAnnotation) Color(java.awt.Color) JButton(javax.swing.JButton) JLabel(javax.swing.JLabel) FeatureMap(gate.FeatureMap) ActionListener(java.awt.event.ActionListener)

Example 4 with Pattern

use of gate.creole.annic.Pattern in project gate-core by GateNLP.

the class LuceneSearchThread method locatePatterns.

/**
 * Builds a {@link Pattern} for every candidate in the token stream whose first
 * term position can be matched to an annotation, and drops the others.
 *
 * <p>As a side effect, the type and feature names of every annotation that
 * carries text are recorded in {@code luceneSearcher.annotationTypesMap};
 * this also happens for candidates that are discarded afterwards.
 *
 * @param docID document ID passed on to each created pattern
 * @param annotationSetName annotation set name passed on to each created pattern
 * @param gateAnnotations one annotation list per candidate; empty lists are skipped
 * @param firstTermPositions per candidate, the position (an {@code Integer}) of
 *          its first term; indexed in parallel with {@code gateAnnotations}
 * @param patternLength per candidate, the value used as the pattern's end
 *          offset; indexed in parallel with {@code gateAnnotations}
 * @param queryString query string passed on to each created pattern
 * @return the patterns that could be located, in candidate order
 */
private List<Pattern> locatePatterns(String docID, String annotationSetName, List<List<PatternAnnotation>> gateAnnotations, List<?> firstTermPositions, List<Integer> patternLength, String queryString) {
    List<Pattern> located = new ArrayList<Pattern>();
    for (int i = 0; i < gateAnnotations.size(); i++) {
        List<PatternAnnotation> annotations = gateAnnotations.get(i);
        if (annotations.isEmpty()) {
            continue;
        }

        // text span covered by the candidate: lowest start to highest end offset
        int smallest = Integer.MAX_VALUE;
        int highest = -1;
        for (PatternAnnotation ga : annotations) {
            smallest = Math.min(smallest, ga.getStartOffset());
            highest = Math.max(highest, ga.getEndOffset());
        }

        // start from blanks, then lay each annotation's text over its position
        char[] patternText = new char[highest - smallest];
        java.util.Arrays.fill(patternText, ' ');
        for (PatternAnnotation ga : annotations) {
            String text = ga.getText();
            if (text == null) {
                // annotations without text (such as split) contribute nothing
                continue;
            }
            int offset = ga.getStartOffset() - smallest;
            // never write past the end of the span
            int limit = Math.min(text.length(), patternText.length - offset);
            for (int m = 0; m < limit; m++) {
                patternText[offset + m] = text.charAt(m);
            }

            // record the annotation type and its feature names
            String type = ga.getType();
            Map<String, String> features = ga.getFeatures();
            List<String> featureNames;
            if (luceneSearcher.annotationTypesMap.containsKey(type)) {
                // known type: append only the feature names not listed yet
                featureNames = luceneSearcher.annotationTypesMap.get(type);
                if (features != null) {
                    for (String featureName : features.keySet()) {
                        if (!featureNames.contains(featureName)) {
                            featureNames.add(featureName);
                        }
                    }
                }
            } else {
                // new type: the list starts with the "All" entry
                featureNames = new ArrayList<String>();
                featureNames.add("All");
                if (features != null) {
                    featureNames.addAll(features.keySet());
                }
            }
            luceneSearcher.annotationTypesMap.put(type, featureNames);
        }

        // the pattern starts at the lowest start offset among the annotations
        // sitting at the first term position
        int firstTermPosition = ((Integer) firstTermPositions.get(i)).intValue();
        int patternEnd = patternLength.get(i).intValue();
        int patternStart = Integer.MAX_VALUE;
        for (PatternAnnotation ga : annotations) {
            if (ga.getPosition() == firstTermPosition) {
                patternStart = Math.min(patternStart, ga.getStartOffset());
            }
        }

        // discard when no annotation sits at the first term position or the
        // pattern would stick out of the covered text span
        boolean startMissing = patternStart == Integer.MAX_VALUE;
        if (startMissing || patternStart < smallest || patternEnd > highest) {
            continue;
        }
        located.add(new Pattern(docID, annotationSetName, new String(patternText), patStartOrSelf(patternStart), patternEnd, smallest, highest, annotations, queryString));
    }
    return located;
}

/** Returns its argument unchanged; keeps the long constructor call above readable. */
private static int patStartOrSelf(int patternStart) {
    return patternStart;
}
Also used : Pattern(gate.creole.annic.Pattern) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList)

Example 5 with Pattern

use of gate.creole.annic.Pattern in project gate-core by GateNLP.

the class LuceneSearcher method search.

/**
 * Runs a search and reports whether any results were found.
 *
 * <p>All state of a previous search is reset first. Two kinds of request are
 * handled:
 * <ul>
 * <li>If {@code parameters} has exactly two entries, one of them a non-null
 * {@code Constants.INDEX_LOCATION_URL}, and a non-null
 * {@code Constants.CORPUS_ID}, the index is queried for the documents stored
 * under that corpus only; {@code query} is not used in this case.</li>
 * <li>Otherwise {@code query} is run against every index location. When
 * {@code Constants.INDEX_LOCATIONS} is missing, the location of the
 * datastore's indexer is used and stored back into {@code parameters}.</li>
 * </ul>
 *
 * @param query the query to run; must not be null for a regular search
 * @param parameters search parameters; read keys are
 *          {@code INDEX_LOCATION_URL}, {@code CORPUS_ID},
 *          {@code INDEX_LOCATIONS}, {@code CONTEXT_WINDOW} and
 *          {@code ANNOTATION_SET_ID}. The map may be modified (see above).
 * @return true if results were found, false otherwise
 * @throws SearchException if {@code parameters} or {@code query} is null, no
 *           index location is available, the context window is missing or
 *           not positive, or reading the index fails
 */
@SuppressWarnings("unchecked")
@Override
public boolean search(String query, Map<String, Object> parameters) throws SearchException {
    // forget everything about the previous search
    luceneHits = null;
    annicPatterns = new ArrayList<Pattern>();
    annotationTypesMap = new HashMap<String, List<String>>();
    luceneSearchThreads = new ArrayList<LuceneSearchThread>();
    luceneSearchThreadIndex = 0;
    success = false;
    fwdIterationEnded = false;
    wasDeleteQuery = false;
    if (parameters == null) {
        throw new SearchException("Parameters cannot be null");
    }
    this.parameters = parameters;

    /*
     * First check whether the request only asks for the documents stored
     * under a specific corpus.
     */
    if (parameters.size() == 2 && parameters.get(Constants.INDEX_LOCATION_URL) != null) {
        String corpusID = (String) parameters.get(Constants.CORPUS_ID);
        String indexLocation = indexUrlToAbsolutePath((URL) parameters.get(Constants.INDEX_LOCATION_URL));
        if (corpusID != null) {
            wasDeleteQuery = true;
            TermQuery tq = new TermQuery(new Term(Constants.CORPUS_ID, corpusID));
            try {
                // NOTE(review): the searcher is not closed here; luceneHits is
                // read later by next() - confirm who owns the searcher.
                gate.creole.annic.apache.lucene.search.Searcher searcher = new IndexSearcher(indexLocation);
                luceneHits = searcher.search(tq);
                success = luceneHits.length() > 0;
                return success;
            } catch (IOException ioe) {
                // the cause travels with the exception; no extra stack trace dump
                throw new SearchException(ioe);
            }
        }
    }

    // fall back to the index location of the datastore's indexer
    if (parameters.get(Constants.INDEX_LOCATIONS) == null) {
        URL defaultIndexUrl = (URL) datastore.getIndexer().getParameters().get(Constants.INDEX_LOCATION_URL);
        ArrayList<String> defaultLocations = new ArrayList<String>();
        defaultLocations.add(indexUrlToAbsolutePath(defaultIndexUrl));
        parameters.put(Constants.INDEX_LOCATIONS, defaultLocations);
    }
    // work on a private copy of the locations
    indexLocations = new ArrayList<String>((List<? extends String>) parameters.get(Constants.INDEX_LOCATIONS));
    if (indexLocations.size() == 0) {
        throw new SearchException("Corpus is not initialized");
    }

    // check for valid context window
    if (parameters.get(Constants.CONTEXT_WINDOW) == null) {
        throw new SearchException("Parameter " + Constants.CONTEXT_WINDOW + " is not provided!");
    }
    contextWindow = ((Integer) parameters.get(Constants.CONTEXT_WINDOW)).intValue();
    if (getContextWindow().intValue() <= 0) {
        throw new SearchException("Context Window must be atleast 1 or > 1");
    }
    if (query == null) {
        throw new SearchException("Query is not initialized");
    }
    this.query = query;
    this.corpusToSearchIn = (String) parameters.get(Constants.CORPUS_ID);
    this.annotationSetToSearchIn = (String) parameters.get(Constants.ANNOTATION_SET_ID);

    // TODO: is this really useful or used to have several indexLocations ?
    for (int indexCounter = 0; indexCounter < indexLocations.size(); indexCounter++) {
        String location = indexLocations.get(indexCounter);
        // one search thread object per index; keep only those that found something
        LuceneSearchThread lst = new LuceneSearchThread();
        if (lst.search(query, contextWindow, location, corpusToSearchIn, annotationSetToSearchIn, this)) {
            luceneSearchThreads.add(lst);
        }
    }
    success = luceneSearchThreads.size() > 0;
    return success;
}

/**
 * Converts an index location URL into an absolute file system path.
 *
 * @param indexLocationUrl URL of the index directory
 * @return the absolute path of the file the URL denotes
 */
private static String indexUrlToAbsolutePath(URL indexLocationUrl) {
    try {
        return new File(indexLocationUrl.toURI()).getAbsolutePath();
    } catch (URISyntaxException use) {
        // the URL cannot be expressed as a URI: use its file part as it is
        return new File(indexLocationUrl.getFile()).getAbsolutePath();
    }
}
Also used : IndexSearcher(gate.creole.annic.apache.lucene.search.IndexSearcher) Pattern(gate.creole.annic.Pattern) TermQuery(gate.creole.annic.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException) URISyntaxException(java.net.URISyntaxException) Term(gate.creole.annic.apache.lucene.index.Term) IOException(java.io.IOException) URL(java.net.URL) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File)

Aggregations

Pattern (gate.creole.annic.Pattern)7 ArrayList (java.util.ArrayList)5 PatternAnnotation (gate.creole.annic.PatternAnnotation)4 SearchException (gate.creole.annic.SearchException)4 Hit (gate.creole.annic.Hit)3 FeatureMap (gate.FeatureMap)2 Color (java.awt.Color)2 GridBagConstraints (java.awt.GridBagConstraints)2 Insets (java.awt.Insets)2 ActionEvent (java.awt.event.ActionEvent)2 ActionListener (java.awt.event.ActionListener)2 IOException (java.io.IOException)2 URISyntaxException (java.net.URISyntaxException)2 Document (gate.Document)1 Searcher (gate.creole.annic.Searcher)1 Document (gate.creole.annic.apache.lucene.document.Document)1 Term (gate.creole.annic.apache.lucene.index.Term)1 IndexSearcher (gate.creole.annic.apache.lucene.search.IndexSearcher)1 TermQuery (gate.creole.annic.apache.lucene.search.TermQuery)1 AnnotationStack (gate.gui.docview.AnnotationStack)1