Search in sources :

Example 6 with Document

use of gate.Document in project gate-core by GateNLP.

the class NameBearerHandle method buildStaticPopupItems.

/**
 * Rebuilds {@code staticPopupItems}, the fixed part of the popup menu, from
 * the runtime type of {@code target}.
 *
 * <p>Every group of entries is preceded by a {@code null} element
 * (presumably a separator placeholder; confirm against the code that
 * consumes the list). Plain processing resources, language resources and
 * controllers each contribute their own group.
 */
protected void buildStaticPopupItems() {
    staticPopupItems = new ArrayList<JComponent>();
    final boolean isApplication = target instanceof Controller;
    final boolean isLanguageResource = target instanceof LanguageResource;

    if (target instanceof ProcessingResource && !isApplication) {
        // processing resources that are not themselves applications
        staticPopupItems.add(null);
        staticPopupItems.add(new XJMenuItem(new ReloadAction(), sListenerProxy));
        staticPopupItems.add(new XJMenuItem(new ApplicationWithPRAction(), sListenerProxy));
    } else if (isLanguageResource) {
        final LanguageResource languageResource = (LanguageResource) target;
        staticPopupItems.add(null);

        if (target instanceof Document) {
            staticPopupItems.add(new XJMenuItem(new CreateCorpusForDocAction(), sListenerProxy));
        }

        if (target instanceof gate.TextualDocument) {
            staticPopupItems.add(null);
            staticPopupItems.add(new DocumentExportMenu(this));
        } else if (target instanceof Corpus) {
            // helper components are created before the actions are added
            corpusFiller = new CorpusFillerComponent();
            scfInputDialog = new SingleConcatenatedFileInputDialog();
            staticPopupItems.add(new XJMenuItem(new PopulateCorpusAction(), sListenerProxy));
            staticPopupItems.add(new XJMenuItem(new PopulateCorpusFromSingleConcatenatedFileAction(), sListenerProxy));
            staticPopupItems.add(null);
            staticPopupItems.add(new DocumentExportMenu(this));
        }

        // "save" is only offered when the resource already lives in a datastore
        final boolean belongsToDataStore = languageResource.getDataStore() != null;
        if (belongsToDataStore) {
            staticPopupItems.add(new XJMenuItem(new SaveAction(), sListenerProxy));
        }

        final boolean isSchema = target instanceof AnnotationSchema;
        if (!isSchema) {
            staticPopupItems.add(new XJMenuItem(new SaveToAction(), sListenerProxy));
        }
    }

    if (isApplication) {
        // applications
        staticPopupItems.add(null);
        if (target instanceof SerialAnalyserController) {
            staticPopupItems.add(new XJMenuItem(new MakeConditionalAction(), sListenerProxy));
        }
        staticPopupItems.add(new XJMenuItem(new DumpToFileAction(), sListenerProxy));
        staticPopupItems.add(new XJMenuItem(new ExportApplicationAction(), sListenerProxy));
    }
}
Also used : LanguageResource(gate.LanguageResource) ProcessingResource(gate.ProcessingResource) Document(gate.Document) IndexedCorpus(gate.creole.ir.IndexedCorpus) Corpus(gate.Corpus) AnnotationSchema(gate.creole.AnnotationSchema) SerialAnalyserController(gate.creole.SerialAnalyserController) ConditionalSerialAnalyserController(gate.creole.ConditionalSerialAnalyserController) JComponent(javax.swing.JComponent) SerialAnalyserController(gate.creole.SerialAnalyserController) Controller(gate.Controller) ConditionalSerialAnalyserController(gate.creole.ConditionalSerialAnalyserController) CorpusController(gate.CorpusController) ConditionalController(gate.creole.ConditionalController) XJMenuItem(gate.swing.XJMenuItem)

Example 7 with Document

use of gate.Document in project gate-core by GateNLP.

the class CorpusPersistence method extractDataFromSource.

/**
 * Populates this Persistence with the data that needs to be stored from the
 * original source object.
 *
 * <p>After delegating to the superclass, {@code docList} receives the
 * persistent representation of every document in the corpus when
 * {@code dsData} is {@code null}; otherwise {@code docList} is set to
 * {@code null}.
 *
 * @param source the object to extract data from; must be a {@link Corpus}
 * @throws UnsupportedOperationException if {@code source} is {@code null}
 *         or is not a {@link Corpus}
 * @throws PersistenceException propagated unchanged from the superclass
 *         call or from {@code PersistenceManager}
 */
@Override
public void extractDataFromSource(Object source) throws PersistenceException {
    // check input; a null source must be reported the same way as a wrong
    // type instead of failing with a NullPointerException while the message
    // is being built
    if (!(source instanceof Corpus)) {
        String actualType = (source == null) ? "null" : source.getClass().getName();
        throw new UnsupportedOperationException(getClass().getName() + " can only be used for " + Corpus.class.getName() + " objects!\n" + actualType + " is not a " + Corpus.class.getName());
    }
    Corpus corpus = (Corpus) source;
    super.extractDataFromSource(source);
    if (dsData == null) {
        // transient corpus; we still need to save the docs
        docList = new ArrayList<Serializable>();
        for (Document doc : corpus) {
            docList.add(PersistenceManager.getPersistentRepresentation(doc));
        }
    } else {
        // persistent corpus; it takes care of documents by itself
        docList = null;
    }
}
Also used : Serializable(java.io.Serializable) Document(gate.Document) Corpus(gate.Corpus)

Example 8 with Document

use of gate.Document in project gate-core by GateNLP.

the class CorpusBenchmarkTool method evaluateCorpus.

// generateCorpus
/**
 * Evaluates a corpus: for every document stored in the serial datastore at
 * {@code processedDir}, loads the matching clean document from
 * {@code fileDir} and the matching human-annotated document from
 * {@code markedDir}, and hands the three to {@code evaluateDocuments}.
 *
 * <p>When {@code isMarkedStored} or {@code isMarkedClean} is set, the work is
 * delegated to {@code evaluateMarkedStored} / {@code evaluateMarkedClean}
 * instead. A missing processed directory switches the tool to
 * "marked vs. clean" mode unless "marked vs. stored" was requested.
 *
 * @param fileDir directory with the clean documents; nothing is done when it
 *        is {@code null} or does not exist
 * @param processedDir directory of the datastore with processed documents
 * @param markedDir directory (or datastore, when {@code isMarkedDS} is set)
 *        with the human-annotated documents
 * @param errorDir directory for error output in more-info mode; when
 *        {@code null} a directory named {@code ERROR_DIR_NAME} under
 *        {@code currDir} is used
 * @throws GateRuntimeException wrapping any URL, persistence or resource
 *         instantiation failure raised while processing the datastore
 */
protected void evaluateCorpus(File fileDir, File processedDir, File markedDir, File errorDir) {
    // 1. check if we have input files and the processed Dir
    if (fileDir == null || !fileDir.exists()) {
        return;
    }
    if (processedDir == null || !processedDir.exists()) {
        // if the user wants evaluation of marked and stored that's not possible
        if (isMarkedStored) {
            Out.prln("Cannot evaluate because no processed documents exist.");
            return;
        } else {
            isMarkedClean = true;
        }
    }
    // create the error directory or clean it up if needed
    File errDir = null;
    if (isMoreInfoMode) {
        errDir = errorDir;
        if (errDir == null) {
            errDir = new File(currDir, ERROR_DIR_NAME);
        } else if (!Files.rmdir(errDir)) {
            // an old directory is removed first so the new one starts empty
            Out.prln("cannot delete old error directory: " + errDir);
        }
        Out.prln("Create error directory: " + errDir + "<BR><BR>");
        errDir.mkdir();
    }
    // looked for marked texts only if the directory exists
    boolean processMarked = markedDir != null && markedDir.exists();
    if (!processMarked && (isMarkedStored || isMarkedClean)) {
        Out.prln("Cannot evaluate because no human-annotated documents exist.");
        return;
    }
    if (isMarkedStored) {
        evaluateMarkedStored(markedDir, processedDir, errDir);
        return;
    }
    if (isMarkedClean) {
        evaluateMarkedClean(markedDir, fileDir, errDir);
        return;
    }
    // open the datastore and process each document
    try {
        // open the data store
        DataStore sds = Factory.openDataStore("gate.persist.SerialDataStore", processedDir.toURI().toURL().toExternalForm());
        List<String> lrIDs = sds.getLrIds("gate.corpora.DocumentImpl");
        for (String docID : lrIDs) {
            // The documents are scoped to one iteration: a clean or marked
            // document that cannot be found must be passed on as null, never
            // as the (already scheduled for deletion) document of the
            // previous iteration.
            Document persDoc = null;
            Document cleanDoc = null;
            Document markedDoc = null;
            // read the stored document
            FeatureMap features = Factory.newFeatureMap();
            features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
            features.put(DataStore.LR_ID_FEATURE_NAME, docID);
            FeatureMap hparams = Factory.newFeatureMap();
            persDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features, hparams);
            final String persName = persDoc.getName();
            // Name without its last extension; a name that has no '.' is used
            // whole instead of triggering a StringIndexOutOfBoundsException.
            final int dotIndex = persName.lastIndexOf('.');
            final String baseName = (dotIndex < 0) ? persName : persName.substring(0, dotIndex);
            if (isMoreInfoMode) {
                String errName = baseName + ".err";
                Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + persName + "</a>" + "</H2>");
            } else {
                Out.prln("<H2>" + persName + "</H2>");
            }
            File cleanDocFile = new File(fileDir, persName);
            // try reading the original document from clean
            if (!cleanDocFile.exists()) {
                Out.prln("Warning: Cannot find original document " + persName + " in " + fileDir);
            } else {
                FeatureMap params = Factory.newFeatureMap();
                params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanDocFile.toURI().toURL());
                params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
                // create the document
                cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
                cleanDoc.setName(persName);
            }
            // try finding the marked document
            if (!isMarkedDS) {
                File markedDocFile = new File(markedDir, baseName + ".xml");
                if (!processMarked || !markedDocFile.exists()) {
                    Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
                } else {
                    FeatureMap params = Factory.newFeatureMap();
                    params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
                    params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
                    // create the document
                    markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
                    markedDoc.setName(persName);
                }
            } else {
                // open marked from a DS
                // NOTE(review): this datastore is opened once per stored
                // document and never closed, and the non-matching documents
                // loaded below are never released. Left unchanged here because
                // the effect of closing a datastore on documents loaded from
                // it is not visible from this method; confirm and fix.
                DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
                List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
                boolean found = false;
                int k = 0;
                // search for the marked doc with the same name
                while (k < lrIDs1.size() && !found) {
                    String docID1 = lrIDs1.get(k);
                    // read the stored document
                    FeatureMap features1 = Factory.newFeatureMap();
                    features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
                    features1.put(DataStore.LR_ID_FEATURE_NAME, docID1);
                    Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
                    // check whether this is our doc; a document without the
                    // source URL feature simply does not match
                    Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
                    if (sourceUrl != null && sourceUrl.toString().endsWith(persName)) {
                        found = true;
                        markedDoc = tempDoc;
                    } else {
                        k++;
                    }
                }
            }
            evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
            // resources are released via the Swing event queue
            if (persDoc != null) {
                final gate.Document pd = persDoc;
                javax.swing.SwingUtilities.invokeLater(new Runnable() {

                    @Override
                    public void run() {
                        Factory.deleteResource(pd);
                    }
                });
            }
            if (cleanDoc != null) {
                final gate.Document cd = cleanDoc;
                javax.swing.SwingUtilities.invokeLater(new Runnable() {

                    @Override
                    public void run() {
                        Factory.deleteResource(cd);
                    }
                });
            }
            if (markedDoc != null) {
                final gate.Document md = markedDoc;
                javax.swing.SwingUtilities.invokeLater(new Runnable() {

                    @Override
                    public void run() {
                        Factory.deleteResource(md);
                    }
                });
            }
        }
        // for loop through saved docs
        sds.close();
    } catch (java.net.MalformedURLException ex) {
        throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex.getMessage()).initCause(ex);
    } catch (PersistenceException ex1) {
        throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex1.getMessage()).initCause(ex1);
    } catch (ResourceInstantiationException ex2) {
        throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex2.getMessage()).initCause(ex2);
    }
}
Also used : Document(gate.Document) ResourceInstantiationException(gate.creole.ResourceInstantiationException) FeatureMap(gate.FeatureMap) SerialDataStore(gate.persist.SerialDataStore) DataStore(gate.DataStore) PersistenceException(gate.persist.PersistenceException) Document(gate.Document) File(java.io.File)

Example 9 with Document

use of gate.Document in project gate-core by GateNLP.

the class CorpusBenchmarkTool method evaluateMarkedClean.

// evaluateMarkedStored
/**
 * Evaluates every regular file in {@code cleanDir} against its
 * human-annotated counterpart from {@code markedDir}.
 *
 * <p>For each clean file a document is created, the marked document is
 * looked up (as {@code <name>.xml} in {@code markedDir}, or inside a serial
 * datastore at {@code markedDir} when {@code isMarkedDS} is set) and the pair
 * is passed to {@code evaluateDocuments} with a {@code null} stored document.
 * Files whose clean or marked document cannot be created are reported via
 * {@code Out} and skipped. Every document created for a file is scheduled for
 * deletion when that file has been dealt with, including when it is skipped.
 *
 * @param markedDir directory or datastore with the human-annotated documents
 * @param cleanDir directory with the clean documents; when it cannot be
 *        listed a message is printed and nothing is evaluated
 * @param errDir error output directory handed on to {@code evaluateDocuments}
 */
protected void evaluateMarkedClean(File markedDir, File cleanDir, File errDir) {
    // there is no stored (previously processed) document in this mode
    Document persDoc = null;
    // listFiles() yields null when the path is not a directory or cannot be read
    File[] cleanDocs = (cleanDir == null) ? null : cleanDir.listFiles();
    if (cleanDocs == null) {
        Out.prln("Cannot list documents in directory: " + cleanDir);
        return;
    }
    for (File cleanFile : cleanDocs) {
        if (!cleanFile.isFile()) {
            continue;
        }
        final String fileName = cleanFile.getName();
        // Scoped to one iteration so a failed lookup can never reuse the
        // document of a previous file.
        Document cleanDoc = null;
        Document markedDoc = null;
        try {
            // try reading the original document from clean
            FeatureMap params = Factory.newFeatureMap();
            try {
                params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanFile.toURI().toURL());
            } catch (java.net.MalformedURLException ex) {
                Out.prln("Cannot create document from file: " + cleanFile.getAbsolutePath());
                continue;
            }
            params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
            FeatureMap hparams = Factory.newFeatureMap();
            // create the document
            try {
                cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, fileName);
            } catch (gate.creole.ResourceInstantiationException ex) {
                Out.prln("Cannot create document from file: " + cleanFile.getAbsolutePath());
                continue;
            }
            if (isMoreInfoMode) {
                // a file name without '.' gets the extension appended instead
                // of triggering a StringIndexOutOfBoundsException
                int fileDot = fileName.lastIndexOf('.');
                String errName = ((fileDot < 0) ? fileName : fileName.substring(0, fileDot)) + ".err";
                Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + fileName + "</a>" + "</H2>");
            } else {
                Out.prln("<H2>" + fileName + "</H2>");
            }
            // try finding the marked document
            if (!isMarkedDS) {
                String cleanName = cleanDoc.getName();
                int nameDot = cleanName.lastIndexOf('.');
                String markedName = ((nameDot < 0) ? cleanName : cleanName.substring(0, nameDot)) + ".xml";
                File markedDocFile = new File(markedDir, markedName);
                if (!markedDocFile.exists()) {
                    Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
                    continue;
                }
                FeatureMap markedParams = Factory.newFeatureMap();
                try {
                    markedParams.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
                } catch (java.net.MalformedURLException ex) {
                    Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
                    continue;
                }
                markedParams.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
                // create the document
                try {
                    markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", markedParams, hparams, cleanDoc.getName());
                } catch (gate.creole.ResourceInstantiationException ex) {
                    Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
                    continue;
                }
            } else {
                try {
                    // open marked from a DS
                    // NOTE(review): this datastore is opened once per clean
                    // file and never closed, and the non-matching documents
                    // loaded below are never released. Left unchanged because
                    // the effect of closing a datastore on documents loaded
                    // from it is not visible from this method; confirm and fix.
                    DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
                    List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
                    boolean found = false;
                    int k = 0;
                    // search for the marked doc with the same name
                    while (k < lrIDs1.size() && !found) {
                        String docID1 = lrIDs1.get(k);
                        // read the stored document
                        FeatureMap features1 = Factory.newFeatureMap();
                        features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
                        features1.put(DataStore.LR_ID_FEATURE_NAME, docID1);
                        Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
                        // check whether this is our doc; a document without
                        // the source URL feature simply does not match
                        Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
                        if (sourceUrl != null && sourceUrl.toString().endsWith(cleanDoc.getName())) {
                            found = true;
                            markedDoc = tempDoc;
                        } else {
                            k++;
                        }
                    }
                } catch (java.net.MalformedURLException ex) {
                    Out.prln("Error finding marked directory " + markedDir.getAbsolutePath());
                } catch (gate.persist.PersistenceException ex1) {
                    Out.prln("Error opening marked as a datastore (-marked_ds specified)");
                } catch (gate.creole.ResourceInstantiationException ex2) {
                    Out.prln("Error opening marked as a datastore (-marked_ds specified)");
                }
            }
            try {
                evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
            } catch (gate.creole.ResourceInstantiationException ex) {
                ex.printStackTrace();
                Out.prln("Evaluate failed on document: " + cleanDoc.getName());
            }
        } finally {
            // Runs for normal completion and for every "continue" above, so
            // documents created for a skipped file are released as well.
            if (cleanDoc != null) {
                final gate.Document cd = cleanDoc;
                javax.swing.SwingUtilities.invokeLater(new Runnable() {

                    @Override
                    public void run() {
                        Factory.deleteResource(cd);
                    }
                });
            }
            if (markedDoc != null) {
                final gate.Document md = markedDoc;
                javax.swing.SwingUtilities.invokeLater(new Runnable() {

                    @Override
                    public void run() {
                        Factory.deleteResource(md);
                    }
                });
            }
        }
    }
    // for loop through clean docs
}
Also used : ResourceInstantiationException(gate.creole.ResourceInstantiationException) Document(gate.Document) FeatureMap(gate.FeatureMap) PersistenceException(gate.persist.PersistenceException) SerialDataStore(gate.persist.SerialDataStore) DataStore(gate.DataStore) Document(gate.Document) File(java.io.File)

Example 10 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestXml method testAnnotationConsistencyForSaveAsXml.

// testUnpackMarkup()
/**
 * Round-trips a document through GATE XML and checks that nothing is lost.
 *
 * <p>The test loads the {@code gateTestSaveAsXML.xml} test resource, adds two
 * "Test" annotations whose "matches" features refer to each other, writes
 * the document as XML to a temporary file and loads that file back. It then
 * checks the annotation ID generator of both documents, compares the
 * annotations of the two documents by ID, and verifies that every annotation
 * taking part in a "matches" list is present and equal on both sides.
 *
 * <p>The temporary file is removed even when an assertion fails.
 *
 * @throws Exception if loading, exporting or reloading the document fails
 */
public void testAnnotationConsistencyForSaveAsXml() throws Exception {
    // Load a document from the test repository
    String testDoc = gate.util.Files.getGateResourceAsString("gate.ac.uk/tests/xml/gateTestSaveAsXML.xml");
    Document origDoc = gate.Factory.newDocument(testDoc);
    // Verifies if the maximum annotation ID on the origDoc is less than the
    // Annotation ID generator of the document.
    verifyAnnotationIDGenerator(origDoc);
    // create a couple of annotations with features we can look at after a round trip to disc
    Integer ann1ID = origDoc.getAnnotations().add(0L, 10L, "Test", Factory.newFeatureMap());
    Integer ann2ID = origDoc.getAnnotations().add(15L, 20L, "Test", Factory.newFeatureMap());
    origDoc.getAnnotations().get(ann1ID).getFeatures().put("matches", Arrays.asList(ann2ID));
    origDoc.getAnnotations().get(ann2ID).getFeatures().put("matches", Arrays.asList(ann1ID));
    // Export the GATE document called origDoc as XML, into a temp file,
    // using the working encoding
    File xmlFile = Files.writeTempFile(origDoc.toXml(), workingEncoding);
    try {
        System.out.println("Saved to temp file :" + xmlFile.toURI().toURL());
        Document reloadedDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding);
        // Verifies if the maximum annotation ID on the reloadedDoc is less
        // than the Annotation ID generator of the document.
        verifyAnnotationIDGenerator(reloadedDoc);
        // Verify if the annotations are identical in the two docs.
        Map<Integer, Annotation> origAnnotMap = buildID2AnnotMap(origDoc);
        Map<Integer, Annotation> reloadedAnnMap = buildID2AnnotMap(reloadedDoc);
        // Verifies if the reloaded annotations are the same as the original ones
        verifyIDConsistency(origAnnotMap, reloadedAnnMap);
        // Build the original Matches map: ID -> List of IDs
        Map<Integer, List<Integer>> origMatchesMap = buildMatchesMap(origDoc);
        for (Map.Entry<Integer, List<Integer>> matchEntry : origMatchesMap.entrySet()) {
            Integer id = matchEntry.getKey();
            Annotation origAnnot = origAnnotMap.get(id);
            assertTrue("Couldn't find an original annot with ID=" + id, origAnnot != null);
            Annotation reloadedAnnot = reloadedAnnMap.get(id);
            assertTrue("Couldn't find a reloaded annot with ID=" + id, reloadedAnnot != null);
            compareAnnot(origAnnot, reloadedAnnot);
            // Iterate through the matches list and repeat the comparison
            for (Integer matchId : matchEntry.getValue()) {
                Annotation origA = origAnnotMap.get(matchId);
                assertTrue("Couldn't find an original annot with ID=" + matchId, origA != null);
                Annotation reloadedA = reloadedAnnMap.get(matchId);
                assertTrue("Couldn't find a reloaded annot with ID=" + matchId, reloadedA != null);
                compareAnnot(origA, reloadedA);
            }
        }
    } finally {
        // Clean up the XML file, whether or not the checks passed
        xmlFile.delete();
    }
}
Also used : List(java.util.List) LinkedList(java.util.LinkedList) TestDocument(gate.corpora.TestDocument) Document(gate.Document) File(java.io.File) Annotation(gate.Annotation)

Aggregations

Document (gate.Document)47 File (java.io.File)17 FeatureMap (gate.FeatureMap)16 URL (java.net.URL)12 AnnotationSet (gate.AnnotationSet)9 TestDocument (gate.corpora.TestDocument)9 Annotation (gate.Annotation)7 Corpus (gate.Corpus)7 ResourceInstantiationException (gate.creole.ResourceInstantiationException)7 PersistenceException (gate.persist.PersistenceException)6 DataStore (gate.DataStore)5 LanguageResource (gate.LanguageResource)5 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 LanguageAnalyser (gate.LanguageAnalyser)4 SerialDataStore (gate.persist.SerialDataStore)4 GateRuntimeException (gate.util.GateRuntimeException)4 ActionEvent (java.awt.event.ActionEvent)4 List (java.util.List)4 AbstractAction (javax.swing.AbstractAction)4