Search in sources :

Example 11 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class ParameterList method getDefaults.

// getInitimeDefaults()
/**
 * Builds the set of default parameter values for the given parameter
 * disjunctions.
 * <p>
 * Each element of {@code parameters} is a list of alternative (disjunctive)
 * parameters. Within a disjunction the parameters are tried in order and the
 * first one whose {@code calculateDefaultValue()} is non-null is stored under
 * its name; the remaining alternatives of that disjunction are not examined.
 * <p>
 * When the last parameter of a disjunction has no default and no parameter
 * seen in that disjunction was optional, the {@code fullyDefaulted} flag is
 * cleared. This method never sets the flag back to {@code true}.
 *
 * @param parameters the disjunctions of parameters to compute defaults for
 * @return a feature map from parameter name to default value
 * @throws ParameterException propagated from the default value calculation
 * @see #isFullyDefaulted()
 */
public FeatureMap getDefaults(List<List<Parameter>> parameters) throws ParameterException {
    FeatureMap result = Factory.newFeatureMap();
    for (List<Parameter> disjunction : parameters) {
        // becomes true as soon as one alternative of this disjunction is optional
        boolean anyOptional = false;
        Iterator<Parameter> alternatives = disjunction.iterator();
        while (alternatives.hasNext()) {
            Parameter candidate = alternatives.next();
            if (DEBUG) {
                Out.prln("Examining " + candidate);
            }
            // short-circuit: isOptional() is only consulted while nothing optional was seen
            anyOptional = anyOptional || candidate.isOptional();
            Object value = candidate.calculateDefaultValue();
            if (value != null) {
                // first usable default wins; move on to the next disjunction
                result.put(candidate.getName(), value);
                break;
            }
            // last alternative reached without a default and nothing was optional
            if (!anyOptional && !alternatives.hasNext()) {
                fullyDefaulted = false;
            }
        }
    }
    return result;
}
Also used : FeatureMap(gate.FeatureMap)

Example 12 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class SerialDataStore method adopt.

// delete(lr)
/**
 * Adopts a resource for persistence in this datastore.
 * <p>
 * A resource that has no datastore yet is adopted: if it is a {@link Corpus},
 * a {@code gate.corpora.SerialCorpusImpl} is created through the
 * {@link Factory} with the original corpus passed as the
 * {@code "transientSource"} feature, and that new resource is the one that is
 * adopted and returned. The datastore of the adopted resource is set to this
 * instance and a {@code RESOURCE_ADOPTED} event carrying the original
 * {@code lr} is fired.
 * <p>
 * A resource that already belongs to this datastore is returned unchanged.
 *
 * @param lr the language resource to adopt
 * @return the adopted resource (possibly a new persistent corpus), or
 *         {@code lr} itself when it already belongs to this datastore
 * @throws PersistenceException if {@code lr} already belongs to a different
 *         datastore
 * @throws GateRuntimeException if the persistent corpus cannot be
 *         instantiated; the original exception is attached as the cause
 */
@Override
public LanguageResource adopt(LanguageResource lr) throws PersistenceException {
    // ignore security info
    // check the LR's current DS
    DataStore currentDS = lr.getDataStore();
    if (currentDS == null) {
        // an orphan - do the adoption
        LanguageResource res = lr;
        if (lr instanceof Corpus) {
            FeatureMap features1 = Factory.newFeatureMap();
            features1.put("transientSource", lr);
            try {
                // here we create the persistent LR via Factory, so it's registered
                // in GATE
                res = (LanguageResource) Factory.createResource("gate.corpora.SerialCorpusImpl", features1);
                // The transient corpus is not deleted here, because that might
                // not always be the desired behaviour; that functionality has
                // been moved to the 'Save to' action code in NameBearerHandle.
            } catch (gate.creole.ResourceInstantiationException ex) {
                // keep the original exception as the cause so the stack trace is not lost
                throw (GateRuntimeException) new GateRuntimeException(ex.getMessage()).initCause(ex);
            }
        }
        res.setDataStore(this);
        // let the world know
        fireResourceAdopted(new DatastoreEvent(this, DatastoreEvent.RESOURCE_ADOPTED, lr, null));
        return res;
    } else if (currentDS.equals(this)) {
        // adopted already here
        return lr;
    } else {
        // someone else's child
        throw new PersistenceException("Can't adopt a resource which is already in a different datastore");
    }
}
Also used : FeatureMap(gate.FeatureMap) LanguageResource(gate.LanguageResource) DataStore(gate.DataStore) GateRuntimeException(gate.util.GateRuntimeException) DatastoreEvent(gate.event.DatastoreEvent) Corpus(gate.Corpus)

Example 13 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class CorpusBenchmarkTool method evaluateCorpus.

// generateCorpus
/**
 * Evaluates a corpus by comparing stored (processed), clean and
 * human-annotated versions of each document.
 * <p>
 * Returns immediately when {@code fileDir} is missing. When the processed
 * directory is missing, evaluation either stops (marked/stored mode) or
 * switches to marked/clean mode. In marked/stored or marked/clean mode the
 * work is delegated to {@code evaluateMarkedStored} or
 * {@code evaluateMarkedClean}. Otherwise every
 * {@code gate.corpora.DocumentImpl} in the serial datastore at
 * {@code processedDir} is loaded, its clean and marked counterparts are looked
 * up by name, and the three documents are passed to
 * {@code evaluateDocuments}. A counterpart that cannot be found is passed as
 * {@code null}.
 *
 * @param fileDir directory holding the clean (original) documents
 * @param processedDir directory of the serial datastore with processed documents
 * @param markedDir directory (or serial datastore, when {@code isMarkedDS} is
 *        set) holding the human-annotated documents
 * @param errorDir directory for error output in more-info mode; when
 *        {@code null} a directory named {@code ERROR_DIR_NAME} under
 *        {@code currDir} is used
 * @throws GateRuntimeException wrapping any URL, persistence or resource
 *         instantiation failure
 */
protected void evaluateCorpus(File fileDir, File processedDir, File markedDir, File errorDir) {
    // 1. check if we have input files and the processed Dir
    if (fileDir == null || !fileDir.exists()) {
        return;
    }
    if (processedDir == null || !processedDir.exists()) {
        // if the user wants evaluation of marked and stored that's not possible
        if (isMarkedStored) {
            Out.prln("Cannot evaluate because no processed documents exist.");
            return;
        } else {
            isMarkedClean = true;
        }
    }
    // create the error directory or clean it up if needed
    File errDir = null;
    if (isMoreInfoMode) {
        errDir = errorDir;
        if (errDir == null) {
            errDir = new File(currDir, ERROR_DIR_NAME);
        } else {
            // remove the old directory so that we start from a clean one
            if (!Files.rmdir(errDir)) {
                Out.prln("cannot delete old error directory: " + errDir);
            }
        }
        Out.prln("Create error directory: " + errDir + "<BR><BR>");
        errDir.mkdir();
    }
    // look for marked texts only if the directory exists
    boolean processMarked = markedDir != null && markedDir.exists();
    if (!processMarked && (isMarkedStored || isMarkedClean)) {
        Out.prln("Cannot evaluate because no human-annotated documents exist.");
        return;
    }
    if (isMarkedStored) {
        evaluateMarkedStored(markedDir, processedDir, errDir);
        return;
    } else if (isMarkedClean) {
        evaluateMarkedClean(markedDir, fileDir, errDir);
        return;
    }
    // open the datastore and process each document
    try {
        // open the data store
        DataStore sds = Factory.openDataStore("gate.persist.SerialDataStore", processedDir.toURI().toURL().toExternalForm());
        List<String> lrIDs = sds.getLrIds("gate.corpora.DocumentImpl");
        for (int i = 0; i < lrIDs.size(); i++) {
            String docID = lrIDs.get(i);
            // The documents are scoped to one iteration so that a missing clean or
            // marked document is seen as null instead of silently reusing (and
            // re-deleting) the document loaded for the previous entry.
            Document persDoc = null;
            Document cleanDoc = null;
            Document markedDoc = null;
            try {
                // read the stored document
                FeatureMap features = Factory.newFeatureMap();
                features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
                features.put(DataStore.LR_ID_FEATURE_NAME, docID);
                FeatureMap hparams = Factory.newFeatureMap();
                // Gate.setHiddenAttribute(hparams, true);
                persDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features, hparams);
                String persName = persDoc.getName();
                // A name without an extension keeps its full text as base name
                // rather than failing on a negative index.
                int extPos = persName.lastIndexOf(".");
                String baseName = extPos < 0 ? persName : persName.substring(0, extPos);
                if (isMoreInfoMode) {
                    String errName = baseName + ".err";
                    Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + persName + "</a>" + "</H2>");
                } else {
                    Out.prln("<H2>" + persName + "</H2>");
                }
                File cleanDocFile = new File(fileDir, persName);
                // try reading the original document from clean
                if (!cleanDocFile.exists()) {
                    Out.prln("Warning: Cannot find original document " + persName + " in " + fileDir);
                } else {
                    FeatureMap params = Factory.newFeatureMap();
                    params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanDocFile.toURI().toURL());
                    params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
                    // create the document
                    cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
                    cleanDoc.setName(persName);
                }
                // try finding the marked document
                if (!isMarkedDS) {
                    File markedDocFile = new File(markedDir, baseName + ".xml");
                    if (!processMarked || !markedDocFile.exists()) {
                        Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
                    } else {
                        FeatureMap params = Factory.newFeatureMap();
                        params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
                        params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
                        // create the document
                        markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams);
                        markedDoc.setName(persName);
                    }
                } else {
                    // open marked from a DS
                    // open the data store
                    DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
                    List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
                    boolean found = false;
                    int k = 0;
                    // search for the marked doc with the same name
                    while (k < lrIDs1.size() && !found) {
                        String docID1 = lrIDs1.get(k);
                        // read the stored document
                        FeatureMap features1 = Factory.newFeatureMap();
                        features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
                        features1.put(DataStore.LR_ID_FEATURE_NAME, docID1);
                        Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
                        // check whether this is our doc; a document without a
                        // String source URL feature simply does not match
                        Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
                        if (sourceUrl instanceof String && ((String) sourceUrl).endsWith(persName)) {
                            found = true;
                            markedDoc = tempDoc;
                        } else {
                            k++;
                        }
                    }
                }
                evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
            } finally {
                // schedule release of whatever was loaded for this entry, even when
                // the evaluation failed; deletion is deferred via invokeLater
                for (final Document doc : new Document[] { persDoc, cleanDoc, markedDoc }) {
                    if (doc != null) {
                        javax.swing.SwingUtilities.invokeLater(new Runnable() {

                            @Override
                            public void run() {
                                Factory.deleteResource(doc);
                            }
                        });
                    }
                }
            }
        }
        // for loop through saved docs
        sds.close();
    } catch (java.net.MalformedURLException ex) {
        throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex.getMessage()).initCause(ex);
    } catch (PersistenceException ex1) {
        throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex1.getMessage()).initCause(ex1);
    } catch (ResourceInstantiationException ex2) {
        throw (GateRuntimeException) new GateRuntimeException("CorpusBenchmark: " + ex2.getMessage()).initCause(ex2);
    }
}
Also used : Document(gate.Document) ResourceInstantiationException(gate.creole.ResourceInstantiationException) FeatureMap(gate.FeatureMap) SerialDataStore(gate.persist.SerialDataStore) DataStore(gate.DataStore) PersistenceException(gate.persist.PersistenceException) Document(gate.Document) File(java.io.File)

Example 14 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class CorpusBenchmarkTool method evaluateMarkedClean.

// evaluateMarkedStored
/**
 * Evaluates every regular file in {@code cleanDir} against its
 * human-annotated counterpart.
 * <p>
 * For each file a document is created from it, the marked document is looked
 * up either as {@code <base name>.xml} in {@code markedDir} or, when
 * {@code isMarkedDS} is set, in the serial datastore at {@code markedDir} by
 * matching the {@code "gate.SourceURL"} feature against the clean document
 * name. The documents are then passed to {@code evaluateDocuments}; the
 * persistent document argument is always {@code null} in this mode. Files
 * whose document cannot be created, or whose marked file is missing, are
 * reported and skipped.
 *
 * @param markedDir directory or serial datastore with human-annotated documents
 * @param cleanDir directory with the clean (original) documents
 * @param errDir error directory handed on to {@code evaluateDocuments}
 */
protected void evaluateMarkedClean(File markedDir, File cleanDir, File errDir) {
    // there is no stored version of the document in this mode
    Document persDoc = null;
    File[] cleanDocs = cleanDir.listFiles();
    if (cleanDocs == null) {
        // listFiles() yields null when the path is not a directory or cannot be read
        Out.prln("Cannot list documents in directory: " + cleanDir.getAbsolutePath());
        return;
    }
    for (int i = 0; i < cleanDocs.length; i++) {
        if (!cleanDocs[i].isFile()) {
            continue;
        }
        // Scoped to one iteration so that a failed lookup never evaluates the
        // marked document that belonged to the previous file.
        Document cleanDoc = null;
        Document markedDoc = null;
        try {
            String cleanFileName = cleanDocs[i].getName();
            // try reading the original document from clean
            FeatureMap params = Factory.newFeatureMap();
            try {
                params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanDocs[i].toURI().toURL());
            } catch (java.net.MalformedURLException ex) {
                Out.prln("Cannot create document from file: " + cleanDocs[i].getAbsolutePath());
                continue;
            }
            // params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, "");
            params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
            FeatureMap hparams = Factory.newFeatureMap();
            // create the document
            try {
                cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanFileName);
            } catch (gate.creole.ResourceInstantiationException ex) {
                Out.prln("Cannot create document from file: " + cleanDocs[i].getAbsolutePath());
                continue;
            }
            if (isMoreInfoMode) {
                // a file name without an extension keeps its full text as base name
                int extPos = cleanFileName.lastIndexOf(".");
                String errName = (extPos < 0 ? cleanFileName : cleanFileName.substring(0, extPos)) + ".err";
                Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + cleanFileName + "</a>" + "</H2>");
            } else {
                Out.prln("<H2>" + cleanFileName + "</H2>");
            }
            // try finding the marked document
            if (!isMarkedDS) {
                String cleanName = cleanDoc.getName();
                int extPos = cleanName.lastIndexOf(".");
                String markedName = (extPos < 0 ? cleanName : cleanName.substring(0, extPos)) + ".xml";
                File markedDocFile = new File(markedDir, markedName);
                if (!markedDocFile.exists()) {
                    Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
                    continue;
                }
                params = Factory.newFeatureMap();
                try {
                    params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
                } catch (java.net.MalformedURLException ex) {
                    Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
                    continue;
                }
                // params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, "");
                params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
                // create the document
                try {
                    markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanDoc.getName());
                } catch (gate.creole.ResourceInstantiationException ex) {
                    Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
                    continue;
                }
            } else {
                try {
                    // open marked from a DS
                    // open the data store
                    DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
                    List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
                    boolean found = false;
                    int k = 0;
                    // search for the marked doc with the same name
                    while (k < lrIDs1.size() && !found) {
                        String docID1 = lrIDs1.get(k);
                        // read the stored document
                        FeatureMap features1 = Factory.newFeatureMap();
                        features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
                        features1.put(DataStore.LR_ID_FEATURE_NAME, docID1);
                        Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
                        // check whether this is our doc; a document without a
                        // String source URL feature simply does not match
                        Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
                        if (sourceUrl instanceof String && ((String) sourceUrl).endsWith(cleanDoc.getName())) {
                            found = true;
                            markedDoc = tempDoc;
                        } else {
                            k++;
                        }
                    }
                } catch (java.net.MalformedURLException ex) {
                    Out.prln("Error finding marked directory " + markedDir.getAbsolutePath());
                } catch (gate.persist.PersistenceException ex1) {
                    Out.prln("Error opening marked as a datastore (-marked_ds specified)");
                } catch (gate.creole.ResourceInstantiationException ex2) {
                    Out.prln("Error opening marked as a datastore (-marked_ds specified)");
                }
            }
            try {
                evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
            } catch (gate.creole.ResourceInstantiationException ex) {
                ex.printStackTrace();
                Out.prln("Evaluate failed on document: " + cleanDoc.getName());
            }
        } finally {
            // Runs for skipped files too, so a clean document that was already
            // created is released even when its marked counterpart is missing.
            // Deletion is deferred via invokeLater.
            for (final Document doc : new Document[] { persDoc, cleanDoc, markedDoc }) {
                if (doc != null) {
                    javax.swing.SwingUtilities.invokeLater(new Runnable() {

                        @Override
                        public void run() {
                            Factory.deleteResource(doc);
                        }
                    });
                }
            }
        }
    }
    // for loop through clean docs
}
Also used : ResourceInstantiationException(gate.creole.ResourceInstantiationException) Document(gate.Document) FeatureMap(gate.FeatureMap) PersistenceException(gate.persist.PersistenceException) SerialDataStore(gate.persist.SerialDataStore) DataStore(gate.DataStore) Document(gate.Document) File(java.io.File)

Example 15 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class XmlDocumentHandler method startElement.

// endDocument();
/**
 * Called when the SAX parser encounters the beginning of an XML element.
 * <p>
 * Any buffered character content is first handed to
 * {@code charactersAction}. The element's attributes are then collected into
 * a feature map and a {@code CustomObject} whose start and end offsets both
 * equal the current length of {@code tmpDocContent} is pushed onto the stack.
 * <p>
 * Attributes in the {@code Gate.URI} namespace are treated specially:
 * {@code gateId} becomes the id of the new object, {@code annotMaxId} updates
 * {@code customObjectsId}, and {@code matches} is stored as a list of
 * integers parsed from its {@code ;}-separated value. Other attributes in
 * that namespace are not stored. All remaining attributes are stored under
 * their qualified name.
 *
 * @param uri - namespace uri
 * @param localName - local, unprefixed element name
 * @param qName - fully qualified, prefixed element name
 * @param atts - the attributes attached to the element
 * @throws SAXException declared by the SAX handler contract
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
    // hand over character data collected so far
    if (readCharacterStatus) {
        readCharacterStatus = false;
        charactersAction(new String(contentBuffer).toCharArray(), 0, contentBuffer.length());
    }
    // report progress each time the element count reaches a multiple of ELEMENTS_RATE
    elements++;
    if (elements % ELEMENTS_RATE == 0) {
        fireStatusChangedEvent("Processed elements : " + elements);
    }
    FeatureMap features = Factory.newFeatureMap();
    // By default the qualified name is used. When namespace deserialization is
    // enabled and the element has a namespace, the local name is used instead
    // and the namespace uri and prefix are recorded as features.
    String elementName = qName;
    if (deserializeNamespaceInfo && uri != null && !uri.isEmpty()) {
        elementName = localName;
        StringTokenizer qNameParts = new StringTokenizer(qName, ":");
        if (qNameParts.countTokens() > 1) {
            String prefix = qNameParts.nextToken();
            features.put(namespaceURIFeature, uri);
            features.put(namespacePrefixFeature, prefix);
        }
    }
    // copy the attributes into the feature map
    Integer objectId = null;
    for (int idx = 0; idx < atts.getLength(); idx++) {
        String localAttName = atts.getLocalName(idx);
        String value = atts.getValue(idx);
        String attNamespace = atts.getURI(idx);
        boolean gateAttribute = attNamespace != null && Gate.URI.equals(attNamespace);
        if (!gateAttribute) {
            features.put(atts.getQName(idx), value);
            continue;
        }
        if ("gateId".equals(localAttName)) {
            objectId = Integer.parseInt(value);
        } else if ("annotMaxId".equals(localAttName)) {
            customObjectsId = Integer.parseInt(value);
        } else if ("matches".equals(localAttName)) {
            // turn every ';'-separated token into an Integer
            List<Integer> ids = new ArrayList<Integer>();
            for (StringTokenizer parts = new StringTokenizer(value, ";"); parts.hasMoreTokens();) {
                ids.add(Integer.valueOf(parts.nextToken()));
            }
            features.put(localAttName, ids);
        }
    }
    // initially the end offset equals the start offset
    Long offset = Long.valueOf(tmpDocContent.length());
    stack.push(new CustomObject(objectId, elementName, features, offset, offset));
}
Also used : FeatureMap(gate.FeatureMap) StringTokenizer(java.util.StringTokenizer) ArrayList(java.util.ArrayList)

Aggregations

FeatureMap (gate.FeatureMap)55 Document (gate.Document)15 URL (java.net.URL)14 ResourceInstantiationException (gate.creole.ResourceInstantiationException)11 File (java.io.File)10 Resource (gate.Resource)8 GateRuntimeException (gate.util.GateRuntimeException)7 ArrayList (java.util.ArrayList)7 List (java.util.List)7 PersistenceException (gate.persist.PersistenceException)6 Annotation (gate.Annotation)5 AnnotationSet (gate.AnnotationSet)5 DataStore (gate.DataStore)5 LanguageResource (gate.LanguageResource)5 TestDocument (gate.corpora.TestDocument)4 ResourceData (gate.creole.ResourceData)4 SerialDataStore (gate.persist.SerialDataStore)4 InvalidOffsetException (gate.util.InvalidOffsetException)4 Corpus (gate.Corpus)3 ProcessingResource (gate.ProcessingResource)3