Use of gate.Document in project gate-core by GateNLP.
Class TestPersist, method testDSR.
// testDelete()
/**
 * Tests the datastore register (DSR).
 *
 * <p>Starting from a register that is asserted to be empty, the test creates
 * two serial datastores through {@code Factory.createDataStore} and asserts
 * that the register size becomes 1 and then 2. It then closes and deletes the
 * datastores and asserts that the register size goes back to 0.
 *
 * @throws Exception if a datastore, document or URL operation fails
 */
public void testDSR() throws Exception {
  DataStoreRegister dsr = Gate.getDataStoreRegister();
  assertTrue("DSR has wrong number elements (not 0): " + dsr.size(), dsr.size() == 0);

  // create a temporary directory; because File.createTempFile actually
  // writes the file, we need to delete it from disk before calling
  // DataStore.create
  File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
  storageDir.delete();

  // create and open a serial data store
  DataStore sds = Factory.createDataStore("gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());

  // create a document with some annotations / features on it
  String server = TestDocument.getTestServerName();
  Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
  // valueOf instead of the deprecated boxing constructors
  doc.getFeatures().put("hi there", Integer.valueOf(23232));
  doc.getAnnotations().add(Long.valueOf(5L), Long.valueOf(25L), "ThingyMaJig", Factory.newFeatureMap());

  // save the document
  Document persDoc = (Document) sds.adopt(doc);
  sds.sync(persDoc);

  // DSR should have one member
  assertTrue("DSR has wrong number elements (expected 1): " + dsr.size(), dsr.size() == 1);

  // create and open another serial data store
  storageDir = File.createTempFile("TestPersist__", "__StorageDir");
  storageDir.delete();
  DataStore sds2 = Factory.createDataStore("gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());

  // DSR should have two members
  assertTrue("DSR has wrong number elements: " + dsr.size(), dsr.size() == 2);

  // peek at the DSR members
  Iterator<DataStore> dsrIter = dsr.iterator();
  while (dsrIter.hasNext()) {
    DataStore ds = dsrIter.next();
    assertNotNull("null ds in ds reg", ds);
    if (DEBUG) {
      Out.prln(ds);
    }
  }

  // closing the first datastore is expected to take it out of the register
  sds.close();
  assertTrue("DSR has wrong number elements (expected 1): " + dsr.size(), dsr.size() == 1);

  // deleting the already closed datastore is expected to leave the count unchanged
  sds.delete();
  assertTrue("DSR has wrong number elements (expected 1): " + dsr.size(), dsr.size() == 1);

  // deleting the second (still open) datastore is expected to empty the register
  sds2.delete();
  assertTrue("DSR has wrong number elements (expected 0): " + dsr.size(), dsr.size() == 0);
}
Use of gate.Document in project gate-core by GateNLP.
Class TestPersist, method testMultipleLrs.
// testSimple()
/**
 * Tests storing and restoring multiple language resources.
 *
 * <p>Two documents are created, each given a feature and an annotation, put
 * into a corpus, and the corpus is adopted by and synced to a serial
 * datastore. The corpus is then re-created from the datastore and its name,
 * features and first document are compared with the in-memory originals.
 *
 * @throws Exception if a datastore, document or URL operation fails
 */
public void testMultipleLrs() throws Exception {
  // create a temporary directory; because File.createTempFile actually
  // writes the file, we need to delete it from disk before calling
  // DataStore.create
  File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
  storageDir.delete();

  // create and open a serial data store
  SerialDataStore sds = new SerialDataStore(storageDir.toURI().toURL().toString());
  sds.create();
  sds.open();

  try {
    // create a document with some annotations / features on it
    String server = TestDocument.getTestServerName();
    Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
    doc.getFeatures().put("hi there", Integer.valueOf(23232));
    doc.getAnnotations().add(Long.valueOf(5L), Long.valueOf(25L), "ThingyMaJig", Factory.newFeatureMap());

    // create another document with some annotations / features on it.
    // The feature and annotation go on doc2; they were previously added to
    // doc a second time, which left doc2 without any.
    // NOTE(review): assumes tests/html/test1.htm has at least 25 characters
    // of content so that the offsets 5-25 are valid - confirm.
    Document doc2 = Factory.newDocument(new URL(server + "tests/html/test1.htm"));
    doc2.getFeatures().put("hi there again", Integer.valueOf(23232));
    doc2.getAnnotations().add(Long.valueOf(5L), Long.valueOf(25L), "dog poo irritates", Factory.newFeatureMap());

    // create a corpus with the documents
    Corpus corp = Factory.newCorpus("Hamish test corpus");
    corp.add(doc);
    corp.add(doc2);
    LanguageResource persCorpus = sds.adopt(corp);
    sds.sync(persCorpus);

    // read the corpora back, one resource per stored id
    List<Resource> lrsFromDisk = new ArrayList<Resource>();
    List<String> lrIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
    for (String lrId : lrIds) {
      FeatureMap features = Factory.newFeatureMap();
      features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
      features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
      Resource lr = Factory.createResource("gate.corpora.SerialCorpusImpl", features);
      lrsFromDisk.add(lr);
    }
    if (DEBUG) {
      System.out.println("LRs on disk" + lrsFromDisk);
    }

    // check that the versions we read back match the originals
    Corpus diskCorp = (Corpus) lrsFromDisk.get(0);
    Document diskDoc = diskCorp.get(0);
    if (DEBUG) {
      Out.prln("Documents in corpus: " + corp.getDocumentNames());
    }
    assertTrue("corp name != mem name", corp.getName().equals(diskCorp.getName()));
    if (DEBUG) {
      Out.prln("Memory features " + corp.getFeatures());
      Out.prln("Disk features " + diskCorp.getFeatures());
    }
    assertTrue("corp feat != mem feat", corp.getFeatures().equals(diskCorp.getFeatures()));
    if (DEBUG) {
      Out.prln("Annotations in doc: " + diskDoc.getAnnotations());
    }
    assertTrue("doc annotations from disk not equal to memory version", TestEqual.annotationSetsEqual(doc.getAnnotations(), diskDoc.getAnnotations()));
    assertTrue("doc from disk not equal to memory version", TestEqual.documentsEqual(doc, diskDoc));

    // visit every document of the restored corpus (names printed in DEBUG mode)
    for (Document restored : diskCorp) {
      if (DEBUG) {
        Out.prln(restored.getName());
      }
    }
    // TODO: doc2 is not yet compared with its on-disk version.
  } finally {
    // delete the datastore even when an assertion above fails
    sds.delete();
  }
}
Use of gate.Document in project gate-core by GateNLP.
Class TestPersist, method testSaveRestore.
// tearDown
/**
 * Tests resource save and restore on a serial datastore.
 *
 * <p>The test checks that a document cannot be synced before it has been
 * adopted, that a document already assigned to another datastore cannot be
 * adopted, and that an adopted and synced document can be listed by type and
 * name and read back equal to the original.
 *
 * @throws Exception if a datastore, document or URL operation fails
 */
public void testSaveRestore() throws Exception {
  File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
  // get rid of the temp file
  storageDir.delete();
  // create an empty dir of same name
  storageDir.mkdir();

  SerialDataStore sds = new SerialDataStore(storageDir.toURI().toURL().toString());
  sds.create();
  sds.open();

  try {
    // create a document
    String server = TestDocument.getTestServerName();
    assertNotNull(server);
    Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
    assertNotNull(doc);
    doc.getFeatures().put("hi there", Integer.valueOf(23232));
    doc.getAnnotations().add(Long.valueOf(0L), Long.valueOf(20L), "thingymajig", Factory.newFeatureMap());

    // check that we can't save a resource without adopting it
    boolean cannotSync = false;
    try {
      sds.sync(doc);
    } catch (PersistenceException e) {
      cannotSync = true;
    }
    assertTrue("doc synced ok before adoption", cannotSync);

    // check that we can't adopt a resource that's stored somewhere else.
    // A separate flag is used here: reusing cannotSync (already true from the
    // check above) made this assertion impossible to fail.
    boolean cannotAdopt = false;
    doc.setDataStore(new SerialDataStore(new File("z:\\").toURI().toURL().toString()));
    try {
      sds.adopt(doc);
    } catch (PersistenceException e) {
      cannotAdopt = true;
    }
    assertTrue("doc adopted but in other datastore already", cannotAdopt);

    doc.setDataStore(null);
    doc.setName("Alicia Tonbridge, a Document");

    // save the document
    Document persDoc = (Document) sds.adopt(doc);
    sds.sync(persDoc);
    Object lrPersistenceId = persDoc.getLRPersistenceId();

    // test the getLrTypes method
    List<String> lrTypes = sds.getLrTypes();
    assertTrue("wrong number of types in SDS", lrTypes.size() == 1);
    assertTrue("wrong type LR in SDS", lrTypes.get(0).equals("gate.corpora.DocumentImpl"));

    // test the getLrNames method
    Iterator<String> iter = sds.getLrNames("gate.corpora.DocumentImpl").iterator();
    String name = iter.next();
    // expected value first, so a failure message reports the right way round
    assertEquals("Alicia Tonbridge, a Document", name);

    // read the document back, once through the Factory and once through the datastore
    FeatureMap features = Factory.newFeatureMap();
    features.put(DataStore.LR_ID_FEATURE_NAME, lrPersistenceId);
    features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
    Document doc2 = (Document) Factory.createResource("gate.corpora.DocumentImpl", features);
    Document doc3 = (Document) sds.getLr("gate.corpora.DocumentImpl", lrPersistenceId);

    boolean value = TestEqual.documentsEqual(doc3, doc2);
    assertTrue(TestEqual.message, value);
    value = TestEqual.documentsEqual(persDoc, doc2);
    assertTrue(TestEqual.message, value);
  } finally {
    // delete the datastore even when an assertion above fails
    sds.delete();
  }
}
Use of gate.Document in project gate-core by GateNLP.
Class TestAnnotationMerging, method testWithfeat.
/**
 * Shared driver for the annotation merging tests.
 *
 * <p>Collects, for every document in {@code data} and every annotation set
 * named in {@code nameAnnSets}, the annotations of type {@code nameAnnType};
 * merges the sets of the first document; counts the merged annotations and
 * hands the count to {@code checkNumbers}.
 *
 * @param nameAnnSets annotation set names, one per judge, separated by ';'
 * @param nameAnnType annotation type to select from each set
 * @param nameAnnFeat annotation feature name passed to the merging methods
 * @param data corpus whose documents carry the annotation sets
 * @param isUsingMajority {@code true} to call {@code mergeAnnotationMajority},
 *        {@code false} to call {@code mergeAnnotation}
 */
public void testWithfeat(String nameAnnSets, String nameAnnType, String nameAnnFeat, Corpus data, boolean isUsingMajority) {
  // one annotation set name per judge
  String[] judgeSetNames = nameAnnSets.split(";");
  int judgeCount = judgeSetNames.length;
  int docCount = data.size();

  // annotations of the requested type, indexed by [document][judge]
  AnnotationSet[][] annsByDocAndJudge = new AnnotationSet[docCount][judgeCount];
  for (int docIdx = 0; docIdx < docCount; docIdx++) {
    Document current = data.get(docIdx);
    AnnotationSet[] perJudge = annsByDocAndJudge[docIdx];
    for (int judgeIdx = 0; judgeIdx < judgeCount; judgeIdx++) {
      perJudge[judgeIdx] = current.getAnnotations(judgeSetNames[judgeIdx]).get(nameAnnType);
    }
  }

  // stop at the first document for which the check returns false
  boolean sameInstances = true;
  for (AnnotationSet[] perJudge : annsByDocAndJudge) {
    if (!AnnotationMerging.isSameInstancesForAnnotators(perJudge, 1)) {
      sameInstances = false;
      break;
    }
  }

  // only the first document's annotation sets are merged
  HashMap<Annotation, String> merged = new HashMap<Annotation, String>();
  AnnotationSet[] firstDocSets = annsByDocAndJudge[0];
  if (!isUsingMajority) {
    // NOTE(review): the literal 2 is presumably the minimum number of
    // agreeing annotators - confirm against AnnotationMerging.mergeAnnotation
    AnnotationMerging.mergeAnnotation(firstDocSets, nameAnnFeat, merged, 2, sameInstances);
  } else {
    AnnotationMerging.mergeAnnotationMajority(firstDocSets, nameAnnFeat, merged, sameInstances);
  }

  int mergedCount;
  if (!sameInstances) {
    mergedCount = merged.size();
  } else {
    // count only the merged annotations that carry the feature
    mergedCount = 0;
    for (Annotation candidate : merged.keySet()) {
      Object featureValue = candidate.getFeatures().get(nameAnnFeat);
      if (featureValue != null) {
        mergedCount++;
      }
    }
  }
  checkNumbers(mergedCount);
}
Use of gate.Document in project gate-core by GateNLP.
Class TestDiffer, method testDiffer.
// tearDown
/**
 * Tests the precision and recall figures computed by {@code AnnotationDiffer}.
 *
 * <p>Three scenarios are checked: identical key and response sets (all
 * measures 1), one extra response annotation (strict precision about .99,
 * recall still 1), and one extra key annotation (strict recall about .99,
 * precision still 1).
 *
 * @throws Exception if the test document cannot be created or an annotation
 *         cannot be added
 */
public void testDiffer() throws Exception {
  Document doc = Factory.newDocument(new URL(gate.corpora.TestDocument.getTestServerName() + "tests/ft-bt-03-aug-2001.html"), "windows-1252");
  AnnotationSet annSet = doc.getAnnotations();

  // create 100 annotations of 10 characters each, all sharing one feature map
  FeatureMap features = Factory.newFeatureMap();
  features.put("type", "BAR");
  for (int i = 0; i < 100; i++) {
    // valueOf instead of the deprecated boxing constructor
    annSet.add(Long.valueOf(i * 10L), Long.valueOf((i + 1) * 10L), "Foo", features);
  }
  List<Annotation> keySet = new ArrayList<Annotation>(annSet);
  List<Annotation> responseSet = new ArrayList<Annotation>(annSet);

  // check 100% Precision and recall
  AnnotationDiffer differ = new AnnotationDiffer();
  differ.setSignificantFeaturesSet(null);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  double value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getPrecisionLenient();
  Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallLenient();
  Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

  // check low precision: one extra annotation that is only in the response
  Integer id = annSet.add(Long.valueOf(2L), Long.valueOf(4L), "Foo", features);
  Annotation falsePositive = annSet.get(id);
  responseSet.add(falsePositive);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of .99!", .99, value, .001);
  // recall should still be 100%
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallLenient();
  Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

  // check low recall: move the extra annotation from the response to the key
  responseSet.remove(falsePositive);
  keySet.add(falsePositive);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of .99!", .99, value, .001);
  // precision should still be 100%
  value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getPrecisionLenient();
  Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
}
Aggregations