use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.
the class ReportGeneratorTest method generateStudySourceCitationReports.
/**
 * Creates three studies spread over two sources ("az source" twice, "zother source" once),
 * generates the per-source-citation reports and checks the counters stored on each
 * report node found in the "reports" index.
 */
@Test
public void generateStudySourceCitationReports() throws NodeFactoryException {
    // --- fixture: two studies for "az source", one for "zother source" ---
    StudyImpl firstAzStudy = new StudyImpl("a title", "az source", null, "citation");
    firstAzStudy.setSourceId("az/source1");
    createStudy(firstAzStudy);

    StudyImpl secondAzStudy = new StudyImpl("another title", "az source", null, "citation");
    secondAzStudy.setSourceId("az/source2");
    createStudy(secondAzStudy);

    StudyImpl zotherStudy = new StudyImpl("yet another title", "zother source", null, null);
    zotherStudy.setSourceId("zother/source");
    createStudy(zotherStudy);

    resolveNames();

    // --- exercise ---
    ReportGenerator generator = new ReportGenerator(getGraphDb());
    generator.generateReportForSourceCitations();

    // --- verify the report indexed under "az source" ---
    IndexHits<Node> azHits = getGraphDb().index().forNodes("reports").get(StudyConstant.SOURCE, "az source");
    Node azReport = azHits.getSingle();
    assertThat((Integer) azReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_STUDIES), is(2));
    assertThat((Integer) azReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_SOURCES), is(1));
    assertThat((Integer) azReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_DATASETS), is(2));
    assertThat((Integer) azReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_INTERACTIONS), is(8));
    assertThat((Integer) azReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA), is(3));
    assertThat((Integer) azReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA_NO_MATCH), is(2));
    assertThat((String) azReport.getProperty(StudyConstant.SOURCE), is("az source"));
    azHits.close();

    // --- verify the report indexed under "zother source" ---
    IndexHits<Node> zotherHits = getGraphDb().index().forNodes("reports").get(StudyConstant.SOURCE, "zother source");
    Node zotherReport = zotherHits.getSingle();
    assertThat((String) zotherReport.getProperty(StudyConstant.SOURCE), is("zother source"));
    assertThat((Integer) zotherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_STUDIES), is(1));
    assertThat((Integer) zotherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_INTERACTIONS), is(4));
    assertThat((Integer) zotherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA), is(3));
    assertThat((Integer) zotherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA_NO_MATCH), is(2));
}
use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForCoetzer method importStudy.
/**
 * Imports interactions from the zipped "archive" resource of the configured dataset.
 *
 * <p>The archive must contain {@code taxon.txt}, {@code description.txt},
 * {@code references.txt} and {@code distribution.txt} (at any directory depth). Taxon names
 * and references are loaded into temporary in-memory maps keyed by the integer in the first
 * column of each row; every row of {@code description.txt} whose third column has the form
 * {@code <interaction>:<target>[,<target>...]} then yields one interaction per target.
 *
 * <p>The temporary map store and every stream opened here are released even when the import
 * fails.
 *
 * @throws StudyImporterException if no archive URI is configured, an expected archive entry
 *                                is missing, an interaction label is not supported, or
 *                                reading the archive / creating nodes fails
 */
@Override
public void importStudy() throws StudyImporterException {
    if (org.apache.commons.lang.StringUtils.isBlank(getResourceArchiveURI())) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }

    final DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<Integer, String> taxonMap = db.createHashMap("taxonMap").make();
        final HTreeMap<Integer, String> refMap = db.createHashMap("refMap").make();

        File taxonTempFile = null;
        File assocTempFile = null;
        File referencesTempFile = null;
        File distributionTempFile = null;

        // Spool the entries of interest to temp files; the archive stream is closed on every path.
        try (InputStream inputStream = DatasetUtil.getNamedResourceStream(getDataset(), "archive");
             ZipInputStream zipInputStream = new ZipInputStream(inputStream)) {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                final String entryName = entry.getName();
                if (entryName.matches("(^|(.*/))taxon.txt$")) {
                    taxonTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))description.txt$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))references.txt$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))distribution.txt$")) {
                    distributionTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // drain entries that are not needed
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        }

        if (taxonTempFile == null) {
            throw new StudyImporterException("failed to find expected [taxon.txt] resource");
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [description.txt] resource");
        }
        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.txt] resource");
        }
        if (distributionTempFile == null) {
            throw new StudyImporterException("failed to find expected [distribution.txt] resource");
        }

        // taxon.txt: first column -> taxon name
        try (FileInputStream taxonStream = new FileInputStream(taxonTempFile);
             BufferedReader taxonReader = FileUtils.getUncompressedBufferedReader(taxonStream, CharsetConstant.UTF8)) {
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(taxonReader);
            parser.changeDelimiter('\t');
            String[] line;
            while ((line = parser.getLine()) != null) {
                taxonMap.put(Integer.parseInt(line[0]), nameFor(line));
            }
        }

        // references.txt: first column -> second column (reference text)
        try (FileInputStream referencesStream = new FileInputStream(referencesTempFile)) {
            LabeledCSVParser refs = CSVTSVUtil.createLabeledCSVParser(referencesStream);
            refs.changeDelimiter('\t');
            String[] refsLine;
            while ((refsLine = refs.getLine()) != null) {
                refMap.put(Integer.parseInt(refsLine[0]), refsLine[1]);
            }
        }

        // NOTE(review): "Host of" is mapped to VISITS_FLOWERS_OF, which looks like a copy/paste
        // slip; kept as-is because changing it alters imported data - confirm intended mapping.
        final Map<String, InteractType> interactTypeMap = new HashMap<String, InteractType>();
        interactTypeMap.put("Visits flowers of", InteractType.VISITS_FLOWERS_OF);
        interactTypeMap.put("Host of", InteractType.VISITS_FLOWERS_OF);
        interactTypeMap.put("Parasite of", InteractType.PARASITE_OF);
        interactTypeMap.put("Nests in", InteractType.INTERACTS_WITH);

        // description.txt: one row per source taxon, third column "<interaction>:<target>,<target>..."
        try (FileInputStream assocStream = new FileInputStream(assocTempFile)) {
            LabeledCSVParser assoc = CSVTSVUtil.createLabeledCSVParser(assocStream);
            assoc.changeDelimiter('\t');
            String[] assocLine;
            while ((assocLine = assoc.getLine()) != null) {
                final Integer taxonId = Integer.parseInt(assocLine[0]);
                final String[] parts = assocLine[2].split(":");
                if (parts.length <= 1) {
                    continue;
                }
                // These lookups depend only on the row, not on the individual target.
                final String interactionString = parts[0];
                final String reference = refMap.get(taxonId);
                final String sourceTaxonName = taxonMap.get(taxonId);
                if (StringUtils.isBlank(reference) || StringUtils.isBlank(sourceTaxonName)) {
                    continue;
                }
                final InteractType relType = interactTypeMap.get(interactionString);
                for (String targetTaxonName : parts[1].split(",")) {
                    // Reject unsupported interaction labels before any node is created.
                    if (relType == null) {
                        throw new StudyImporterException("found unsupported interaction type [" + interactionString + "]");
                    }
                    final Study study = nodeFactory.getOrCreateStudy(new StudyImpl(getSourceCitation() + reference, getSourceCitationLastAccessed(), null, reference));
                    final Specimen source = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(sourceTaxonName), null));
                    final Specimen target = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(targetTaxonName), null));
                    source.interactsWith(target, relType);
                }
            }
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // release the in-memory store on success and on failure
        db.close();
    }
}
use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForCook method importStudy.
/**
 * Imports host/parasite records for the Cook 2012 thesis dataset.
 *
 * <p>Every parsed row produces one "Micropogonias undulatus" host specimen caught at a single
 * fixed sample location; the "Iso 1" .. "Iso 5" columns of the row are handed to
 * {@code addParasites} one by one.
 *
 * @throws StudyImporterException if the resource cannot be opened or parsed, a value cannot
 *                                be converted, or node creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    final LabeledCSVParser csv;
    try {
        csv = parserFactory.createParser(DATASET_RESOURCE_NAME, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource", e);
    }

    StudyImpl cookStudy = new StudyImpl(
            "Cook 2012",
            "Data provided by Colt W. Cook. Also available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.",
            null,
            "Cook CW. The Early Life History and Reproductive Biology of Cymothoa excisa, a Marine Isopod Parasitizing Atlantic Croaker, (Micropogonias undulatus), along the Texas Coast. 2012. Master Thesis. Available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.");
    cookStudy.setExternalId("http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285");
    Study study = nodeFactory.getOrCreateStudy(cookStudy);

    // Column labels are matched verbatim against the header (note the trailing blank in "Iso 4 ").
    final String[] parasiteColumns = { "Iso 1", "Iso 2", "Iso 3", "Iso 4 ", "Iso 5" };

    try {
        Double lat = LocationUtil.parseDegrees("27º51'N");
        Double lng = LocationUtil.parseDegrees("97º8'W");
        Location site = nodeFactory.getOrCreateLocation(new LocationImpl(lat, lng, -3.0, null));
        try {
            while (csv.getLine() != null) {
                Specimen fish = nodeFactory.createSpecimen(study, new TaxonImpl("Micropogonias undulatus", null));

                // presumably "Fish Length" is recorded in cm, hence the factor 10 - TODO confirm
                double fishLength = Double.parseDouble(csv.getValueByLabel("Fish Length"));
                fish.setLengthInMm(fishLength * 10.0);

                String rawDate = csv.getValueByLabel("Date");
                Date caughtOn = DateUtil.parsePatternUTC(rawDate, "MM/dd/yyyy").toDate();
                nodeFactory.setUnixEpochProperty(fish, caughtOn);
                fish.caughtIn(site);

                for (int i = 0; i < parasiteColumns.length; i++) {
                    addParasites(csv, study, site, fish, caughtOn, parasiteColumns[i]);
                }
            }
        } catch (IOException e) {
            throw new StudyImporterException("failed to parse [" + DATASET_RESOURCE_NAME + "]", e);
        } catch (IllegalArgumentException e) {
            throw new StudyImporterException("failed to parse date", e);
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create host and parasite taxons", e);
    }
}
use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForBell method importStudy.
/**
 * Imports parasite/host records from every resource listed in {@code RESOURCE}.
 *
 * <p>Each row yields one parasite specimen (Genus + Species) and one host specimen
 * (SCIENTIFIC_NAME), both tagged with the same external id derived from the row's GUID,
 * the same location and the same date, linked by a PARASITE_OF interaction.
 *
 * @throws StudyImporterException wrapping any failure raised while handling a resource
 */
@Override
public void importStudy() throws StudyImporterException {
    final String sourceCitation = "Bell, K. C., Matek, D., Demboski, J. R., & Cook, J. A. (2015). Expanded Host Range of Sucking Lice and Pinworms of Western North American Chipmunks. Comparative Parasitology, 82(2), 312–321. doi:10.1654/4756.1 . Data provided by Kayce C. Bell.";

    for (String resource : RESOURCE) {
        LabeledCSVParser parser = null;
        try {
            parser = parserFactory.createParser(resource, "UTF-8");
            while (parser.getLine() != null) {
                String guid = parser.getValueByLabel("GUID");
                String externalId = "http://arctos.database.museum/guid/" + guid;

                // Find the first REFS key that prefixes the GUID.
                String description = null;
                String collectionId = null;
                for (String prefix : REFS.keySet()) {
                    if (!guid.startsWith(prefix)) {
                        continue;
                    }
                    description = REFS.get(prefix);
                    collectionId = prefix;
                    break;
                }
                if (StringUtils.isBlank(description)) {
                    // no usable description: fall back to the overall citation
                    LOG.warn("missing collectionId [" + guid + "] in file [" + resource + "] on line [" + parser.lastLineNumber() + "]");
                    description = sourceCitation;
                    collectionId = "";
                }

                String citation = ExternalIdUtil.toCitation(null, sourceCitation + " " + description, null);
                StudyImpl studyTemplate = new StudyImpl("bell-" + collectionId, sourceCitation, "http://dx.doi.org/10.1654/4756.1", citation);
                Study study = nodeFactory.getOrCreateStudy(studyTemplate);

                String genus = parser.getValueByLabel("Genus");
                String species = parser.getValueByLabel("Species");
                String[] parasiteNameParts = new String[] { StringUtils.trim(genus), StringUtils.trim(species) };
                String parasiteName = StringUtils.join(parasiteNameParts, " ");
                Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(parasiteName, null));
                parasite.setExternalId(externalId);
                Location location = getLocation(parser, parasite);
                parasite.caughtIn(location);

                String hostName = StringUtils.trim(parser.getValueByLabel("SCIENTIFIC_NAME"));
                Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
                host.caughtIn(location);
                host.setExternalId(externalId);

                parasite.interactsWith(host, InteractType.PARASITE_OF);

                Date collectedOn = parseDate(parser);
                nodeFactory.setUnixEpochProperty(parasite, collectedOn);
                nodeFactory.setUnixEpochProperty(host, collectedOn);
            }
        } catch (Throwable e) {
            throw new StudyImporterException(getErrorMessage(resource, parser), e);
        }
    }
}
use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForBrose method importLine.
/**
 * Imports the record the parser is currently positioned on.
 *
 * <p>The row's "Link reference" value is resolved to a long reference via {@code refMap};
 * the study is looked up or created from that reference and, when a consumer name is
 * available, the consumer interaction is added.
 *
 * @param parser parser positioned on the row to import
 * @param refMap short reference -> long reference
 * @throws StudyImporterException if the row's reference is unknown or node creation fails;
 *                                a malformed number is only logged (when a study exists)
 */
private void importLine(LabeledCSVParser parser, Map<String, String> refMap) throws StudyImporterException {
    Study localStudy = null;
    try {
        final String refKey = StringUtils.trim(parser.getValueByLabel("Link reference"));
        if (!refMap.containsKey(refKey)) {
            throw new StudyImporterException("failed to find ref [" + refKey + "] on line [" + parser.lastLineNumber() + "]");
        }
        final String fullReference = refMap.get(refKey);

        final String studyTitle = "BROSE-" + StringUtils.abbreviate(fullReference, 20);
        final String studyCitation = ExternalIdUtil.toCitation(null, fullReference, null);
        localStudy = nodeFactory.getOrCreateStudy(new StudyImpl(studyTitle, SOURCE, null, studyCitation));

        final String consumerName = getName(parser, "Taxonomy consumer", "Common name(s) consumer");
        if (StringUtils.isNotBlank(consumerName)) {
            addInteractionForConsumer(parser, localStudy, consumerName);
        } else {
            getLogger().warn(localStudy, "found empty name on line [" + parser.lastLineNumber() + "]");
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("problem creating nodes at line [" + parser.lastLineNumber() + "]", e);
    } catch (NumberFormatException e) {
        // Malformed numeric field: the record is skipped; it can only be logged once a study exists.
        final String prefix = "skipping record, found malformed field at line [" + parser.lastLineNumber() + "]: ";
        if (localStudy != null) {
            getLogger().warn(localStudy, prefix + e.getMessage());
        }
    }
}
Aggregations