use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.
the class LinkerTermMatcherTest method exactMatchExcludeStrains.
/**
 * Registers the taxon "Phytophthora infestans", runs the term-matching linker and
 * checks that the SAME_AS ids reported for it contain NCBI:4787 but not NCBI:403677.
 *
 * Currently ignored; see https://github.com/GlobalNamesArchitecture/gnparser/issues/291
 */
@Test
@Ignore
public void exactMatchExcludeStrains() throws NodeFactoryException, PropertyEnricherException {
    final String taxonName = "Phytophthora infestans";
    taxonIndex.getOrCreateTaxon(new TaxonImpl(taxonName, null));

    final LinkerTermMatcher linker = new LinkerTermMatcher(getGraphDb());
    linker.link();

    final Collection<String> linkedIds =
            LinkerTestUtil.assertHasOther(taxonName, 6, taxonIndex, RelTypes.SAME_AS);
    assertThat(linkedIds, hasItem("NCBI:4787"));
    assertThat(linkedIds, not(hasItem("NCBI:403677")));
}
use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForCoetzer method importStudy.
/**
 * Imports interactions from the dataset's "archive" resource, a zip expected to contain
 * taxon.txt, description.txt, references.txt and distribution.txt (tab separated).
 *
 * taxon.txt and references.txt are loaded into in-memory maps keyed by the integer in
 * their first column. Each description.txt row of the form
 * "[interaction]:[target1],[target2],..." (third column) then yields one source/target
 * specimen pair per target name, provided both a reference and a source taxon name are
 * known for the row's id. distribution.txt must be present, but is not read here.
 *
 * @throws StudyImporterException if no archive URI is configured, an expected archive
 *                                entry is missing, an interaction label is not supported,
 *                                or reading / node creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    if (org.apache.commons.lang.StringUtils.isBlank(getResourceArchiveURI())) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<Integer, String> taxonMap = db.createHashMap("taxonMap").make();
        final HTreeMap<Integer, String> refMap = db.createHashMap("refMap").make();

        File taxonTempFile = null;
        File assocTempFile = null;
        File referencesTempFile = null;
        File distributionTempFile = null;

        // Extract the entries of interest to temp files; close the archive stream even on failure.
        ZipInputStream zipInputStream = new ZipInputStream(DatasetUtil.getNamedResourceStream(getDataset(), "archive"));
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                final String entryName = entry.getName();
                if (entryName.matches("(^|(.*/))taxon.txt$")) {
                    taxonTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))description.txt$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))references.txt$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))distribution.txt$")) {
                    distributionTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // drain entries that are not needed
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            IOUtils.closeQuietly(zipInputStream);
        }

        if (taxonTempFile == null) {
            throw new StudyImporterException("failed to find expected [taxon.txt] resource");
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [description.txt] resource");
        }
        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.txt] resource");
        }
        if (distributionTempFile == null) {
            throw new StudyImporterException("failed to find expected [distribution.txt] resource");
        }

        // taxon id -> taxon name
        try (FileInputStream taxonStream = new FileInputStream(taxonTempFile);
             BufferedReader taxonReader = FileUtils.getUncompressedBufferedReader(taxonStream, CharsetConstant.UTF8)) {
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(taxonReader);
            parser.changeDelimiter('\t');
            String[] line;
            while ((line = parser.getLine()) != null) {
                taxonMap.put(Integer.parseInt(line[0]), nameFor(line));
            }
        }

        // id -> reference (second column of references.txt)
        try (FileInputStream refsStream = new FileInputStream(referencesTempFile)) {
            LabeledCSVParser refs = CSVTSVUtil.createLabeledCSVParser(refsStream);
            refs.changeDelimiter('\t');
            String[] refsLine;
            while ((refsLine = refs.getLine()) != null) {
                refMap.put(Integer.parseInt(refsLine[0]), refsLine[1]);
            }
        }

        final Map<String, InteractType> interactTypeMap = new HashMap<String, InteractType>();
        interactTypeMap.put("Visits flowers of", InteractType.VISITS_FLOWERS_OF);
        // NOTE(review): "Host of" is mapped to VISITS_FLOWERS_OF, as in the original code;
        // confirm that this is intended.
        interactTypeMap.put("Host of", InteractType.VISITS_FLOWERS_OF);
        interactTypeMap.put("Parasite of", InteractType.PARASITE_OF);
        interactTypeMap.put("Nests in", InteractType.INTERACTS_WITH);

        try (FileInputStream assocStream = new FileInputStream(assocTempFile)) {
            LabeledCSVParser assoc = CSVTSVUtil.createLabeledCSVParser(assocStream);
            assoc.changeDelimiter('\t');
            String[] assocLine;
            while ((assocLine = assoc.getLine()) != null) {
                final Integer taxonId = Integer.parseInt(assocLine[0]);
                final String[] parts = assocLine[2].split(":");
                if (parts.length > 1) {
                    final String interactionString = parts[0];
                    final String[] targetTaxonNames = parts[1].split(",");
                    // These lookups depend only on the row id, not on the target name.
                    final String reference = refMap.get(taxonId);
                    final String sourceTaxonName = taxonMap.get(taxonId);
                    for (String targetTaxonName : targetTaxonNames) {
                        if (StringUtils.isNotBlank(reference) && StringUtils.isNotBlank(sourceTaxonName)) {
                            // Reject unsupported interaction labels before creating any nodes.
                            final InteractType relType = interactTypeMap.get(interactionString);
                            if (relType == null) {
                                throw new StudyImporterException("found unsupported interaction type [" + interactionString + "]");
                            }
                            final Study study = nodeFactory.getOrCreateStudy(new StudyImpl(getSourceCitation() + reference, getSourceCitationLastAccessed(), null, reference));
                            final Specimen source = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(sourceTaxonName), null));
                            final Specimen target = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(targetTaxonName), null));
                            source.interactsWith(target, relType);
                        }
                    }
                }
            }
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // release the in-memory store on every exit path, not only on success
        db.close();
    }
}
use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForCook method importStudy.
/**
 * Imports the Cook 2012 dataset: every row yields one host specimen
 * ("Micropogonias undulatus") caught at a fixed sample location, with its length,
 * collection date and the parasites listed in the "Iso ..." columns.
 *
 * @throws StudyImporterException if the resource cannot be read or parsed, a fish
 *                                length or date cannot be parsed, or node creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser parser;
    try {
        parser = parserFactory.createParser(DATASET_RESOURCE_NAME, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource", e);
    }
    String citation = "Cook CW. The Early Life History and Reproductive Biology of Cymothoa excisa, a Marine Isopod Parasitizing Atlantic Croaker, (Micropogonias undulatus), along the Texas Coast. 2012. Master Thesis. Available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.";
    StudyImpl study1 = new StudyImpl("Cook 2012", "Data provided by Colt W. Cook. Also available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.", null, citation);
    study1.setExternalId("http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285");
    Study study = nodeFactory.getOrCreateStudy(study1);
    try {
        Double latitude = LocationUtil.parseDegrees("27º51'N");
        Double longitude = LocationUtil.parseDegrees("97º8'W");
        Location sampleLocation = nodeFactory.getOrCreateLocation(new LocationImpl(latitude, longitude, -3.0, null));
        // Column labels are used verbatim for lookup (including the trailing space in "Iso 4 ").
        final String[] isoCols = { "Iso 1", "Iso 2", "Iso 3", "Iso 4 ", "Iso 5" };
        try {
            while (parser.getLine() != null) {
                Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl("Micropogonias undulatus", null));
                String lengthString = parser.getValueByLabel("Fish Length");
                try {
                    // value is multiplied by 10 before being stored as millimetres
                    host.setLengthInMm(Double.parseDouble(lengthString) * 10.0);
                } catch (NumberFormatException e) {
                    // NumberFormatException is an IllegalArgumentException; handle it here so that
                    // a bad length is not reported as a date parsing failure below.
                    throw new StudyImporterException("failed to parse fish length [" + lengthString + "]", e);
                }
                String dateString = parser.getValueByLabel("Date");
                Date collectionDate = DateUtil.parsePatternUTC(dateString, "MM/dd/yyyy").toDate();
                nodeFactory.setUnixEpochProperty(host, collectionDate);
                host.caughtIn(sampleLocation);
                for (String isoCol : isoCols) {
                    addParasites(parser, study, sampleLocation, host, collectionDate, isoCol);
                }
            }
        } catch (IOException e) {
            throw new StudyImporterException("failed to parse [" + DATASET_RESOURCE_NAME + "]", e);
        } catch (IllegalArgumentException e) {
            throw new StudyImporterException("failed to parse date", e);
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create host and parasite taxons", e);
    }
}
use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForCook method addParasites.
/**
 * Adds a "Cymothoa excisa" parasite specimen for the given column of the current row,
 * unless the column value is "0".
 *
 * The parasite is placed at the sample location, linked to the host as PARASITE_OF and
 * given the collection date. Its length is set only when the column value is not "NA"
 * and parses as a number; an unparseable length is skipped without dropping the
 * interaction or the date. A null column value is not special-cased.
 *
 * @param parser         parser positioned on the row being imported
 * @param study          study the parasite specimen belongs to
 * @param sampleLocation location the parasite is recorded at
 * @param host           host specimen the parasite interacts with
 * @param collectionDate date assigned to the parasite specimen
 * @param isoCol         label of the column holding the parasite value
 * @throws NodeFactoryException if the specimen cannot be created or updated
 */
private void addParasites(LabeledCSVParser parser, Study study, Location sampleLocation, Specimen host, Date collectionDate, String isoCol) throws NodeFactoryException {
    String valueByLabel = parser.getValueByLabel(isoCol);
    boolean parasiteDetected = !"0".equals(valueByLabel);
    if (!parasiteDetected) {
        return;
    }
    Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl("Cymothoa excisa", null));
    parasite.caughtIn(sampleLocation);
    boolean lengthAvailable = !"NA".equals(valueByLabel);
    if (lengthAvailable) {
        try {
            double parasiteLengthCm = Double.parseDouble(valueByLabel);
            parasite.setLengthInMm(parasiteLengthCm * 10.0);
        } catch (NumberFormatException ignored) {
            // Length is optional: keep the specimen and its interaction, just without a length.
        }
    }
    parasite.interactsWith(host, InteractType.PARASITE_OF);
    nodeFactory.setUnixEpochProperty(parasite, collectionDate);
}
use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.
the class StudyImporterForBell method importStudy.
@Override
public void importStudy() throws StudyImporterException {
for (String resource : RESOURCE) {
LabeledCSVParser parser = null;
try {
parser = parserFactory.createParser(resource, "UTF-8");
while (parser.getLine() != null) {
String sourceCitation = "Bell, K. C., Matek, D., Demboski, J. R., & Cook, J. A. (2015). Expanded Host Range of Sucking Lice and Pinworms of Western North American Chipmunks. Comparative Parasitology, 82(2), 312–321. doi:10.1654/4756.1 . Data provided by Kayce C. Bell.";
String guid = parser.getValueByLabel("GUID");
String externalId = "http://arctos.database.museum/guid/" + guid;
String description = null;
String collectionId = null;
for (String key : REFS.keySet()) {
if (guid.startsWith(key)) {
description = REFS.get(key);
collectionId = key;
break;
}
}
if (StringUtils.isBlank(description)) {
LOG.warn("missing collectionId [" + guid + "] in file [" + resource + "] on line [" + parser.lastLineNumber() + "]");
description = sourceCitation;
collectionId = "";
}
Study study = nodeFactory.getOrCreateStudy(new StudyImpl("bell-" + collectionId, sourceCitation, "http://dx.doi.org/10.1654/4756.1", ExternalIdUtil.toCitation(null, sourceCitation + " " + description, null)));
String genus = parser.getValueByLabel("Genus");
String species = parser.getValueByLabel("Species");
String parasiteName = StringUtils.join(new String[] { StringUtils.trim(genus), StringUtils.trim(species) }, " ");
Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(parasiteName, null));
parasite.setExternalId(externalId);
Location location = getLocation(parser, parasite);
parasite.caughtIn(location);
String scientificName = parser.getValueByLabel("SCIENTIFIC_NAME");
String hostName = StringUtils.trim(scientificName);
Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
host.caughtIn(location);
host.setExternalId(externalId);
parasite.interactsWith(host, InteractType.PARASITE_OF);
Date date = parseDate(parser);
nodeFactory.setUnixEpochProperty(parasite, date);
nodeFactory.setUnixEpochProperty(host, date);
}
} catch (Throwable e) {
throw new StudyImporterException(getErrorMessage(resource, parser), e);
}
}
}
Aggregations