Search in sources :

Example 41 with TaxonImpl

use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.

the class LinkerTermMatcherTest method exactMatchExcludeStrains.

/**
 * Indexes "Phytophthora infestans", runs the {@link LinkerTermMatcher} and checks that the
 * resulting SAME_AS links include NCBI:4787 but not NCBI:403677.
 *
 * Currently ignored; see https://github.com/GlobalNamesArchitecture/gnparser/issues/291
 */
@Test
@Ignore
public void exactMatchExcludeStrains() throws NodeFactoryException, PropertyEnricherException {
    final String speciesName = "Phytophthora infestans";
    taxonIndex.getOrCreateTaxon(new TaxonImpl(speciesName, null));

    LinkerTermMatcher linker = new LinkerTermMatcher(getGraphDb());
    linker.link();

    Collection<String> linkedIds = LinkerTestUtil.assertHasOther(speciesName, 6, taxonIndex, RelTypes.SAME_AS);
    // NOTE(review): judging by the test name, NCBI:403677 is presumably a strain-level id — confirm
    assertThat(linkedIds, hasItem("NCBI:4787"));
    assertThat(linkedIds, not(hasItem("NCBI:403677")));
}
Also used : TaxonImpl(org.eol.globi.domain.TaxonImpl) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 42 with TaxonImpl

use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForCoetzer method importStudy.

/**
 * Imports interactions from the zip archive registered as the dataset's "archive" resource.
 *
 * The archive is expected to contain tab-separated <code>taxon.txt</code>,
 * <code>description.txt</code>, <code>references.txt</code> and <code>distribution.txt</code>
 * entries (at any directory depth). Taxon names and references are indexed by the integer id in
 * their first column; each description line of the form
 * <code>&lt;interaction&gt;:&lt;target&gt;[,&lt;target&gt;...]</code> is then turned into one
 * interaction per target taxon name.
 *
 * @throws StudyImporterException if no archive URI is configured, an expected archive entry is
 *                                missing, an interaction label is not supported, or reading the
 *                                archive / creating nodes fails
 */
@Override
public void importStudy() throws StudyImporterException {
    if (org.apache.commons.lang.StringUtils.isBlank(getResourceArchiveURI())) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    final DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<Integer, String> taxonMap = db.createHashMap("taxonMap").make();
        final HTreeMap<Integer, String> refMap = db.createHashMap("refMap").make();

        File taxonTempFile = null;
        File assocTempFile = null;
        File referencesTempFile = null;
        File distributionTempFile = null;
        ZipInputStream zipInputStream = new ZipInputStream(DatasetUtil.getNamedResourceStream(getDataset(), "archive"));
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                final String entryName = entry.getName();
                if (entryName.matches("(^|(.*/))taxon.txt$")) {
                    taxonTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))description.txt$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))references.txt$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))distribution.txt$")) {
                    distributionTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // drain entries that are not of interest
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            // close the archive stream even when reading an entry fails
            IOUtils.closeQuietly(zipInputStream);
        }

        if (taxonTempFile == null) {
            throw new StudyImporterException("failed to find expected [taxon.txt] resource");
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [description.txt] resource");
        }
        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.txt] resource");
        }
        if (distributionTempFile == null) {
            throw new StudyImporterException("failed to find expected [distribution.txt] resource");
        }

        // index taxon names by the id in the first column
        try (FileInputStream taxonStream = new FileInputStream(taxonTempFile);
             BufferedReader taxonReader = FileUtils.getUncompressedBufferedReader(taxonStream, CharsetConstant.UTF8)) {
            LabeledCSVParser taxa = CSVTSVUtil.createLabeledCSVParser(taxonReader);
            taxa.changeDelimiter('\t');
            String[] line;
            while ((line = taxa.getLine()) != null) {
                taxonMap.put(Integer.parseInt(line[0]), nameFor(line));
            }
        }

        // index references by the id in the first column
        try (FileInputStream refsStream = new FileInputStream(referencesTempFile)) {
            LabeledCSVParser refs = CSVTSVUtil.createLabeledCSVParser(refsStream);
            refs.changeDelimiter('\t');
            String[] refsLine;
            while ((refsLine = refs.getLine()) != null) {
                refMap.put(Integer.parseInt(refsLine[0]), refsLine[1]);
            }
        }

        final Map<String, InteractType> interactTypeMap = new HashMap<String, InteractType>();
        interactTypeMap.put("Visits flowers of", InteractType.VISITS_FLOWERS_OF);
        // "Host of" is a host relation, not a flower visit
        interactTypeMap.put("Host of", InteractType.HOST_OF);
        interactTypeMap.put("Parasite of", InteractType.PARASITE_OF);
        interactTypeMap.put("Nests in", InteractType.INTERACTS_WITH);

        try (FileInputStream assocStream = new FileInputStream(assocTempFile)) {
            LabeledCSVParser assoc = CSVTSVUtil.createLabeledCSVParser(assocStream);
            assoc.changeDelimiter('\t');
            String[] assocLine;
            while ((assocLine = assoc.getLine()) != null) {
                final Integer taxonId = Integer.parseInt(assocLine[0]);
                final String[] parts = assocLine[2].split(":");
                if (parts.length > 1) {
                    final String interactionString = parts[0];
                    // these lookups depend on the line only, not on the individual target name
                    final String reference = refMap.get(taxonId);
                    final String sourceTaxonName = taxonMap.get(taxonId);
                    final InteractType relType = interactTypeMap.get(interactionString);
                    for (String targetTaxonName : parts[1].split(",")) {
                        if (StringUtils.isNotBlank(reference) && StringUtils.isNotBlank(sourceTaxonName)) {
                            // reject unsupported labels before any study or specimen is created
                            if (relType == null) {
                                throw new StudyImporterException("found unsupported interaction type [" + interactionString + "]");
                            }
                            final Study study = nodeFactory.getOrCreateStudy(new StudyImpl(getSourceCitation() + reference, getSourceCitationLastAccessed(), null, reference));
                            final Specimen source = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(sourceTaxonName), null));
                            final Specimen target = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(targetTaxonName), null));
                            source.interactsWith(target, relType);
                        }
                    }
                }
            }
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // release the in-memory lookup store on success and on failure alike
        db.close();
    }
}
Also used : InteractType(org.eol.globi.domain.InteractType) Study(org.eol.globi.domain.Study) HashMap(java.util.HashMap) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Specimen(org.eol.globi.domain.Specimen) ZipInputStream(java.util.zip.ZipInputStream) BufferedReader(java.io.BufferedReader) File(java.io.File) DB(org.mapdb.DB) NullOutputStream(org.apache.commons.io.output.NullOutputStream)

Example 43 with TaxonImpl

use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForCook method importStudy.

/**
 * Imports host/parasite records from {@code DATASET_RESOURCE_NAME}.
 *
 * Every data row yields one "Micropogonias undulatus" host specimen, caught at a single fixed
 * sample location and dated by the row's "Date" column (pattern MM/dd/yyyy). For each of the
 * "Iso" columns, {@code addParasites} is invoked to attach parasite specimens to that host.
 *
 * @throws StudyImporterException if the resource cannot be read, a fish length or date cannot
 *                                be parsed, or nodes cannot be created
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser parser;
    try {
        parser = parserFactory.createParser(DATASET_RESOURCE_NAME, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource", e);
    }
    String citation = "Cook CW. The Early Life History and Reproductive Biology of Cymothoa excisa, a Marine Isopod Parasitizing Atlantic Croaker, (Micropogonias undulatus), along the Texas Coast. 2012. Master Thesis. Available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.";
    StudyImpl study1 = new StudyImpl("Cook 2012", "Data provided by Colt W. Cook. Also available from  http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.", null, citation);
    study1.setExternalId("http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285");
    Study study = nodeFactory.getOrCreateStudy(study1);
    // NOTE(review): the trailing space in "Iso 4 " presumably mirrors the column header in the data file — confirm
    final String[] isoCols = { "Iso 1", "Iso 2", "Iso 3", "Iso 4 ", "Iso 5" };
    try {
        Double latitude = LocationUtil.parseDegrees("27º51'N");
        Double longitude = LocationUtil.parseDegrees("97º8'W");
        Location sampleLocation = nodeFactory.getOrCreateLocation(new LocationImpl(latitude, longitude, -3.0, null));
        try {
            while (parser.getLine() != null) {
                Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl("Micropogonias undulatus", null));
                final String fishLength = parser.getValueByLabel("Fish Length");
                try {
                    // presumably recorded in cm, hence the factor 10 to obtain mm — confirm
                    host.setLengthInMm(Double.parseDouble(fishLength) * 10.0);
                } catch (NumberFormatException e) {
                    // NumberFormatException is an IllegalArgumentException; handle it here so a bad
                    // length is not reported as a date problem by the catch below
                    throw new StudyImporterException("failed to parse fish length [" + fishLength + "]", e);
                }
                String dateString = parser.getValueByLabel("Date");
                Date collectionDate = DateUtil.parsePatternUTC(dateString, "MM/dd/yyyy").toDate();
                nodeFactory.setUnixEpochProperty(host, collectionDate);
                host.caughtIn(sampleLocation);
                for (String isoCol : isoCols) {
                    addParasites(parser, study, sampleLocation, host, collectionDate, isoCol);
                }
            }
        } catch (IOException e) {
            throw new StudyImporterException("failed to parse [" + DATASET_RESOURCE_NAME + "]", e);
        } catch (IllegalArgumentException e) {
            throw new StudyImporterException("failed to parse date", e);
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create host and parasite taxons", e);
    }
}
Also used : Study(org.eol.globi.domain.Study) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) Date(java.util.Date) Specimen(org.eol.globi.domain.Specimen) LocationImpl(org.eol.globi.domain.LocationImpl) Location(org.eol.globi.domain.Location)

Example 44 with TaxonImpl

use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForCook method addParasites.

/**
 * Adds a "Cymothoa excisa" parasite specimen for the given column of the parser's current row
 * and links it to the host.
 *
 * A column value of "0" means no parasite and nothing is created. Any other value creates a
 * parasite that is caught at the sample location, marked as parasite of the host and stamped
 * with the collection date. Unless the value is "NA", it is also parsed as the parasite length.
 *
 * @param parser         parser positioned at the row being imported
 * @param study          study the parasite specimen belongs to
 * @param sampleLocation location where the parasite was caught
 * @param host           host specimen the parasite is linked to
 * @param collectionDate date recorded on the parasite specimen
 * @param isoCol         label of the column holding the parasite value
 * @throws NodeFactoryException if the specimen or its properties cannot be created
 */
private void addParasites(LabeledCSVParser parser, Study study, Location sampleLocation, Specimen host, Date collectionDate, String isoCol) throws NodeFactoryException {
    String valueByLabel = parser.getValueByLabel(isoCol);
    if ("0".equals(valueByLabel)) {
        return;
    }
    Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl("Cymothoa excisa", null));
    parasite.caughtIn(sampleLocation);
    if (!"NA".equals(valueByLabel)) {
        try {
            double parasiteLengthCm = Double.parseDouble(valueByLabel);
            parasite.setLengthInMm(parasiteLengthCm * 10.0);
        } catch (NumberFormatException ignored) {
            // the length is optional: an unparseable value only leaves the length unset and
            // must not prevent the interaction and date below from being recorded
        }
    }
    parasite.interactsWith(host, InteractType.PARASITE_OF);
    nodeFactory.setUnixEpochProperty(parasite, collectionDate);
}
Also used : Specimen(org.eol.globi.domain.Specimen) TaxonImpl(org.eol.globi.domain.TaxonImpl)

Example 45 with TaxonImpl

use of org.eol.globi.domain.TaxonImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForBell method importStudy.

/**
 * Imports parasite/host records from every resource listed in {@code RESOURCE}.
 *
 * For each row, the study is selected by the first {@code REFS} key that the row's GUID starts
 * with; when no description is found for the GUID, a warning is logged and the source citation
 * is used instead. Each row produces a parasite specimen (Genus + Species) and a host specimen
 * (SCIENTIFIC_NAME) that share location, external id and date, linked by PARASITE_OF.
 *
 * @throws StudyImporterException wrapping anything thrown while processing a resource
 */
@Override
public void importStudy() throws StudyImporterException {
    final String sourceCitation = "Bell, K. C., Matek, D., Demboski, J. R., & Cook, J. A. (2015). Expanded Host Range of Sucking Lice and Pinworms of Western North American Chipmunks. Comparative Parasitology, 82(2), 312–321. doi:10.1654/4756.1 . Data provided by Kayce C. Bell.";
    for (String resource : RESOURCE) {
        LabeledCSVParser parser = null;
        try {
            parser = parserFactory.createParser(resource, "UTF-8");
            while (parser.getLine() != null) {
                final String guid = parser.getValueByLabel("GUID");
                final String externalId = "http://arctos.database.museum/guid/" + guid;

                // the first key that prefixes the GUID identifies the collection
                String collectionId = null;
                for (String candidate : REFS.keySet()) {
                    if (guid.startsWith(candidate)) {
                        collectionId = candidate;
                        break;
                    }
                }
                String description = collectionId == null ? null : REFS.get(collectionId);
                if (StringUtils.isBlank(description)) {
                    LOG.warn("missing collectionId [" + guid + "] in file [" + resource + "] on line [" + parser.lastLineNumber() + "]");
                    description = sourceCitation;
                    collectionId = "";
                }

                final String studyCitation = ExternalIdUtil.toCitation(null, sourceCitation + " " + description, null);
                final Study study = nodeFactory.getOrCreateStudy(new StudyImpl("bell-" + collectionId, sourceCitation, "http://dx.doi.org/10.1654/4756.1", studyCitation));

                // parasite: genus and species joined by a single space
                final String[] nameParts = new String[] { StringUtils.trim(parser.getValueByLabel("Genus")), StringUtils.trim(parser.getValueByLabel("Species")) };
                final Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.join(nameParts, " "), null));
                parasite.setExternalId(externalId);
                final Location location = getLocation(parser, parasite);
                parasite.caughtIn(location);

                // host: shares location and external id with the parasite
                final String hostName = StringUtils.trim(parser.getValueByLabel("SCIENTIFIC_NAME"));
                final Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
                host.caughtIn(location);
                host.setExternalId(externalId);

                parasite.interactsWith(host, InteractType.PARASITE_OF);

                final Date date = parseDate(parser);
                nodeFactory.setUnixEpochProperty(parasite, date);
                nodeFactory.setUnixEpochProperty(host, date);
            }
        } catch (Throwable e) {
            throw new StudyImporterException(getErrorMessage(resource, parser), e);
        }
    }
}
Also used : Study(org.eol.globi.domain.Study) Specimen(org.eol.globi.domain.Specimen) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Date(java.util.Date) Location(org.eol.globi.domain.Location)

Aggregations

TaxonImpl (org.eol.globi.domain.TaxonImpl)123 Specimen (org.eol.globi.domain.Specimen)59 Test (org.junit.Test)54 Taxon (org.eol.globi.domain.Taxon)42 StudyImpl (org.eol.globi.domain.StudyImpl)34 Study (org.eol.globi.domain.Study)32 Location (org.eol.globi.domain.Location)16 LocationImpl (org.eol.globi.domain.LocationImpl)15 TaxonNode (org.eol.globi.domain.TaxonNode)13 LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser)12 IOException (java.io.IOException)11 TermImpl (org.eol.globi.domain.TermImpl)11 StringWriter (java.io.StringWriter)9 ArrayList (java.util.ArrayList)7 Date (java.util.Date)7 HashMap (java.util.HashMap)7 NonResolvingTaxonIndex (org.eol.globi.taxon.NonResolvingTaxonIndex)7 Map (java.util.Map)5 Node (org.neo4j.graphdb.Node)5 File (java.io.File)4