Search in sources:

Example 26 with StudyImpl

use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.

the class ReportGeneratorTest method generateStudySourceCitationReports.

/**
 * Creates three studies spread over two source citations, generates the
 * source-citation reports, and checks the report node indexed under each source.
 *
 * Both index lookups are closed in a {@code finally} block so that a failing
 * assertion does not leave the index hits open.
 */
@Test
public void generateStudySourceCitationReports() throws NodeFactoryException {
    // two studies share the "az source" citation ...
    StudyImpl study1 = new StudyImpl("a title", "az source", null, "citation");
    study1.setSourceId("az/source1");
    createStudy(study1);
    StudyImpl study2 = new StudyImpl("another title", "az source", null, "citation");
    study2.setSourceId("az/source2");
    createStudy(study2);
    // ... and a third study has a source citation of its own
    StudyImpl study3 = new StudyImpl("yet another title", "zother source", null, null);
    study3.setSourceId("zother/source");
    createStudy(study3);
    resolveNames();

    new ReportGenerator(getGraphDb()).generateReportForSourceCitations();

    IndexHits<Node> reports = getGraphDb().index().forNodes("reports").get(StudyConstant.SOURCE, "az source");
    try {
        Node reportNode = reports.getSingle();
        assertThat((Integer) reportNode.getProperty(PropertyAndValueDictionary.NUMBER_OF_STUDIES), is(2));
        assertThat((Integer) reportNode.getProperty(PropertyAndValueDictionary.NUMBER_OF_SOURCES), is(1));
        assertThat((Integer) reportNode.getProperty(PropertyAndValueDictionary.NUMBER_OF_DATASETS), is(2));
        assertThat((Integer) reportNode.getProperty(PropertyAndValueDictionary.NUMBER_OF_INTERACTIONS), is(8));
        assertThat((Integer) reportNode.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA), is(3));
        assertThat((Integer) reportNode.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA_NO_MATCH), is(2));
        assertThat((String) reportNode.getProperty(StudyConstant.SOURCE), is("az source"));
    } finally {
        reports.close();
    }

    IndexHits<Node> otherReports = getGraphDb().index().forNodes("reports").get(StudyConstant.SOURCE, "zother source");
    try {
        Node otherReport = otherReports.getSingle();
        assertThat((String) otherReport.getProperty(StudyConstant.SOURCE), is("zother source"));
        assertThat((Integer) otherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_STUDIES), is(1));
        assertThat((Integer) otherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_INTERACTIONS), is(4));
        assertThat((Integer) otherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA), is(3));
        assertThat((Integer) otherReport.getProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA_NO_MATCH), is(2));
    } finally {
        // the original closed only the first lookup; close this one too
        otherReports.close();
    }
}
Also used : Node(org.neo4j.graphdb.Node) StudyImpl(org.eol.globi.domain.StudyImpl) Test(org.junit.Test)

Example 27 with StudyImpl

use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForCoetzer method importStudy.

/**
 * Imports interactions from the zip archive exposed as the dataset's "archive" resource.
 *
 * The archive must contain {@code taxon.txt}, {@code description.txt},
 * {@code references.txt} and {@code distribution.txt}; each is copied to a temp file
 * while the zip stream is scanned. Taxon names and references are loaded into
 * MapDB-backed maps keyed by the integer in the first column. Each description row
 * whose third column has the form {@code <interaction>:<target>,<target>,...} then
 * yields one interaction per target, provided that both a reference and a taxon name
 * are known for the row's id.
 *
 * The MapDB instance, the zip stream and the temp-file streams are released on every
 * exit path, including failures.
 *
 * @throws StudyImporterException if no archive URI is configured, an expected archive
 *                                entry is missing, an unsupported interaction type is
 *                                found, or reading or node creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    if (org.apache.commons.lang.StringUtils.isBlank(getResourceArchiveURI())) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<Integer, String> taxonMap = db.createHashMap("taxonMap").make();
        final HTreeMap<Integer, String> refMap = db.createHashMap("refMap").make();

        File taxonTempFile = null;
        File assocTempFile = null;
        File referencesTempFile = null;
        File distributionTempFile = null;

        InputStream inputStream = DatasetUtil.getNamedResourceStream(getDataset(), "archive");
        ZipInputStream zipInputStream = new ZipInputStream(inputStream);
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                if (entry.getName().matches("(^|(.*/))taxon.txt$")) {
                    taxonTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))description.txt$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))references.txt$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))distribution.txt$")) {
                    distributionTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // drain entries we do not need
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            IOUtils.closeQuietly(zipInputStream);
        }

        if (taxonTempFile == null) {
            throw new StudyImporterException("failed to find expected [taxon.txt] resource");
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [description.txt] resource");
        }
        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.txt] resource");
        }
        if (distributionTempFile == null) {
            throw new StudyImporterException("failed to find expected [distribution.txt] resource");
        }

        // taxon id -> taxon name
        try (FileInputStream taxonStream = new FileInputStream(taxonTempFile);
             BufferedReader taxonReader = FileUtils.getUncompressedBufferedReader(taxonStream, CharsetConstant.UTF8)) {
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(taxonReader);
            parser.changeDelimiter('\t');
            String[] line;
            while ((line = parser.getLine()) != null) {
                taxonMap.put(Integer.parseInt(line[0]), nameFor(line));
            }
        }

        // id -> reference text
        try (FileInputStream refsStream = new FileInputStream(referencesTempFile)) {
            LabeledCSVParser refs = CSVTSVUtil.createLabeledCSVParser(refsStream);
            refs.changeDelimiter('\t');
            String[] refsLine;
            while ((refsLine = refs.getLine()) != null) {
                refMap.put(Integer.parseInt(refsLine[0]), refsLine[1]);
            }
        }

        final Map<String, InteractType> interactTypeMap = new HashMap<String, InteractType>();
        interactTypeMap.put("Visits flowers of", InteractType.VISITS_FLOWERS_OF);
        // NOTE(review): "Host of" maps to VISITS_FLOWERS_OF, same as the entry above - confirm this is intended
        interactTypeMap.put("Host of", InteractType.VISITS_FLOWERS_OF);
        interactTypeMap.put("Parasite of", InteractType.PARASITE_OF);
        interactTypeMap.put("Nests in", InteractType.INTERACTS_WITH);

        try (FileInputStream assocStream = new FileInputStream(assocTempFile)) {
            LabeledCSVParser assoc = CSVTSVUtil.createLabeledCSVParser(assocStream);
            assoc.changeDelimiter('\t');
            String[] assocLine;
            while ((assocLine = assoc.getLine()) != null) {
                final Integer taxonId = Integer.parseInt(assocLine[0]);
                final String[] parts = assocLine[2].split(":");
                if (parts.length > 1) {
                    String interactionString = parts[0];
                    String[] targetTaxonNames = parts[1].split(",");
                    // these depend on the row only, so look them up once per row
                    final String reference = refMap.get(taxonId);
                    final String sourceTaxonName = taxonMap.get(taxonId);
                    final InteractType relType = interactTypeMap.get(interactionString);
                    for (String targetTaxonName : targetTaxonNames) {
                        if (StringUtils.isNotBlank(reference) && StringUtils.isNotBlank(sourceTaxonName)) {
                            final Study study = nodeFactory.getOrCreateStudy(new StudyImpl(getSourceCitation() + reference, getSourceCitationLastAccessed(), null, reference));
                            final Specimen source = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(sourceTaxonName), null));
                            final Specimen target = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(targetTaxonName), null));
                            if (relType == null) {
                                throw new StudyImporterException("found unsupported interaction type [" + interactionString + "]");
                            }
                            source.interactsWith(target, relType);
                        }
                    }
                }
            }
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // release the in-memory store even when the import fails
        db.close();
    }
}
Also used : InteractType(org.eol.globi.domain.InteractType) Study(org.eol.globi.domain.Study) HashMap(java.util.HashMap) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Specimen(org.eol.globi.domain.Specimen) ZipInputStream(java.util.zip.ZipInputStream) BufferedReader(java.io.BufferedReader) File(java.io.File) DB(org.mapdb.DB) NullOutputStream(org.apache.commons.io.output.NullOutputStream)

Example 28 with StudyImpl

use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForCook method importStudy.

/**
 * Imports the records of {@code DATASET_RESOURCE_NAME} into the single study "Cook 2012".
 *
 * Every row yields one host specimen of Micropogonias undulatus at a fixed sample
 * location, stamped with the row's "Date" value (pattern {@code MM/dd/yyyy}). The
 * "Fish Length" value is multiplied by 10 before being stored as a length in mm
 * (presumably a cm to mm conversion - confirm against the data). Each of the
 * "Iso ..." columns is then handed to {@code addParasites}.
 *
 * @throws StudyImporterException if the resource cannot be read, a number or date
 *                                cannot be parsed, or node creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser parser;
    try {
        parser = parserFactory.createParser(DATASET_RESOURCE_NAME, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource", e);
    }
    String citation = "Cook CW. The Early Life History and Reproductive Biology of Cymothoa excisa, a Marine Isopod Parasitizing Atlantic Croaker, (Micropogonias undulatus), along the Texas Coast. 2012. Master Thesis. Available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.";
    StudyImpl study1 = new StudyImpl("Cook 2012", "Data provided by Colt W. Cook. Also available from  http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.", null, citation);
    study1.setExternalId("http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285");
    Study study = nodeFactory.getOrCreateStudy(study1);
    // column labels are passed on verbatim, including the trailing space in "Iso 4 "
    final String[] isoCols = { "Iso 1", "Iso 2", "Iso 3", "Iso 4 ", "Iso 5" };
    try {
        Double latitude = LocationUtil.parseDegrees("27º51'N");
        Double longitude = LocationUtil.parseDegrees("97º8'W");
        Location sampleLocation = nodeFactory.getOrCreateLocation(new LocationImpl(latitude, longitude, -3.0, null));
        try {
            while (parser.getLine() != null) {
                Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl("Micropogonias undulatus", null));
                host.setLengthInMm(Double.parseDouble(parser.getValueByLabel("Fish Length")) * 10.0);
                String dateString = parser.getValueByLabel("Date");
                Date collectionDate = DateUtil.parsePatternUTC(dateString, "MM/dd/yyyy").toDate();
                nodeFactory.setUnixEpochProperty(host, collectionDate);
                host.caughtIn(sampleLocation);
                for (String isoCol : isoCols) {
                    addParasites(parser, study, sampleLocation, host, collectionDate, isoCol);
                }
            }
        } catch (IOException e) {
            throw new StudyImporterException("failed to parse [" + DATASET_RESOURCE_NAME + "]", e);
        } catch (NumberFormatException e) {
            // NumberFormatException is an IllegalArgumentException; without this clause a
            // malformed number would be reported as a date problem by the handler below
            throw new StudyImporterException("failed to parse number in [" + DATASET_RESOURCE_NAME + "] on line [" + parser.lastLineNumber() + "]", e);
        } catch (IllegalArgumentException e) {
            throw new StudyImporterException("failed to parse date", e);
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create host and parasite taxons", e);
    }
}
Also used : Study(org.eol.globi.domain.Study) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) Date(java.util.Date) Specimen(org.eol.globi.domain.Specimen) LocationImpl(org.eol.globi.domain.LocationImpl) Location(org.eol.globi.domain.Location)

Example 29 with StudyImpl

use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForBell method importStudy.

/**
 * Imports parasite-host records from every resource listed in {@code RESOURCE}.
 *
 * For each row, the "GUID" value is matched by prefix against the keys of
 * {@code REFS} to choose a collection id and description. Rows without a match are
 * logged and fall back to the source citation and an empty collection id. A parasite
 * specimen (from "Genus" and "Species") and a host specimen (from "SCIENTIFIC_NAME")
 * are created at the same location with the same external id and date, and linked
 * with {@code PARASITE_OF}.
 *
 * @throws StudyImporterException wrapping any failure raised while processing a resource
 */
@Override
public void importStudy() throws StudyImporterException {
    final String sourceCitation = "Bell, K. C., Matek, D., Demboski, J. R., & Cook, J. A. (2015). Expanded Host Range of Sucking Lice and Pinworms of Western North American Chipmunks. Comparative Parasitology, 82(2), 312–321. doi:10.1654/4756.1 . Data provided by Kayce C. Bell.";
    for (String resource : RESOURCE) {
        LabeledCSVParser parser = null;
        try {
            parser = parserFactory.createParser(resource, "UTF-8");
            while (parser.getLine() != null) {
                final String guid = parser.getValueByLabel("GUID");
                final String externalId = "http://arctos.database.museum/guid/" + guid;

                // first REFS key that prefixes the guid decides collection and description
                String collectionId = null;
                String description = null;
                for (String prefix : REFS.keySet()) {
                    if (guid.startsWith(prefix)) {
                        collectionId = prefix;
                        description = REFS.get(prefix);
                        break;
                    }
                }
                if (StringUtils.isBlank(description)) {
                    LOG.warn("missing collectionId [" + guid + "] in file [" + resource + "] on line [" + parser.lastLineNumber() + "]");
                    collectionId = "";
                    description = sourceCitation;
                }

                final String citation = ExternalIdUtil.toCitation(null, sourceCitation + " " + description, null);
                final StudyImpl studyTemplate = new StudyImpl("bell-" + collectionId, sourceCitation, "http://dx.doi.org/10.1654/4756.1", citation);
                final Study study = nodeFactory.getOrCreateStudy(studyTemplate);

                final String genus = parser.getValueByLabel("Genus");
                final String species = parser.getValueByLabel("Species");
                final String[] nameParts = { StringUtils.trim(genus), StringUtils.trim(species) };
                final Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.join(nameParts, " "), null));
                parasite.setExternalId(externalId);
                final Location location = getLocation(parser, parasite);
                parasite.caughtIn(location);

                final String hostName = StringUtils.trim(parser.getValueByLabel("SCIENTIFIC_NAME"));
                final Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
                host.caughtIn(location);
                host.setExternalId(externalId);

                parasite.interactsWith(host, InteractType.PARASITE_OF);

                final Date collected = parseDate(parser);
                nodeFactory.setUnixEpochProperty(parasite, collected);
                nodeFactory.setUnixEpochProperty(host, collected);
            }
        } catch (Throwable e) {
            throw new StudyImporterException(getErrorMessage(resource, parser), e);
        }
    }
}
Also used : Study(org.eol.globi.domain.Study) Specimen(org.eol.globi.domain.Specimen) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Date(java.util.Date) Location(org.eol.globi.domain.Location)

Example 30 with StudyImpl

use of org.eol.globi.domain.StudyImpl in project eol-globi-data by jhpoelen.

the class StudyImporterForBrose method importLine.

/**
 * Imports the record the parser is currently positioned on.
 *
 * The trimmed "Link reference" value is looked up in {@code refMap} to obtain the full
 * reference, from which the study is created or retrieved. If a consumer name can be
 * determined, the consumer's interactions are added; otherwise a warning is logged.
 * A {@link NumberFormatException} causes the record to be skipped, with a warning
 * logged when a study is already available.
 *
 * @param parser parser positioned on the line to import
 * @param refMap full references keyed by short reference
 * @throws StudyImporterException if the short reference is unknown or node creation fails
 */
private void importLine(LabeledCSVParser parser, Map<String, String> refMap) throws StudyImporterException {
    Study localStudy = null;
    try {
        final String refKey = StringUtils.trim(parser.getValueByLabel("Link reference"));
        if (!refMap.containsKey(refKey)) {
            throw new StudyImporterException("failed to find ref [" + refKey + "] on line [" + parser.lastLineNumber() + "]");
        }
        final String fullReference = refMap.get(refKey);
        final String title = "BROSE-" + StringUtils.abbreviate(fullReference, 20);
        final String citation = ExternalIdUtil.toCitation(null, fullReference, null);
        localStudy = nodeFactory.getOrCreateStudy(new StudyImpl(title, SOURCE, null, citation));

        final String consumerName = getName(parser, "Taxonomy consumer", "Common name(s) consumer");
        if (StringUtils.isNotBlank(consumerName)) {
            addInteractionForConsumer(parser, localStudy, consumerName);
        } else {
            getLogger().warn(localStudy, "found empty name on line [" + parser.lastLineNumber() + "]");
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("problem creating nodes at line [" + parser.lastLineNumber() + "]", e);
    } catch (NumberFormatException e) {
        // malformed numeric field: skip the record rather than abort the import
        final String prefix = "skipping record, found malformed field at line [" + parser.lastLineNumber() + "]: ";
        if (localStudy != null) {
            getLogger().warn(localStudy, prefix + e.getMessage());
        }
    }
}
Also used : Study(org.eol.globi.domain.Study) StudyImpl(org.eol.globi.domain.StudyImpl)

Aggregations

StudyImpl (org.eol.globi.domain.StudyImpl) 82, Study (org.eol.globi.domain.Study) 60, Test (org.junit.Test) 40, Specimen (org.eol.globi.domain.Specimen) 33, TaxonImpl (org.eol.globi.domain.TaxonImpl) 33, IOException (java.io.IOException) 20, LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser) 17, Location (org.eol.globi.domain.Location) 15, StringWriter (java.io.StringWriter) 12, LocationImpl (org.eol.globi.domain.LocationImpl) 12, Date (java.util.Date) 9, HashMap (java.util.HashMap) 9, Taxon (org.eol.globi.domain.Taxon) 9, StudyNode (org.eol.globi.domain.StudyNode) 7, Node (org.neo4j.graphdb.Node) 7, File (java.io.File) 6, TermImpl (org.eol.globi.domain.TermImpl) 6, DatasetImpl (org.eol.globi.service.DatasetImpl) 6, InteractType (org.eol.globi.domain.InteractType) 5, NonResolvingTaxonIndex (org.eol.globi.taxon.NonResolvingTaxonIndex) 5