Search in sources :

Example 21 with LabeledCSVParser

Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

In the class StudyImporterForCoetzer, method importStudy.

/**
 * Imports interactions from the zip archive exposed as the dataset's "archive" resource.
 *
 * <p>The archive has to contain entries named {@code taxon.txt}, {@code description.txt},
 * {@code references.txt} and {@code distribution.txt} (at the root or in any sub folder).
 * Taxon names and references are loaded into maps keyed by the integer in their first column.
 * Every row of {@code description.txt} whose third column has the form
 * {@code <interaction>:<target>[,<target>...]} then produces one interaction per target, provided
 * that both a reference and a taxon name are known for the id in the row's first column.
 *
 * @throws StudyImporterException if no archive URI is configured, an expected archive entry is
 *         missing, an interaction label is not supported, or reading the data / creating nodes fails
 */
@Override
public void importStudy() throws StudyImporterException {
    if (org.apache.commons.lang.StringUtils.isBlank(getResourceArchiveURI())) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    final DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<Integer, String> taxonMap = db.createHashMap("taxonMap").make();
        final HTreeMap<Integer, String> refMap = db.createHashMap("refMap").make();

        // NOTE(review): the temp files created below are not removed by this method;
        // confirm that FileUtils.saveToTmpFile takes care of cleaning them up.
        File taxonTempFile = null;
        File assocTempFile = null;
        File referencesTempFile = null;
        File distributionTempFile = null;
        final ZipInputStream zipInputStream = new ZipInputStream(DatasetUtil.getNamedResourceStream(getDataset(), "archive"));
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                if (entry.getName().matches("(^|(.*/))taxon.txt$")) {
                    taxonTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))description.txt$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))references.txt$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))distribution.txt$")) {
                    distributionTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // drain entries we are not interested in
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            // close the archive even when extraction fails half way
            IOUtils.closeQuietly(zipInputStream);
        }

        if (taxonTempFile == null) {
            throw new StudyImporterException("failed to find expected [taxon.txt] resource");
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [description.txt] resource");
        }
        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.txt] resource");
        }
        // distribution.txt is required to be present, but its content is not read by this method.
        if (distributionTempFile == null) {
            throw new StudyImporterException("failed to find expected [distribution.txt] resource");
        }

        // taxon id -> taxon name
        final FileInputStream taxonStream = new FileInputStream(taxonTempFile);
        try {
            BufferedReader taxonReader = FileUtils.getUncompressedBufferedReader(taxonStream, CharsetConstant.UTF8);
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(taxonReader);
            parser.changeDelimiter('\t');
            String[] line;
            while ((line = parser.getLine()) != null) {
                taxonMap.put(Integer.parseInt(line[0]), nameFor(line));
            }
        } finally {
            IOUtils.closeQuietly(taxonStream);
        }

        // id (first column) -> reference text (second column)
        final FileInputStream referencesStream = new FileInputStream(referencesTempFile);
        try {
            LabeledCSVParser refs = CSVTSVUtil.createLabeledCSVParser(referencesStream);
            refs.changeDelimiter('\t');
            String[] refsLine;
            while ((refsLine = refs.getLine()) != null) {
                refMap.put(Integer.parseInt(refsLine[0]), refsLine[1]);
            }
        } finally {
            IOUtils.closeQuietly(referencesStream);
        }

        final Map<String, InteractType> interactTypeMap = new HashMap<String, InteractType>();
        interactTypeMap.put("Visits flowers of", InteractType.VISITS_FLOWERS_OF);
        // NOTE(review): "Host of" maps to the same type as "Visits flowers of"; this looks like
        // a copy/paste slip - confirm the intended interaction type.
        interactTypeMap.put("Host of", InteractType.VISITS_FLOWERS_OF);
        interactTypeMap.put("Parasite of", InteractType.PARASITE_OF);
        interactTypeMap.put("Nests in", InteractType.INTERACTS_WITH);

        final FileInputStream assocStream = new FileInputStream(assocTempFile);
        try {
            LabeledCSVParser assoc = CSVTSVUtil.createLabeledCSVParser(assocStream);
            assoc.changeDelimiter('\t');
            String[] assocLine;
            while ((assocLine = assoc.getLine()) != null) {
                final Integer taxonId = Integer.parseInt(assocLine[0]);
                final String[] parts = assocLine[2].split(":");
                if (parts.length < 2) {
                    // not of the form "<interaction>:<targets>"
                    continue;
                }
                // the lookups depend on the row only, so do them once rather than once per target
                final String reference = refMap.get(taxonId);
                final String sourceTaxonName = taxonMap.get(taxonId);
                if (StringUtils.isBlank(reference) || StringUtils.isBlank(sourceTaxonName)) {
                    continue;
                }
                final String interactionString = parts[0];
                final InteractType relType = interactTypeMap.get(interactionString);
                for (String targetTaxonName : parts[1].split(",")) {
                    // reject unknown interaction labels before any node is created for them
                    if (relType == null) {
                        throw new StudyImporterException("found unsupported interaction type [" + interactionString + "]");
                    }
                    final Study study = nodeFactory.getOrCreateStudy(new StudyImpl(getSourceCitation() + reference, getSourceCitationLastAccessed(), null, reference));
                    final Specimen source = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(sourceTaxonName), null));
                    final Specimen target = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(targetTaxonName), null));
                    source.interactsWith(target, relType);
                }
            }
        } finally {
            IOUtils.closeQuietly(assocStream);
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // release the in-memory store on failure as well as on success
        db.close();
    }
}
Also used : InteractType(org.eol.globi.domain.InteractType) Study(org.eol.globi.domain.Study) HashMap(java.util.HashMap) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Specimen(org.eol.globi.domain.Specimen) ZipInputStream(java.util.zip.ZipInputStream) BufferedReader(java.io.BufferedReader) File(java.io.File) DB(org.mapdb.DB) NullOutputStream(org.apache.commons.io.output.NullOutputStream)

Example 22 with LabeledCSVParser

Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

In the class StudyImporterForCook, method importStudy.

/**
 * Imports the records of {@code DATASET_RESOURCE_NAME} into the study "Cook 2012".
 *
 * <p>For each data row a host specimen of <i>Micropogonias undulatus</i> is created. The host gets
 * the value of the "Fish Length" column multiplied by 10.0 as its length in mm, the value of the
 * "Date" column (pattern {@code MM/dd/yyyy}) as its collection date, and a single, fixed sample
 * location. {@code addParasites} is then invoked once for each of the five "Iso" columns.
 *
 * @throws StudyImporterException if the resource cannot be read or parsed, a numeric value or a
 *         date is malformed, or nodes cannot be created
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser parser;
    try {
        parser = parserFactory.createParser(DATASET_RESOURCE_NAME, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource", e);
    }
    String citation = "Cook CW. The Early Life History and Reproductive Biology of Cymothoa excisa, a Marine Isopod Parasitizing Atlantic Croaker, (Micropogonias undulatus), along the Texas Coast. 2012. Master Thesis. Available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.";
    StudyImpl study1 = new StudyImpl("Cook 2012", "Data provided by Colt W. Cook. Also available from  http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.", null, citation);
    study1.setExternalId("http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285");
    Study study = nodeFactory.getOrCreateStudy(study1);
    // Column labels do not change between rows, so build the array once.
    // NOTE(review): "Iso 4 " carries a trailing space - presumably it mirrors the header in the
    // data file; confirm before normalizing.
    final String[] isoCols = { "Iso 1", "Iso 2", "Iso 3", "Iso 4 ", "Iso 5" };
    try {
        Double latitude = LocationUtil.parseDegrees("27º51'N");
        Double longitude = LocationUtil.parseDegrees("97º8'W");
        Location sampleLocation = nodeFactory.getOrCreateLocation(new LocationImpl(latitude, longitude, -3.0, null));
        try {
            while (parser.getLine() != null) {
                Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl("Micropogonias undulatus", null));
                host.setLengthInMm(Double.parseDouble(parser.getValueByLabel("Fish Length")) * 10.0);
                String dateString = parser.getValueByLabel("Date");
                Date collectionDate = DateUtil.parsePatternUTC(dateString, "MM/dd/yyyy").toDate();
                nodeFactory.setUnixEpochProperty(host, collectionDate);
                host.caughtIn(sampleLocation);
                for (String isoCol : isoCols) {
                    addParasites(parser, study, sampleLocation, host, collectionDate, isoCol);
                }
            }
        } catch (IOException e) {
            throw new StudyImporterException("failed to parse [" + DATASET_RESOURCE_NAME + "]", e);
        } catch (NumberFormatException e) {
            // NumberFormatException is an IllegalArgumentException: without this clause a malformed
            // number (e.g. in "Fish Length") would be misreported as a date problem.
            throw new StudyImporterException("failed to parse numeric value in [" + DATASET_RESOURCE_NAME + "] at line [" + parser.lastLineNumber() + "]", e);
        } catch (IllegalArgumentException e) {
            throw new StudyImporterException("failed to parse date", e);
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create host and parasite taxons", e);
    }
}
Also used : Study(org.eol.globi.domain.Study) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) Date(java.util.Date) Specimen(org.eol.globi.domain.Specimen) LocationImpl(org.eol.globi.domain.LocationImpl) Location(org.eol.globi.domain.Location)

Example 23 with LabeledCSVParser

Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

In the class StudyImporterForBell, method importStudy.

/**
 * Imports one parasite/host interaction per data row of every resource listed in {@code RESOURCE}.
 *
 * <p>The study of a row is selected through the first key of {@code REFS} that the row's "GUID"
 * value starts with; when no description is found that way, a warning is logged and the source
 * citation is used as description with an empty collection id. Parasite (from "Genus" and
 * "Species") and host (from "SCIENTIFIC_NAME") share the same external id, location and date.
 *
 * @throws StudyImporterException wrapping anything thrown while processing a resource
 */
@Override
public void importStudy() throws StudyImporterException {
    // identical for every row of every resource
    final String sourceCitation = "Bell, K. C., Matek, D., Demboski, J. R., & Cook, J. A. (2015). Expanded Host Range of Sucking Lice and Pinworms of Western North American Chipmunks. Comparative Parasitology, 82(2), 312–321. doi:10.1654/4756.1 . Data provided by Kayce C. Bell.";
    for (String resource : RESOURCE) {
        LabeledCSVParser parser = null;
        try {
            parser = parserFactory.createParser(resource, "UTF-8");
            while (parser.getLine() != null) {
                final String guid = parser.getValueByLabel("GUID");
                final String externalId = "http://arctos.database.museum/guid/" + guid;

                // first REFS key that prefixes the guid decides collection and description
                String collectionId = null;
                String description = null;
                for (String prefix : REFS.keySet()) {
                    if (guid.startsWith(prefix)) {
                        collectionId = prefix;
                        description = REFS.get(prefix);
                        break;
                    }
                }
                if (StringUtils.isBlank(description)) {
                    LOG.warn("missing collectionId [" + guid + "] in file [" + resource + "] on line [" + parser.lastLineNumber() + "]");
                    description = sourceCitation;
                    collectionId = "";
                }

                final String citation = ExternalIdUtil.toCitation(null, sourceCitation + " " + description, null);
                final Study study = nodeFactory.getOrCreateStudy(new StudyImpl("bell-" + collectionId, sourceCitation, "http://dx.doi.org/10.1654/4756.1", citation));

                final String[] nameParts = {
                        StringUtils.trim(parser.getValueByLabel("Genus")),
                        StringUtils.trim(parser.getValueByLabel("Species"))
                };
                final Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.join(nameParts, " "), null));
                parasite.setExternalId(externalId);
                final Location location = getLocation(parser, parasite);
                parasite.caughtIn(location);

                final String hostName = StringUtils.trim(parser.getValueByLabel("SCIENTIFIC_NAME"));
                final Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
                host.caughtIn(location);
                host.setExternalId(externalId);

                parasite.interactsWith(host, InteractType.PARASITE_OF);

                final Date date = parseDate(parser);
                nodeFactory.setUnixEpochProperty(parasite, date);
                nodeFactory.setUnixEpochProperty(host, date);
            }
        } catch (Throwable e) {
            throw new StudyImporterException(getErrorMessage(resource, parser), e);
        }
    }
}
Also used : Study(org.eol.globi.domain.Study) Specimen(org.eol.globi.domain.Specimen) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Date(java.util.Date) Location(org.eol.globi.domain.Location)

Example 24 with LabeledCSVParser

Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

In the class StudyImporterForByrnes, method importStudy.

/**
 * Reads {@code RESOURCE_PATH} row by row and hands every row accepted by {@code importFilter}
 * to {@code importLine}, together with the reference map built by {@code buildRefMap}.
 *
 * @throws StudyImporterException if the resource cannot be opened or a row cannot be read
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser dataParser;
    try {
        dataParser = parserFactory.createParser(RESOURCE_PATH, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource [" + RESOURCE_PATH + "]", e);
    }

    final Map<String, String> refMap = buildRefMap();

    try {
        for (String[] row = dataParser.getLine(); row != null; row = dataParser.getLine()) {
            final long lineNumber = dataParser.getLastLineNumber();
            if (!importFilter.shouldImportRecord(lineNumber)) {
                // filtered out: skip this row
                continue;
            }
            importLine(dataParser, refMap);
        }
    } catch (IOException e) {
        throw new StudyImporterException("problem importing study at line [" + dataParser.lastLineNumber() + "]", e);
    }
}
Also used : LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException)

Example 25 with LabeledCSVParser

Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

In the class StudyImporterForGoMexSI2Test, method importSinglePrey.

/**
 * Parses a fixture consisting of a header and a single prey row and verifies that the handler
 * receives the prey name both as "name" and as "GOMEXSI:PREY_SOURCE_NAME".
 */
@Test
public void importSinglePrey() throws IOException, StudyImporterException {
    final String header = "DATA_ID,PRED_ID,PREY_SOURCE_NAME,PREY_DATABASE_NAME,PHYSIOLOG_STATE,SED_ORIGIN,PREY_PARTS,PREY_LIFE_HIST_STAGE,PREY_COND_INDEX,PREY_SEX,PREY_SEX_RATIO,PREY_LEN_TYPE,PREY_MIN_LEN,PREY_MAX_LEN,PREY_MN_LEN,PREY_MIN_WIDTH,PREY_MAX_WIDTH,PREY_MN_WIDTH,BIOMASS,BIOMASS_QUALIFIER,PCT_BIOMASS,PCT_BIOMASS_QUALIFIER,N_CONS,N_CONS_QUALIFIER,PCT_N_CONS,PCT_N_CONS_QUALIFIER,VOL_CONS,VOL_CONS_QUALIFIER,PCT_VOL_CONS,PCT_VOL_CONS_QUALIFIER,FREQ_OCC,FREQ_OCC_QUALIFIER,PCT_FREQ_OCC,PCT_FREQ_OCC_QUALIFIER,IRI,PCT_IRI,IRIa,IIR,E,PREY_NOTES,ENTRY_DATE,ENTRY_PERSON,EDITED_DATE,DATA_EDITOR,MODIFY_DATE,DATA_MODIFIER\n";
    final String row = "ACT_16R,Cchr.1,Crustacea,Crustacea,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.245,NA,0.15,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Jim Simons,NA,Jim Simons,27/06/2016,Theresa Mitchell\n";

    // collects whatever the parser reports for the single specimen
    final Map<String, String> captured = new HashMap<>();
    final ParseEventHandler collector = new ParseEventHandler() {

        @Override
        public void onSpecimen(String predatorUID, Map<String, String> properties) {
            captured.putAll(properties);
        }
    };
    final LabeledCSVParser csv = new LabeledCSVParser(new CSVParser(new StringReader(header + row)));

    StudyImporterForGoMexSI2.parseSpecimen("test.txt", "PREY_", collector, csv);

    assertThat(captured.get("name"), is("Crustacea"));
    assertThat(captured.get("GOMEXSI:PREY_SOURCE_NAME"), is("Crustacea"));
}
Also used : HashMap(java.util.HashMap) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) CSVParser(com.Ostermiller.util.CSVParser) StringReader(java.io.StringReader) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Test(org.junit.Test)

Aggregations

LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser): 82; IOException (java.io.IOException): 40; Test (org.junit.Test): 31; Study (org.eol.globi.domain.Study): 24; StudyImpl (org.eol.globi.domain.StudyImpl): 17; Specimen (org.eol.globi.domain.Specimen): 15; HashMap (java.util.HashMap): 13; ArrayList (java.util.ArrayList): 12; Location (org.eol.globi.domain.Location): 12; TaxonImpl (org.eol.globi.domain.TaxonImpl): 12; CSVParser (com.Ostermiller.util.CSVParser): 10; StringReader (java.io.StringReader): 8; LocationImpl (org.eol.globi.domain.LocationImpl): 8; Taxon (org.eol.globi.domain.Taxon): 8; InteractType (org.eol.globi.domain.InteractType): 7; File (java.io.File): 6; FileInputStream (java.io.FileInputStream): 6; InputStream (java.io.InputStream): 6; Date (java.util.Date): 6; List (java.util.List): 6