Search in sources:

Example 71 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForINaturalist method retrieveDataParseResults.

/**
 * Loads the ignored-type list and the interaction-type mapping, then requests
 * successive pages of iNaturalist observation field values until a page
 * reports zero results.
 *
 * @return the sum of the per-page counts returned by {@code parseJSON}
 * @throws StudyImporterException if either lookup table cannot be read or a page
 *                                fails to import; the underlying exception is
 *                                always attached as the cause
 */
private int retrieveDataParseResults() throws StudyImporterException {
    List<Integer> typesIgnored;
    try {
        typesIgnored = buildTypesIgnored(parserFactory.createParser(getTypeIgnoredURI(), CharsetConstant.UTF8));
    } catch (IOException e) {
        // Chain the I/O failure so the root cause is not lost in the stack trace.
        throw new StudyImporterException("failed to load ignored interaction types from [" + getTypeIgnoredURI() + "]", e);
    }
    Map<Integer, InteractType> typeMap;
    try {
        LabeledCSVParser labeledCSVParser = parserFactory.createParser(getTypeMapURI(), CharsetConstant.UTF8);
        typeMap = buildTypeMap(getTypeMapURI(), labeledCSVParser);
    } catch (IOException e) {
        // Chain the I/O failure so the root cause is not lost in the stack trace.
        throw new StudyImporterException("failed to load interaction mapping from [" + getTypeMapURI() + "]", e);
    }
    int totalInteractions = 0;
    int previousResultCount = 0;
    int pageNumber = 1;
    // Keep paging until a page yields no results.
    do {
        String uri = INATURALIST_URL + "/observation_field_values.json?type=taxon&page=" + pageNumber + "&per_page=100&quality_grade=research";
        try {
            previousResultCount = parseJSON(getDataset().getResource(uri), typesIgnored, typeMap);
            pageNumber++;
            totalInteractions += previousResultCount;
        } catch (IOException | StudyImporterException e) {
            throw new StudyImporterException("failed to import iNaturalist at [" + uri + "]", e);
        }
    } while (previousResultCount > 0);
    return totalInteractions;
}
Also used : InteractType(org.eol.globi.domain.InteractType) IOException(java.io.IOException) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser)

Example 72 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForINaturalist method buildTypeMap.

/**
 * Builds a lookup from numeric observation field id to interaction type by
 * reading every row of the given labeled CSV table.
 *
 * <p>Each row is expected to provide an {@code observation_field_id} column whose
 * value starts with {@code PREFIX_OBSERVATION_FIELD} followed by an integer, and an
 * {@code interaction_type_id} column that {@link InteractType#typeOf} can resolve.
 * Rows that fail either expectation are logged as warnings and skipped.
 *
 * @param resource         name of the table being read; only used in warning messages
 * @param labeledCSVParser parser positioned before the first data row
 * @return map, sorted by id, from observation field id to interaction type
 * @throws IOException if the parser fails to read a row
 */
public static Map<Integer, InteractType> buildTypeMap(String resource, LabeledCSVParser labeledCSVParser) throws IOException {
    Map<Integer, InteractType> typeMap = new TreeMap<Integer, InteractType>();
    while (labeledCSVParser.getLine() != null) {
        String inatIdString = labeledCSVParser.getValueByLabel("observation_field_id");
        Integer inatId = null;
        if (StringUtils.startsWith(inatIdString, PREFIX_OBSERVATION_FIELD)) {
            try {
                // Strip only the leading prefix; String.replace would also remove
                // later occurrences and could turn a malformed id into a wrong number.
                inatId = Integer.valueOf(inatIdString.substring(PREFIX_OBSERVATION_FIELD.length()));
            } catch (NumberFormatException ignored) {
                // Non-numeric suffix: leave inatId null so the row is reported and
                // skipped below instead of aborting the whole import.
            }
        }
        if (inatId == null) {
            LOG.warn("failed to map observation field id [" + inatIdString + "] in line [" + resource + ":" + labeledCSVParser.lastLineNumber() + "]");
        } else {
            String interactionTypeId = labeledCSVParser.getValueByLabel("interaction_type_id");
            InteractType interactType = InteractType.typeOf(interactionTypeId);
            if (interactType == null) {
                LOG.warn("failed to map interaction type [" + interactionTypeId + "] in line [" + resource + ":" + labeledCSVParser.lastLineNumber() + "]");
            } else {
                typeMap.put(inatId, interactType);
            }
        }
    }
    return typeMap;
}
Also used : InteractType(org.eol.globi.domain.InteractType) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) TreeMap(java.util.TreeMap)

Example 73 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForINaturalist method buildTypesIgnored.

/**
 * Collects the numeric observation field ids listed in the given labeled CSV table.
 *
 * <p>Only rows whose {@code observation_field_id} value starts with
 * {@code PREFIX_OBSERVATION_FIELD} contribute an id; all other rows are skipped.
 *
 * @param labeledCSVParser parser positioned before the first data row
 * @return the ids in the order they were read
 * @throws IOException if the parser fails to read a row
 */
public static List<Integer> buildTypesIgnored(LabeledCSVParser labeledCSVParser) throws IOException {
    final List<Integer> ignoredIds = new ArrayList<Integer>();
    while (labeledCSVParser.getLine() != null) {
        final String fieldId = labeledCSVParser.getValueByLabel("observation_field_id");
        if (!StringUtils.startsWith(fieldId, PREFIX_OBSERVATION_FIELD)) {
            // Not an observation field reference; nothing to record for this row.
            continue;
        }
        final String numericPart = fieldId.replace(PREFIX_OBSERVATION_FIELD, "");
        ignoredIds.add(Integer.parseInt(numericPart));
    }
    return ignoredIds;
}
Also used : ArrayList(java.util.ArrayList) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser)

Example 74 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForGoMexSI2Test method importSinglePreyMissingDatabaseName.

/**
 * Parses a single prey row whose PREY_DATABASE_NAME column is empty and checks
 * that the specimen name is taken from PREY_SOURCE_NAME while the database name
 * is passed through as an empty string.
 */
@Test
public void importSinglePreyMissingDatabaseName() throws IOException, StudyImporterException {
    final Map<String, String> captured = new HashMap<String, String>();
    // Header line followed by one data row with an empty PREY_DATABASE_NAME field.
    String csv = "DATA_ID,PRED_ID,PREY_SOURCE_NAME,PREY_DATABASE_NAME,PHYSIOLOG_STATE,SED_ORIGIN,PREY_PARTS,PREY_LIFE_HIST_STAGE,PREY_COND_INDEX,PREY_SEX,PREY_SEX_RATIO,PREY_LEN_TYPE,PREY_MIN_LEN,PREY_MAX_LEN,PREY_MN_LEN,PREY_MIN_WIDTH,PREY_MAX_WIDTH,PREY_MN_WIDTH,BIOMASS,BIOMASS_QUALIFIER,PCT_BIOMASS,PCT_BIOMASS_QUALIFIER,N_CONS,N_CONS_QUALIFIER,PCT_N_CONS,PCT_N_CONS_QUALIFIER,VOL_CONS,VOL_CONS_QUALIFIER,PCT_VOL_CONS,PCT_VOL_CONS_QUALIFIER,FREQ_OCC,FREQ_OCC_QUALIFIER,PCT_FREQ_OCC,PCT_FREQ_OCC_QUALIFIER,IRI,PCT_IRI,IRIa,IIR,E,PREY_NOTES,ENTRY_DATE,ENTRY_PERSON,EDITED_DATE,DATA_EDITOR,MODIFY_DATE,DATA_MODIFIER\n"
            + "ACT_16R,Cchr.1,Crustacea,,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.245,NA,0.15,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Jim Simons,NA,Jim Simons,27/06/2016,Theresa Mitchell\n";

    // Handler that records every property reported for a parsed specimen.
    ParseEventHandler recordingHandler = new ParseEventHandler() {

        @Override
        public void onSpecimen(String predatorUID, Map<String, String> properties) {
            captured.putAll(properties);
        }
    };
    LabeledCSVParser preyParser = new LabeledCSVParser(new CSVParser(new StringReader(csv)));

    StudyImporterForGoMexSI2.parseSpecimen("test.txt", "PREY_", recordingHandler, preyParser);

    assertThat(captured.get("name"), is("Crustacea"));
    assertThat(captured.get("GOMEXSI:PREY_SOURCE_NAME"), is("Crustacea"));
    assertThat(captured.get("GOMEXSI:PREY_DATABASE_NAME"), is(""));
}
Also used : HashMap(java.util.HashMap) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) CSVParser(com.Ostermiller.util.CSVParser) StringReader(java.io.StringReader) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Test(org.junit.Test)

Example 75 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForMetaTableIT method importNHMStatic.

@Test
public void importNHMStatic() throws IOException, StudyImporterException {
    final List<Map<String, String>> links = new ArrayList<Map<String, String>>();
    final InteractionListener interactionListener = properties -> links.add(properties);
    final StudyImporterForMetaTable.TableParserFactory tableFactory = (config, dataset) -> {
        String firstFewLines = "\"InteractionID\",\"InteractionURL\",\"Species1UUID\",\"Species1Name\",\"Species1LifeCycleStage\",\"Species1OrganismPart\",\"Species1Status\",\"InteractionType\",\"InteractionOntologyURL\",\"Species2UUID\",\"Species2Name\",\"Species2LifeCycleStage\",\"Species2OrganismPart\",\"Species2Status\",\"LocationUUID\",\"LocationName\",\"LocationCountryName\",\"ISO2\",\"Importance\",\"InteractionRecordType\",\"Reference\",\"ReferenceDOI\",\"Reference Page\",\"Notes\"\n" + "\"4bee827f-c9f5-4c0e-9db3-e40a6e4d8008\",\"http://phthiraptera.info/node/94209\",\"c8faa033-237b-40b9-9b76-d9e7fcff9238\",\"Menacanthus alaudae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"e275d77c-e993-4de0-981f-b3f39fd4da9b\",\"Acanthis flavirostris\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"310\",\"[REF: Palma, Price & Hellenthal, 1998:310]\"\n" + "\"80e66e7c-75db-467f-9a89-a11f94d58eb3\",\"http://phthiraptera.info/node/94210\",\"fe5b2e50-b414-41d9-840d-189e732b2ea5\",\"Ricinus fringillae flammeae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"f26a1199-c0bb-4d7c-a511-2fe6284c5378\",\"Acanthis flammea flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Self citation to checklist added. Requires page number.\"\n" + "\"001ee8aa-dbab-43b8-9137-a61565ccf41b\",\"http://phthiraptera.info/node/94211\",\"ee17d179-9f60-4198-ac49-dc9dab3ae529\",\"Brueelia sibirica\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"f26a1199-c0bb-4d7c-a511-2fe6284c5378\",\"Acanthis flammea flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Self citation to checklist added. 
Requires page number.\"\n" + "\"d0929673-2f4c-49ec-877f-116e74ea360e\",\"http://phthiraptera.info/node/94212\",\"46084bc3-cfbf-4e01-96f8-5ecb50bc5ff9\",\"Ricinus fringillae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"2027cf09-f15d-4c2b-be28-9cb00fabf308\",\"Acanthis flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"204\",\"[REF: Rheinwald, 1968:204]\"\n";
        return new LabeledCSVParser(new CSVParser(IOUtils.toInputStream(firstFewLines)));
    };
    final String baseUrl = "https://raw.githubusercontent.com/globalbioticinteractions/natural-history-museum-london-interactions-bank/master";
    final String resource = baseUrl + "/globi.json";
    importAll(interactionListener, tableFactory, baseUrl, resource);
    assertThat(links.size(), is(4));
    for (Map<String, String> firstLine : links) {
        assertNotNull(firstLine.get(StudyImporterForTSV.INTERACTION_TYPE_NAME));
        assertNotNull(firstLine.get(StudyImporterForTSV.TARGET_TAXON_ID));
        assertNotNull(firstLine.get(StudyImporterForTSV.TARGET_TAXON_NAME));
        assertNotNull(firstLine.get(StudyImporterForTSV.SOURCE_TAXON_ID));
        assertNotNull(firstLine.get(StudyImporterForTSV.SOURCE_TAXON_NAME));
    }
}
Also used : URL(java.net.URL) Assert.assertNotNull(org.junit.Assert.assertNotNull) DatasetImpl(org.eol.globi.service.DatasetImpl) Test(org.junit.Test) IOException(java.io.IOException) JsonNode(org.codehaus.jackson.JsonNode) StringContains.containsString(org.junit.internal.matchers.StringContains.containsString) CSVParser(com.Ostermiller.util.CSVParser) ArrayList(java.util.ArrayList) Assert.assertThat(org.junit.Assert.assertThat) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) ResourceUtil(org.eol.globi.util.ResourceUtil) Assert(junit.framework.Assert) Map(java.util.Map) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Dataset(org.eol.globi.service.Dataset) Is.is(org.hamcrest.core.Is.is) URI(java.net.URI) StringStartsWith.startsWith(org.hamcrest.core.StringStartsWith.startsWith) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) InputStream(java.io.InputStream) CSVParser(com.Ostermiller.util.CSVParser) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) ArrayList(java.util.ArrayList) StringContains.containsString(org.junit.internal.matchers.StringContains.containsString) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Map(java.util.Map) Test(org.junit.Test)

Aggregations

LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser): 82, IOException (java.io.IOException): 40, Test (org.junit.Test): 31, Study (org.eol.globi.domain.Study): 24, StudyImpl (org.eol.globi.domain.StudyImpl): 17, Specimen (org.eol.globi.domain.Specimen): 15, HashMap (java.util.HashMap): 13, ArrayList (java.util.ArrayList): 12, Location (org.eol.globi.domain.Location): 12, TaxonImpl (org.eol.globi.domain.TaxonImpl): 12, CSVParser (com.Ostermiller.util.CSVParser): 10, StringReader (java.io.StringReader): 8, LocationImpl (org.eol.globi.domain.LocationImpl): 8, Taxon (org.eol.globi.domain.Taxon): 8, InteractType (org.eol.globi.domain.InteractType): 7, File (java.io.File): 6, FileInputStream (java.io.FileInputStream): 6, InputStream (java.io.InputStream): 6, Date (java.util.Date): 6, List (java.util.List): 6