Search in sources:

Example 1 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForMetaTableIT method importREEMWithStaticCSV.

/**
 * Imports the NOAA REEM dataset configuration while substituting every table's
 * content with a small, static CSV sample, then verifies the number of emitted
 * interactions and the fields of the first one.
 *
 * NOTE(review): the configuration is read from a raw.githubusercontent.com URL,
 * so this presumably needs network access to run - confirm.
 */
@Test
public void importREEMWithStaticCSV() throws IOException, StudyImporterException {
    final List<Map<String, String>> interactions = new ArrayList<>();
    final InteractionListener collector = interactions::add;

    // Every requested table is answered with the same in-memory sample:
    // one header line followed by four data rows.
    final DatasetImporterForMetaTable.TableParserFactory staticTableFactory = (config, dataset) -> {
        final String header = "Hauljoin,\" Pred_nodc\",\" Pred_specn\",\" Prey_nodc\",\" Pred_len\",\" Year\",\" Month\",\" day\",\" region\",\" Pred_name\",\" Prey_Name\",\" Vessel\",\" Cruise\",\" Haul\",\" Rlat\",\" Rlong\",\" Gear_depth\",\" Bottom_depth\",\" Start_hour\",\" Surface_temp\",\" Gear_temp\",\" INPFC_Area\",\" Stationid\",\" Start_date\",\" Prey_sz1\",\" Prey_sex\"\n";
        final String rows = "11012118.0,8791030401.0,5.0,9999999998.0,53.0,1994.0,7.0,11.0,AI,\"Pacific cod Gadus macrocephalus\",\"Rocks \",95.0,199401.0,148.0,51.43,178.81999999999999,222.0,228.0,11.0,0.63,0.41999999999999998,542.0,118-11,\"1994-07-11 00:00:00\",3.0,\n"
                + "11012118.0,8791030401.0,8.0,9999999998.0,53.0,1994.0,7.0,11.0,AI,\"Pacific cod Gadus macrocephalus\",\"Rocks \",95.0,199401.0,148.0,51.43,178.81999999999999,222.0,228.0,11.0,0.63,0.41999999999999998,542.0,118-11,\"1994-07-11 00:00:00\",3.0,\n"
                + "11012118.0,8791030401.0,9.0,9999999998.0,58.0,1994.0,7.0,11.0,AI,\"Pacific cod Gadus macrocephalus\",\"Rocks \",95.0,199401.0,148.0,51.43,178.81999999999999,222.0,228.0,11.0,0.63,0.41999999999999998,542.0,118-11,\"1994-07-11 00:00:00\",13.0,\n"
                + "11012118.0,8791030401.0,9.0,9999999998.0,58.0,1994.0,7.0,11.0,AI,\"Pacific cod Gadus macrocephalus\",\"Rocks \",95.0,199401.0,148.0,51.43,178.81999999999999,222.0,228.0,11.0,0.63,0.41999999999999998,542.0,118-11,\"1994-07-11 00:00:00\",3.0,\n";
        return CSVTSVUtil.createLabeledCSVParser(
                CSVTSVUtil.createExcelCSVParse(
                        IOUtils.toInputStream(header + rows, StandardCharsets.UTF_8)));
    };

    final String repositoryUrl = "https://raw.githubusercontent.com/globalbioticinteractions/noaa-reem/main";
    importAll(collector, staticTableFactory, repositoryUrl, repositoryUrl + "/globi.json");

    assertThat(interactions.size(), is(12));

    // Spot-check the first emitted interaction.
    final Map<String, String> first = interactions.get(0);
    assertThat(first.get(DatasetImporterForTSV.INTERACTION_TYPE_ID), is("http://purl.obolibrary.org/obo/RO_0002470"));
    assertThat(first.get(DatasetImporterForTSV.INTERACTION_TYPE_NAME), is("eats"));
    assertThat(first.get(TaxonUtil.TARGET_TAXON_ID), is(nullValue()));
    assertThat(first.get(TaxonUtil.TARGET_TAXON_NAME), is("Rocks"));
    assertThat(first.get(TaxonUtil.SOURCE_TAXON_ID), is("NODC:8791030401"));
    assertThat(first.get(TaxonUtil.SOURCE_TAXON_NAME), is("Pacific cod Gadus macrocephalus"));
    assertThat(first.get(DatasetImporterForMetaTable.EVENT_DATE), startsWith("1994-07-11"));
    assertThat(first.get(DatasetImporterForMetaTable.LATITUDE), is("51.43"));
    assertThat(first.get(DatasetImporterForMetaTable.LONGITUDE), is("178.81999999999999"));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) CSVTSVUtil(org.eol.globi.util.CSVTSVUtil) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) ResourceUtil(org.eol.globi.util.ResourceUtil) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) StringStartsWith.startsWith(org.hamcrest.core.StringStartsWith.startsWith) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) IsNot.not(org.hamcrest.core.IsNot.not) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test) IOException(java.io.IOException) StandardCharsets(java.nio.charset.StandardCharsets) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Dataset(org.globalbioticinteractions.dataset.Dataset) Matchers.containsString(org.hamcrest.Matchers.containsString) InputStream(java.io.InputStream) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) Matchers.containsString(org.hamcrest.Matchers.containsString) Map(java.util.Map) Test(org.junit.Test)

Example 2 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForMetaTableIT method importAll.

@Test
public void importAll() throws IOException, StudyImporterException {
    final List<Map<String, String>> links = new ArrayList<Map<String, String>>();
    final InteractionListener interactionListener = links::add;
    final DatasetImporterForMetaTable.TableParserFactory tableFactory = (config, dataset) -> {
        String firstFewLines = "intertype,obstype,effunit,effort,obsunit,obsquant,germnotes,\"REPLACE(Interaction.notes, ',', ';')\",AnimalNumber,AnimalClass,AnimalOrder,AnimalFamily,AnimalGenus,AnimalSpecies,AnimalSubSpecies,AnimalType,AnimalCommonName,PlantNumber,PlantFamily,PlantGenus,PlantSpecies,PlantSubSpecies,country,region,ProvinceDistrictCity,ProtectedArea,HabitatWhite,HabitatAuthor,author,title,year,journal,volume,number,pages,USER,DEF_timestamp,,,\n" + "seed disperser,direct observation,months,4,dung density,,,Article focused on elephant density per habitat type based on seed/plant types identified in dung at the various research locations. All identified plant types are being assumed to be dispersed by the elephants,1441,Mammalia,Proboscidea,Elephantidae,Loxodonta,africana,,NULL,African Bush Elephant,4035,Poaceae,Cynodon,dactylon,NULL,Mozambique,NULL,NULL,yes,forest transitions and mosaics,mangroves dune grass plains forest woodland riverine,\"De Boer, W.F. and Ntumi, C.P. and Correia, A.U. and Mafuca, J.M.\",Diet and distribution of elephant in the Maputo Elephant Reserve; Mozambique,2000,African Journal of Ecology,38,3,188-201,Mary,0000-00-00 00:00:00,,,\n" + "seed disperser,direct observation,months,4,dung density,,,Article focused on elephant density per habitat type based on seed/plant types identified in dung at the various research locations. All identified plant types are being assumed to be dispersed by the elephants,1441,Mammalia,Proboscidea,Elephantidae,Loxodonta,africana,,NULL,African Bush Elephant,3639,Poaceae,Aristida,canescens,NULL,Mozambique,NULL,NULL,yes,forest transitions and mosaics,mangroves dune grass plains forest woodland riverine,\"De Boer, W.F. and Ntumi, C.P. and Correia, A.U. 
and Mafuca, J.M.\",Diet and distribution of elephant in the Maputo Elephant Reserve; Mozambique,2000,African Journal of Ecology,38,3,188-201,Mary,0000-00-00 00:00:00,,,\n" + "seed disperser,direct observation,months,4,dung density,,,Article focused on elephant density per habitat type based on seed/plant types identified in dung at the various research locations. All identified plant types are being assumed to be dispersed by the elephants,1441,Mammalia,Proboscidea,Elephantidae,Loxodonta,africana,,NULL,African Bush Elephant,3574,Poaceae,Andropogon,eucomus,NULL,Mozambique,NULL,NULL,yes,forest transitions and mosaics,mangroves dune grass plains forest woodland riverine,\"De Boer, W.F. and Ntumi, C.P. and Correia, A.U. and Mafuca, J.M.\",Diet and distribution of elephant in the Maputo Elephant Reserve; Mozambique,2000,African Journal of Ecology,38,3,188-201,Mary,0000-00-00 00:00:00,,,\n" + "seed disperser,direct observation,months,4,dung density,,,Article focused on elephant density per habitat type based on seed/plant types identified in dung at the various research locations. All identified plant types are being assumed to be dispersed by the elephants,1441,Mammalia,Proboscidea,Elephantidae,Loxodonta,africana,,NULL,African Bush Elephant,5125,Phyllanthaceae,Phyllanthus,reticulatus,NULL,Mozambique,NULL,NULL,yes,forest transitions and mosaics,mangroves dune grass plains forest woodland riverine,\"De Boer, W.F. and Ntumi, C.P. and Correia, A.U. and Mafuca, J.M.\",Diet and distribution of elephant in the Maputo Elephant Reserve; Mozambique,2000,African Journal of Ecology,38,3,188-201,Mary,0000-00-00 00:00:00,,,\n" + "seed disperser,direct observation,months,4,dung density,,,Article focused on elephant density per habitat type based on seed/plant types identified in dung at the various research locations. 
All identified plant types are being assumed to be dispersed by the elephants,1441,Mammalia,Proboscidea,Elephantidae,Loxodonta,africana,,NULL,African Bush Elephant,399,Myrtaceae,Syzygium,cordatum,,Mozambique,NULL,NULL,yes,forest transitions and mosaics,mangroves dune grass plains forest woodland riverine,\"De Boer, W.F. and Ntumi, C.P. and Correia, A.U. and Mafuca, J.M.\",Diet and distribution of elephant in the Maputo Elephant Reserve; Mozambique,2000,African Journal of Ecology,38,3,188-201,Mary,0000-00-00 00:00:00,,,\n" + "seed disperser,direct observation,months,4,dung density,,,Article focused on elephant density per habitat type based on seed/plant types identified in dung at the various research locations. All identified plant types are being assumed to be dispersed by the elephants,1441,Mammalia,Proboscidea,Elephantidae,Loxodonta,africana,,NULL,African Bush Elephant,374,Moraceae,Ficus,sycomorus,,Mozambique,NULL,NULL,yes,forest transitions and mosaics,mangroves dune grass plains forest woodland riverine,\"De Boer, W.F. and Ntumi, C.P. and Correia, A.U. and Mafuca, J.M.\",Diet and distribution of elephant in the Maputo Elephant Reserve; Mozambique,2000,African Journal of Ecology,38,3,188-201,Mary,0000-00-00 00:00:00,,,\n" + "seed disperser,direct observation,months,4,dung density,,,Article focused on elephant density per habitat type based on seed/plant types identified in dung at the various research locations. All identified plant types are being assumed to be dispersed by the elephants,1441,Mammalia,Proboscidea,Elephantidae,Loxodonta,africana,,NULL,African Bush Elephant,4398,Moraceae,Ficus,sp,NULL,Mozambique,NULL,NULL,yes,forest transitions and mosaics,mangroves dune grass plains forest woodland riverine,\"De Boer, W.F. and Ntumi, C.P. and Correia, A.U. 
and Mafuca, J.M.\",Diet and distribution of elephant in the Maputo Elephant Reserve; Mozambique,2000,African Journal of Ecology,38,3,188-201,Mary,0000-00-00 00:00:00,,,\n" + "seed disperser,direct observation,years,4,NULL,NULL,NULL,NULL,3051,Animal,Animal,Animal,Animal,animal,NULL,general animal,NULL,4176,Caesalpinioideae,Distemonanthus,benthamianus,NULL,Cameroon,NULL,NULL,yes,NULL,semideciduous tropical rain forest,\"Hardesty, B.D. and Parker, V.T.\",Community seed rain patterns and a comparison to adult community structure in a West African tropical forest,2003,Plant Ecology,164,1,49-64,Mary,8/15/12 9:35,,,\n" + "ingestion,direct observation,years,2,NULL,NULL,NULL,during both summer and winter season,1462,Mammalia,Artiodactyla,Bovidae,Madoqua,kirkii,,NULL,Kirk's Dikdik,6897,Moraceae,Ficus,petersii,NULL,Namibia,South West Africa,NULL,yes,NULL,riverine thicket,\"Tinley, K.\",Dikdik; Madoqua kirkii; in south-west Africa: notes on distribution; ecology; and behaviour,1969,Madoqua,1,NULL,Jul-33,Anna,2/24/14 18:40,,,\n";
        return CSVTSVUtil.createLabeledCSVParser(CSVTSVUtil.createExcelCSVParse(IOUtils.toInputStream(firstFewLines, StandardCharsets.UTF_8)));
    };
    final String baseUrl = "https://raw.githubusercontent.com/globalbioticinteractions/AfricaTreeDatabase/main";
    final String resource = baseUrl + "/globi.json";
    importAll(interactionListener, tableFactory, baseUrl, resource);
    assertThat(links.size(), is(9));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) CSVTSVUtil(org.eol.globi.util.CSVTSVUtil) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) ResourceUtil(org.eol.globi.util.ResourceUtil) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) StringStartsWith.startsWith(org.hamcrest.core.StringStartsWith.startsWith) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) IsNot.not(org.hamcrest.core.IsNot.not) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test) IOException(java.io.IOException) StandardCharsets(java.nio.charset.StandardCharsets) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Dataset(org.globalbioticinteractions.dataset.Dataset) Matchers.containsString(org.hamcrest.Matchers.containsString) InputStream(java.io.InputStream) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) Matchers.containsString(org.hamcrest.Matchers.containsString) Map(java.util.Map) Test(org.junit.Test)

Example 3 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForMetaTableIT method importAll.

/**
 * Reads a dataset configuration from {@code resource}, collects the tables it
 * declares and imports each of them.
 *
 * @param interactionListener passed through to
 *                            {@code DatasetImporterForMetaTable.importTable} for every table
 * @param tableFactory        passed through to {@code importTable}; supplies the
 *                            parser used for each table's content
 * @param baseUrl             archive URI of the dataset handed to {@code importTable}
 * @param resource            location of the JSON configuration to load
 * @throws IOException            if the configuration cannot be read or parsed
 * @throws StudyImporterException if importing a table fails
 */
public static void importAll(InteractionListener interactionListener, DatasetImporterForMetaTable.TableParserFactory tableFactory, String baseUrl, String resource) throws IOException, StudyImporterException {
    final JsonNode config;
    // Close the configuration stream explicitly instead of relying on the
    // JSON parser's auto-close default; closing twice is harmless.
    try (InputStream inputStream = ResourceUtil.asInputStream(resource, inStream -> inStream)) {
        config = new ObjectMapper().readTree(inputStream);
    }
    // The namespace and URI here are placeholders; this dataset is only used to
    // carry the configuration into collectTables.
    final Dataset dataset = new DatasetImpl("some/namespace", URI.create("http://example.com"), inStream -> inStream);
    dataset.setConfig(config);
    for (JsonNode table : DatasetImporterForMetaTable.collectTables(dataset)) {
        // A fresh dataset rooted at baseUrl is handed to every table import.
        DatasetImporterForMetaTable.importTable(interactionListener, tableFactory, table, new DatasetImpl(null, URI.create(baseUrl), inStream -> inStream), null);
    }
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) CSVTSVUtil(org.eol.globi.util.CSVTSVUtil) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) ResourceUtil(org.eol.globi.util.ResourceUtil) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) StringStartsWith.startsWith(org.hamcrest.core.StringStartsWith.startsWith) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) IsNot.not(org.hamcrest.core.IsNot.not) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test) IOException(java.io.IOException) StandardCharsets(java.nio.charset.StandardCharsets) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Dataset(org.globalbioticinteractions.dataset.Dataset) Matchers.containsString(org.hamcrest.Matchers.containsString) InputStream(java.io.InputStream) InputStream(java.io.InputStream) Dataset(org.globalbioticinteractions.dataset.Dataset) JsonNode(com.fasterxml.jackson.databind.JsonNode) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 4 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForPensoftIT method importStudy.

/**
 * Runs the Pensoft importer against a bundled JSON resource and verifies the
 * number of emitted interactions plus selected entries of the first one.
 */
@Test
public void importStudy() throws StudyImporterException, URISyntaxException {
    final DatasetImporterForPensoft importer = new DatasetImporterForPensoft(new ParserFactoryLocal(), null);

    // Dataset configuration: points at the bundled resource and sets a citation.
    final URL annotatedTables = getClass().getResource("pensoft/annotated-tables-first-two.json");
    final ObjectNode datasetConfig = new ObjectMapper().createObjectNode();
    datasetConfig.put("url", annotatedTables.toURI().toString());
    datasetConfig.put("citation", "some dataset citation");

    final Dataset dataset = new DatasetImpl("some/name", URI.create("some:uri"), in -> in);
    dataset.setConfig(datasetConfig);

    // Collect every interaction the importer reports.
    final List<Map<String, String>> links = new ArrayList<>();
    importer.setDataset(dataset);
    importer.setInteractionListener(links::add);

    importer.importStudy();

    assertThat(links.size(), is(121));

    final Map<String, String> firstLink = links.get(0);
    assertThat(firstLink, hasEntry("Family Name", "Acanthaceae"));
    assertThat(firstLink, hasEntry("Family Name_expanded_taxon_name", "Acanthaceae"));
    assertThat(firstLink, hasEntry("Family Name_expanded_taxon_id", "http://openbiodiv.net/4B689A17-2541-4F5F-A896-6F0C2EEA3FB4"));
    assertThat(firstLink, hasEntry("referenceUrl", "https://doi.org/10.3897/zookeys.306.5455"));
    assertThat(firstLink, hasEntry("referenceDoi", "10.3897/zookeys.306.5455"));
    // NOTE(review): the unusual characters between "plants", "in" and "Java" look
    // like mis-encoded whitespace; kept verbatim because they are expected data - confirm.
    assertThat(firstLink, hasEntry("referenceCitation", "Dewi Sartiami, Laurence A. Mound. 2013. Identification of the terebrantian thrips (Insecta, Thysanoptera) associated with cultivated plantsĀ inĀ Java, Indonesia. ZooKeys. https://doi.org/10.3897/zookeys.306.5455"));
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) Dataset(org.globalbioticinteractions.dataset.Dataset) ArrayList(java.util.ArrayList) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) InteractionListener(org.eol.globi.process.InteractionListener) Map(java.util.Map) TreeMap(java.util.TreeMap) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test) TestUtil.getResourceServiceTest(org.eol.globi.data.TestUtil.getResourceServiceTest)

Example 5 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForRSSTest method embeddedDataset.

/**
 * Builds an embedded dataset for a fixed archive location and checks its
 * citation, the fallback value returned for an unset property, and that the
 * archive URI is the one that was passed in.
 */
@Test
public void embeddedDataset() throws IOException {
    final String archiveLocation = "http://example.com/archive.zip";

    final Dataset embedded = embeddedDatasetFor(getDatasetGroup(), URI.create(archiveLocation));

    assertThat(embedded.getCitation(), is("some other citation"));
    // The supplied default "foo" is expected back for this property.
    assertThat(embedded.getOrDefault(DatasetConstant.SHOULD_RESOLVE_REFERENCES, "foo"), is("foo"));
    assertThat(embedded.getArchiveURI().toString(), is(archiveLocation));
}
Also used : Dataset(org.globalbioticinteractions.dataset.Dataset) Test(org.junit.Test)

Aggregations

Dataset (org.globalbioticinteractions.dataset.Dataset): 68, Test (org.junit.Test): 46, DatasetImpl (org.globalbioticinteractions.dataset.DatasetImpl): 25, URI (java.net.URI): 20, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 17, JsonNode (com.fasterxml.jackson.databind.JsonNode): 14, Matchers.containsString (org.hamcrest.Matchers.containsString): 14, IOException (java.io.IOException): 11, MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 10, InputStream (java.io.InputStream): 9, Is.is (org.hamcrest.core.Is.is): 9, StudyNode (org.eol.globi.domain.StudyNode): 8, Node (org.neo4j.graphdb.Node): 8, URL (java.net.URL): 7, ArrayList (java.util.ArrayList): 7, StringStartsWith.startsWith (org.hamcrest.core.StringStartsWith.startsWith): 7, Assert.assertNotNull (org.junit.Assert.assertNotNull): 7, BaseDatasetImporter (org.eol.globi.data.BaseDatasetImporter): 6, DatasetImporter (org.eol.globi.data.DatasetImporter): 6, DatasetNode (org.eol.globi.domain.DatasetNode): 6