Search in sources :

Example 1 with REFERENCE_URL

use of org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method importRecordsFromZip.

/**
 * Runs the DwC-A importer against a dataset whose own archive URI is a placeholder
 * and whose config carries a {@code url} entry pointing at the classpath resource
 * {@code dwca.zip}, then checks every emitted interaction.
 *
 * Each interaction must have a non-blank associatedTaxa or dynamicProperties value,
 * non-null source/target taxon names and interaction type name, a dataset citation
 * containing the dataset's own citation, and reference id/citation/url values that
 * all start with the Symbiota occurrence URL prefix. The union of the resource types
 * seen across all interactions is verified after the import.
 */
@Test
public void importRecordsFromZip() throws StudyImporterException, IOException {
    final String symbiotaOccurrencePrefix = "https://symbiota.ccber.ucsb.edu:443/collections/individual/index.php?occid";

    URL archiveUrl = getClass().getResource("/org/globalbioticinteractions/dataset/dwca.zip");
    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);
    DatasetImpl zipDataset = new DatasetImpl("some/namespace", URI.create("file:///some/path/data.zip"), inStream -> inStream);

    // Same JSON text as before, assembled piece by piece for readability.
    String configJson = "{ "
            + "\"interactionTypeId\": \"http://purl.obolibrary.org/obo/RO_0002437\","
            + "\"url\": \"" + archiveUrl.toExternalForm() + "\""
            + "}";
    JsonNode config = new ObjectMapper().readTree(configJson);
    zipDataset.setConfig(config);
    importer.setDataset(zipDataset);

    String citationOfDataset = zipDataset.getCitation();
    AtomicBoolean sawInteraction = new AtomicBoolean(false);
    Set<String> seenResourceTypes = new TreeSet<>();

    importer.setInteractionListener(new InteractionListener() {

        @Override
        public void on(Map<String, String> properties) throws StudyImporterException {
            String taxaField = properties.get("http://rs.tdwg.org/dwc/terms/associatedTaxa");
            String dynamicField = properties.get("http://rs.tdwg.org/dwc/terms/dynamicProperties");
            // at least one of the two source fields has to carry content
            boolean hasSourceField = StringUtils.isNotBlank(taxaField) || StringUtils.isNotBlank(dynamicField);
            assertThat(hasSourceField, is(true));

            assertThat(properties.get(SOURCE_TAXON_NAME), is(not(nullValue())));
            assertThat(properties.get(TaxonUtil.TARGET_TAXON_NAME), is(not(nullValue())));
            assertThat(properties.get(INTERACTION_TYPE_NAME), is(not(nullValue())));
            assertThat(properties.get(DatasetImporterForTSV.DATASET_CITATION), containsString(citationOfDataset));

            // id, citation and url are all expected to be the Symbiota occurrence link
            assertThat(properties.get(REFERENCE_ID), startsWith(symbiotaOccurrencePrefix));
            assertThat(properties.get(DatasetImporterForTSV.REFERENCE_CITATION), startsWith(symbiotaOccurrencePrefix));
            assertThat(properties.get(REFERENCE_URL), startsWith(symbiotaOccurrencePrefix));

            String pipedTypes = properties.get(RESOURCE_TYPES);
            seenResourceTypes.addAll(Arrays.asList(splitByPipes(pipedTypes)));
            sawInteraction.set(true);
        }
    });

    importer.importStudy();

    assertThat(sawInteraction.get(), is(true));
    assertThat(seenResourceTypes, containsInAnyOrder(
            "http://rs.tdwg.org/dwc/terms/dynamicProperties",
            "http://rs.tdwg.org/dwc/terms/Occurrence",
            "http://rs.tdwg.org/dwc/terms/associatedTaxa"));
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InteractionListener(org.eol.globi.process.InteractionListener) TreeSet(java.util.TreeSet) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 2 with REFERENCE_URL

use of org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method importRecordsFromArctosArchive.

/**
 * Runs the DwC-A importer directly against the classpath resource
 * {@code arctos_mvz_bird_small.zip} and checks every emitted interaction.
 *
 * Each interaction must carry an Arctos reference URL, one of three known source
 * occurrence ids, a source taxon family of Accipitridae or Strigidae, and the
 * expected pipe-separated resource types. A target occurrence id is checked against
 * three known values only when the interaction contains that key.
 */
@Test
public void importRecordsFromArctosArchive() throws StudyImporterException, URISyntaxException {
    URL resource = getClass().getResource("/org/globalbioticinteractions/dataset/arctos_mvz_bird_small.zip");
    DatasetImporterForDwCA studyImporterForDwCA = new DatasetImporterForDwCA(null, null);
    studyImporterForDwCA.setDataset(new DatasetImpl("some/namespace", resource.toURI(), inStream -> inStream));
    // Flipped by the listener so the test fails when the import emits nothing at all.
    AtomicBoolean someRecords = new AtomicBoolean(false);
    studyImporterForDwCA.setInteractionListener(new InteractionListener() {

        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            assertThat(interaction.get(REFERENCE_URL), startsWith("http://arctos.database.museum/guid/"));
            assertThat(interaction.get(SOURCE_OCCURRENCE_ID), anyOf(is("http://arctos.database.museum/guid/MVZ:Bird:180448?seid=587053"), is("http://arctos.database.museum/guid/MVZ:Bird:183644?seid=158590"), is("http://arctos.database.museum/guid/MVZ:Bird:58090?seid=657121")));
            // The target occurrence id is optional; validate it only when present.
            if (interaction.containsKey(DatasetImporterForTSV.TARGET_OCCURRENCE_ID)) {
                assertThat(interaction.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), anyOf(is("http://arctos.database.museum/guid/MVZ:Herp:241200"), is("http://arctos.database.museum/guid/MVZ:Bird:183643"), is("http://arctos.database.museum/guid/MVZ:Bird:58093")));
            }
            assertThat(interaction.get(SOURCE_TAXON_FAMILY), anyOf(is("Accipitridae"), is("Strigidae")));
            assertThat(interaction.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/associatedOccurrences | http://rs.tdwg.org/dwc/terms/Occurrence"));
            someRecords.set(true);
        }
    });
    studyImporterForDwCA.importStudy();
    assertThat(someRecords.get(), is(true));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) 
Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) 
DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) Test(org.junit.Test)

Example 3 with REFERENCE_URL

use of org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method importRecords.

/**
 * Runs the DwC-A importer directly against the classpath resource {@code dwca.zip}
 * with a dataset config that supplies an explicit citation, then checks every
 * emitted interaction.
 *
 * Each interaction must have a non-blank associatedTaxa or dynamicProperties value,
 * non-null taxon names, interaction type name and reference id/citation/url, and a
 * dataset citation containing the configured citation text, the phrase
 * "Accessed at" and the archive path fragment. The union of the resource types seen
 * across all interactions is verified after the import.
 */
@Test
public void importRecords() throws StudyImporterException, URISyntaxException, IOException {
    URL archiveUrl = getClass().getResource("/org/globalbioticinteractions/dataset/dwca.zip");
    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);

    DatasetImpl archiveDataset = new DatasetImpl("some/namespace", archiveUrl.toURI(), inStream -> inStream);
    JsonNode citationConfig = new ObjectMapper().readTree("{ \"citation\": \"some citation\" }");
    archiveDataset.setConfig(citationConfig);
    importer.setDataset(archiveDataset);

    AtomicBoolean sawInteraction = new AtomicBoolean(false);
    Set<String> seenResourceTypes = new TreeSet<>();

    importer.setInteractionListener(new InteractionListener() {

        @Override
        public void on(Map<String, String> properties) throws StudyImporterException {
            String taxaField = properties.get("http://rs.tdwg.org/dwc/terms/associatedTaxa");
            String dynamicField = properties.get("http://rs.tdwg.org/dwc/terms/dynamicProperties");
            // at least one of the two source fields has to carry content
            boolean hasSourceField = StringUtils.isNotBlank(taxaField) || StringUtils.isNotBlank(dynamicField);
            assertThat(hasSourceField, is(true));

            assertThat(properties.get(SOURCE_TAXON_NAME), is(not(nullValue())));
            assertThat(properties.get(TaxonUtil.TARGET_TAXON_NAME), is(not(nullValue())));
            assertThat(properties.get(INTERACTION_TYPE_NAME), is(not(nullValue())));

            // the citation combines the configured text with access information
            String datasetCitation = properties.get(DatasetImporterForTSV.DATASET_CITATION);
            assertThat(datasetCitation, containsString("some citation"));
            assertThat(datasetCitation, containsString("Accessed at"));
            assertThat(datasetCitation, containsString("dataset/dwca.zip"));

            assertThat(properties.get(REFERENCE_ID), is(not(nullValue())));
            assertThat(properties.get(DatasetImporterForTSV.REFERENCE_CITATION), is(not(nullValue())));
            assertThat(properties.get(REFERENCE_URL), is(not(nullValue())));

            String pipedTypes = properties.get(RESOURCE_TYPES);
            seenResourceTypes.addAll(Arrays.asList(splitByPipes(pipedTypes)));
            sawInteraction.set(true);
        }
    });

    importer.importStudy();

    assertThat(sawInteraction.get(), is(true));
    assertThat(seenResourceTypes, containsInAnyOrder(
            "http://rs.tdwg.org/dwc/terms/dynamicProperties",
            "http://rs.tdwg.org/dwc/terms/Occurrence",
            "http://rs.tdwg.org/dwc/terms/associatedTaxa"));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InteractionListener(org.eol.globi.process.InteractionListener) TreeSet(java.util.TreeSet) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 4 with REFERENCE_URL

use of org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCA method mapReferenceInfo.

/**
 * Copies reference information from a record into the interaction properties.
 *
 * The trimmed {@code DcTerm.references} value is used, falling back to the trimmed
 * {@code DwcTerm.occurrenceID} when the former is blank. If the resulting value is
 * still blank nothing is written. Otherwise the record's row type is appended to the
 * resource types, the value is stored as both reference citation and reference id,
 * and, when the value can be converted to a URL, it is also stored as the reference
 * URL.
 *
 * @param rec                   record to read the reference value from
 * @param interactionProperties map that receives the reference entries
 */
static void mapReferenceInfo(Record rec, Map<String, String> interactionProperties) {
    String declaredReference = StringUtils.trim(rec.value(DcTerm.references));
    // occurrenceID is consulted only when no references value is present
    String reference = StringUtils.isBlank(declaredReference)
            ? StringUtils.trim(rec.value(DwcTerm.occurrenceID))
            : declaredReference;

    if (StringUtils.isBlank(reference)) {
        return;
    }

    appendResourceType(interactionProperties, rec.rowType());
    interactionProperties.put(REFERENCE_CITATION, reference);
    interactionProperties.put(REFERENCE_ID, reference);
    try {
        interactionProperties.put(REFERENCE_URL, new URI(reference).toURL().toString());
    } catch (MalformedURLException | URISyntaxException | IllegalArgumentException ignored) {
        // Best effort only: references often hold a URL (a practice used in Symbiota),
        // but a value that is not a URL simply leaves the reference URL unset.
    }
}
Also used : MalformedURLException(java.net.MalformedURLException) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL)

Aggregations

URL (java.net.URL)4 REFERENCE_URL (org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 TreeSet (java.util.TreeSet)3 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)3 InteractionListener (org.eol.globi.process.InteractionListener)3 DatasetImpl (org.globalbioticinteractions.dataset.DatasetImpl)3 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)3 Test (org.junit.Test)3 JsonNode (com.fasterxml.jackson.databind.JsonNode)2 URI (java.net.URI)2 URISyntaxException (java.net.URISyntaxException)2 File (java.io.File)1 IOException (java.io.IOException)1 MalformedURLException (java.net.MalformedURLException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1