Search in sources :

Example 16 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCATest, method importRecordsFromArctosArchive.

/**
 * Imports the test archive {@code arctos_mvz_bird_small.zip} and checks every interaction
 * handed to the listener: the reference URL prefix, the source occurrence id, the target
 * occurrence id (only when the key is present), the source taxon family and the resource
 * types. The test also fails when the listener is never invoked.
 *
 * @throws StudyImporterException if the import itself fails
 * @throws URISyntaxException if the archive resource URL cannot be converted to a URI
 */
@Test
public void importRecordsFromArctosArchive() throws StudyImporterException, URISyntaxException {
    URL resource = getClass().getResource("/org/globalbioticinteractions/dataset/arctos_mvz_bird_small.zip");
    DatasetImporterForDwCA studyImporterForDwCA = new DatasetImporterForDwCA(null, null);
    studyImporterForDwCA.setDataset(new DatasetImpl("some/namespace", resource.toURI(), inStream -> inStream));
    // Flipped by the listener so that "no records at all" is not mistaken for "all records valid".
    AtomicBoolean someRecords = new AtomicBoolean(false);
    studyImporterForDwCA.setInteractionListener(interaction -> {
        assertThat(interaction.get(REFERENCE_URL), startsWith("http://arctos.database.museum/guid/"));
        assertThat(interaction.get(SOURCE_OCCURRENCE_ID), anyOf(is("http://arctos.database.museum/guid/MVZ:Bird:180448?seid=587053"), is("http://arctos.database.museum/guid/MVZ:Bird:183644?seid=158590"), is("http://arctos.database.museum/guid/MVZ:Bird:58090?seid=657121")));
        // The target occurrence id is only checked when the key is present.
        if (interaction.containsKey(DatasetImporterForTSV.TARGET_OCCURRENCE_ID)) {
            assertThat(interaction.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), anyOf(is("http://arctos.database.museum/guid/MVZ:Herp:241200"), is("http://arctos.database.museum/guid/MVZ:Bird:183643"), is("http://arctos.database.museum/guid/MVZ:Bird:58093")));
        }
        assertThat(interaction.get(SOURCE_TAXON_FAMILY), anyOf(is("Accipitridae"), is("Strigidae")));
        assertThat(interaction.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/associatedOccurrences | http://rs.tdwg.org/dwc/terms/Occurrence"));
        someRecords.set(true);
    });
    studyImporterForDwCA.importStudy();
    assertThat(someRecords.get(), is(true));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) 
Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) 
DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) Test(org.junit.Test)

Example 17 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCATest, method importRecordsFromMCZ.

/**
 * Imports the archive rooted at the directory that contains the test resource
 * {@code mcz/meta.xml}. Expects that no interaction reaches the listener and that the
 * accumulated severe log output starts with {@code [failed to handle dwc record]}.
 *
 * @throws StudyImporterException if the import itself fails
 * @throws URISyntaxException if the resource URL cannot be converted to a URI
 */
@Test
public void importRecordsFromMCZ() throws StudyImporterException, URISyntaxException {
    // Collects every message logged at "severe" level during the import.
    StringBuilder severeMessages = new StringBuilder();

    // The archive root is the directory holding meta.xml.
    URL metaXml = getClass().getResource("/org/globalbioticinteractions/dataset/mcz/meta.xml");
    URI archiveDir = new File(metaXml.toURI()).getParentFile().toURI();

    AtomicInteger interactionCount = new AtomicInteger(0);

    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);
    importer.setLogger(new NullImportLogger() {

        @Override
        public void severe(LogContext ctx, String message) {
            severeMessages.append(message);
        }
    });
    importer.setDataset(new DatasetImpl("some/namespace", archiveDir, inStream -> inStream));
    importer.setInteractionListener(interaction -> {
        // No properties are currently required; the loop body runs zero times.
        String[] requiredProperties = new String[] {};
        for (String expectedProperty : requiredProperties) {
            assertThat("no [" + expectedProperty + "] found in " + interaction, interaction.containsKey(expectedProperty), is(true));
            assertThat("no value of [" + expectedProperty + "] found in " + interaction, interaction.get(expectedProperty), is(notNullValue()));
        }
        String expectedResourceTypes = "http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence";
        assertThat(interaction.get(DatasetImporterForTSV.RESOURCE_TYPES), is(expectedResourceTypes));
        interactionCount.incrementAndGet();
    });

    importer.importStudy();

    assertThat(interactionCount.get(), is(0));
    assertThat(severeMessages.toString(), startsWith("[failed to handle dwc record]"));
}
Also used : NullImportLogger(org.eol.globi.tool.NullImportLogger) TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) 
REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) 
File(java.io.File) DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) LogContext(org.eol.globi.domain.LogContext) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URI(java.net.URI) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) InteractionListener(org.eol.globi.process.InteractionListener) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) File(java.io.File) Test(org.junit.Test)

Example 18 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCATest, method importRecordsFromResourceRelationshipArchiveRemarksOnly.

/**
 * Imports the test archive {@code fmnh-rr-8278596f-4d3f-4f82-8cd1-b5070fe1bc7c.zip}
 * (resolved relative to this test class) and checks, for every interaction seen, the
 * source/target taxon names, the source occurrence id, the absence of a target occurrence
 * id and the resource types. At least one interaction must be seen.
 *
 * @throws StudyImporterException if the import itself fails
 * @throws URISyntaxException if the archive resource URL cannot be converted to a URI
 */
@Test
public void importRecordsFromResourceRelationshipArchiveRemarksOnly() throws StudyImporterException, URISyntaxException {
    URL archive = getClass().getResource("fmnh-rr-8278596f-4d3f-4f82-8cd1-b5070fe1bc7c.zip");
    AtomicInteger seenInteractions = new AtomicInteger(0);

    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);
    importer.setDataset(new DatasetImpl("some/namespace", archive.toURI(), inStream -> inStream));
    importer.setInteractionListener(link -> {
        assertThat(link.get(TARGET_TAXON_NAME), is("Glaucomys volans"));
        assertThat(link.get(TARGET_OCCURRENCE_ID), is(nullValue()));
        assertThat(link.get(SOURCE_TAXON_NAME), is("Orchopeas fulleri Traub, 1950"));
        assertThat(link.get(SOURCE_OCCURRENCE_ID), is("8278596f-4d3f-4f82-8cd1-b5070fe1bc7c"));
        // The counter is bumped before the final check, as in the original ordering.
        seenInteractions.incrementAndGet();
        String expectedResourceTypes = "http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence";
        assertThat(link.get(DatasetImporterForTSV.RESOURCE_TYPES), is(expectedResourceTypes));
    });

    importer.importStudy();

    assertThat(seenInteractions.get(), greaterThan(0));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) 
Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) 
DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) Test(org.junit.Test)

Example 19 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCATest, method importRecords.

/**
 * Imports the test archive {@code dwca.zip} using a dataset configured with the citation
 * {@code "some citation"}. Every interaction must carry associated taxa or dynamic
 * properties, non-null taxon names, interaction type and reference fields, and a dataset
 * citation containing the configured text. After the import, the set of resource types
 * collected across all interactions is compared against the expected three terms.
 *
 * @throws StudyImporterException if the import itself fails
 * @throws URISyntaxException if the archive resource URL cannot be converted to a URI
 * @throws IOException if the JSON configuration cannot be parsed
 */
@Test
public void importRecords() throws StudyImporterException, URISyntaxException, IOException {
    URL archive = getClass().getResource("/org/globalbioticinteractions/dataset/dwca.zip");
    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);

    DatasetImpl configuredDataset = new DatasetImpl("some/namespace", archive.toURI(), inStream -> inStream);
    configuredDataset.setConfig(new ObjectMapper().readTree("{ \"citation\": \"some citation\" }"));
    importer.setDataset(configuredDataset);

    AtomicBoolean sawRecord = new AtomicBoolean(false);
    Set<String> seenResourceTypes = new TreeSet<>();

    importer.setInteractionListener(interaction -> {
        String associatedTaxa = interaction.get("http://rs.tdwg.org/dwc/terms/associatedTaxa");
        String dynamicProperties = interaction.get("http://rs.tdwg.org/dwc/terms/dynamicProperties");
        boolean hasInteractionSource = StringUtils.isNotBlank(associatedTaxa) || StringUtils.isNotBlank(dynamicProperties);
        assertThat(hasInteractionSource, is(true));

        assertThat(interaction.get(SOURCE_TAXON_NAME), is(not(nullValue())));
        assertThat(interaction.get(TaxonUtil.TARGET_TAXON_NAME), is(not(nullValue())));
        assertThat(interaction.get(INTERACTION_TYPE_NAME), is(not(nullValue())));

        // Looked up once; all three citation checks inspect the same value.
        String citation = interaction.get(DatasetImporterForTSV.DATASET_CITATION);
        assertThat(citation, containsString("some citation"));
        assertThat(citation, containsString("Accessed at"));
        assertThat(citation, containsString("dataset/dwca.zip"));

        assertThat(interaction.get(REFERENCE_ID), is(not(nullValue())));
        assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is(not(nullValue())));
        assertThat(interaction.get(REFERENCE_URL), is(not(nullValue())));

        String[] types = splitByPipes(interaction.get(RESOURCE_TYPES));
        seenResourceTypes.addAll(Arrays.asList(types));
        sawRecord.set(true);
    });

    importer.importStudy();

    assertThat(sawRecord.get(), is(true));
    assertThat(seenResourceTypes, containsInAnyOrder("http://rs.tdwg.org/dwc/terms/dynamicProperties", "http://rs.tdwg.org/dwc/terms/Occurrence", "http://rs.tdwg.org/dwc/terms/associatedTaxa"));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InteractionListener(org.eol.globi.process.InteractionListener) TreeSet(java.util.TreeSet) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 20 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

From the class DatasetImporterForRegistryTest, method filteredDatasets.

/**
 * Runs the registry importer against a stub registry that exposes a single namespace,
 * with a dataset filter that always returns {@code false}. The stub dataset only serves
 * resources whose URI ends in {@code globi.json}; any other request raises an
 * {@link IOException}. There are no explicit assertions: the test passes when
 * {@code importStudy()} completes without throwing.
 *
 * @throws StudyImporterException if the import itself fails
 */
@Test
public void filteredDatasets() throws StudyImporterException {
    DatasetRegistry stubRegistry = new DatasetRegistry() {

        @Override
        public Collection<String> findNamespaces() throws DatasetRegistryException {
            return Collections.singletonList("some/namespace");
        }

        @Override
        public Dataset datasetFor(String namespace) throws DatasetRegistryException {
            return new DatasetImpl("some/namespace", URI.create("some:uri"), stream -> stream) {

                @Override
                public InputStream retrieve(URI resource) throws IOException {
                    // Only the globi.json configuration is available from this stub.
                    if (StringUtils.endsWith(resource.toString(), "globi.json")) {
                        return IOUtils.toInputStream("{\"some\":\"thing\"}", StandardCharsets.UTF_8);
                    }
                    throw new IOException();
                }
            };
        }
    };

    DatasetImporterForRegistry registryImporter = new DatasetImporterForRegistry(null, null, stubRegistry);
    registryImporter.setDatasetFilter(candidate -> false);
    registryImporter.importStudy();
}
Also used : DatasetRegistryException(org.globalbioticinteractions.dataset.DatasetRegistryException) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) Collection(java.util.Collection) Test(org.junit.Test) IOException(java.io.IOException) StringUtils(org.apache.commons.lang3.StringUtils) StandardCharsets(java.nio.charset.StandardCharsets) Is(org.hamcrest.core.Is) IOUtils(org.apache.commons.io.IOUtils) Dataset(org.globalbioticinteractions.dataset.Dataset) URI(java.net.URI) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Collections(java.util.Collections) DatasetRegistry(org.globalbioticinteractions.dataset.DatasetRegistry) InputStream(java.io.InputStream) DatasetRegistryException(org.globalbioticinteractions.dataset.DatasetRegistryException) Dataset(org.globalbioticinteractions.dataset.Dataset) InputStream(java.io.InputStream) Collection(java.util.Collection) DatasetRegistry(org.globalbioticinteractions.dataset.DatasetRegistry) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) IOException(java.io.IOException) URI(java.net.URI) Test(org.junit.Test)

Aggregations

DatasetImpl (org.globalbioticinteractions.dataset.DatasetImpl)83 Test (org.junit.Test)73 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)50 URI (java.net.URI)32 JsonNode (com.fasterxml.jackson.databind.JsonNode)31 Dataset (org.globalbioticinteractions.dataset.Dataset)25 ArrayList (java.util.ArrayList)22 IOException (java.io.IOException)21 MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat)21 InteractionListener (org.eol.globi.process.InteractionListener)19 URL (java.net.URL)18 Map (java.util.Map)17 StudyNode (org.eol.globi.domain.StudyNode)17 HashMap (java.util.HashMap)16 Is.is (org.hamcrest.core.Is.is)16 List (java.util.List)15 StudyImpl (org.eol.globi.domain.StudyImpl)15 StringUtils (org.apache.commons.lang3.StringUtils)14 CoreMatchers.nullValue (org.hamcrest.CoreMatchers.nullValue)14 Is (org.hamcrest.core.Is)14