Search in sources :

Example 11 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

the class LinkerDOITest method createStudyDOIlookupCitationDisabled.

/**
 * Creates a study whose originating dataset has
 * {@code DatasetConstant.SHOULD_RESOLVE_REFERENCES} set to {@code false} and checks that
 * the study returned by the node factory has no DOI while title, citation and
 * external id are kept as provided.
 */
@Test
public void createStudyDOIlookupCitationDisabled() throws NodeFactoryException {
    // dataset configuration with reference resolution switched off
    ObjectNode datasetConfig = new ObjectMapper().createObjectNode();
    datasetConfig.put(DatasetConstant.SHOULD_RESOLVE_REFERENCES, false);
    DatasetImpl dataset = new DatasetImpl("some/namespace", URI.create("some:uri"), inStream -> inStream);
    dataset.setConfig(datasetConfig);

    // study under test, linked to the dataset above
    StudyImpl requested = new StudyImpl("title", null, "some citation");
    requested.setExternalId("some:id");
    requested.setOriginatingDataset(dataset);

    Study created = getNodeFactory().getOrCreateStudy(requested);

    assertThat(created.getDOI(), is(nullValue()));
    assertThat(created.getCitation(), is("some citation"));
    assertThat(created.getTitle(), is("title"));
    assertThat(created.getExternalId(), is("some:id"));
}
Also used : Study(org.eol.globi.domain.Study) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) StudyImpl(org.eol.globi.domain.StudyImpl) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 12 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

the class DatasetImporterForMetaTableTest method assertExpectedColumnCount.

/**
 * Reads the JSON meta-table definition named {@code metaTableDef}, resolved relative to
 * {@link DatasetImporterForMetaTable}, and asserts that the columns derived from its
 * {@code tableSchema} node number exactly 40.
 *
 * @param metaTableDef resource name of the meta-table definition, as accepted by
 *                     {@link Class#getResource(String)}
 * @throws IOException if the definition cannot be read or parsed
 */
public void assertExpectedColumnCount(String metaTableDef) throws IOException {
    final Class<DatasetImporterForMetaTable> clazz = DatasetImporterForMetaTable.class;
    final URL resource = clazz.getResource(metaTableDef);
    assertNotNull(resource);

    final JsonNode config;
    // close the stream explicitly rather than relying on the parser to do so
    try (InputStream inputStream = clazz.getResourceAsStream(metaTableDef)) {
        config = new ObjectMapper().readTree(inputStream);
    }

    // Strip the trailing resource name literally: it is a plain name, not a regex,
    // so characters such as '.', '+' or '(' must not be interpreted as a pattern.
    final String externalForm = resource.toExternalForm();
    final String baseUrl = externalForm.endsWith(metaTableDef)
            ? externalForm.substring(0, externalForm.length() - metaTableDef.length())
            : externalForm;

    List<DatasetImporterForMetaTable.Column> columnNames = DatasetImporterForMetaTable.columnsFromExternalSchema(config.get("tableSchema"), new DatasetImpl(null, URI.create(baseUrl), inStream -> inStream));
    assertThat(columnNames.size(), is(40));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test) IOException(java.io.IOException) HashMap(java.util.HashMap) StringUtils(org.apache.commons.lang3.StringUtils) StandardCharsets(java.nio.charset.StandardCharsets) InteractType(org.eol.globi.domain.InteractType) DatasetLocal(org.eol.globi.service.DatasetLocal) ArrayList(java.util.ArrayList) InteractTypeMapper(org.eol.globi.util.InteractTypeMapper) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) InputStream(java.io.InputStream) InputStream(java.io.InputStream) JsonNode(com.fasterxml.jackson.databind.JsonNode) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 13 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method importTaxonDescriptionsFromDirUnsupportedDescriptionType.

/**
 * Runs the DwC-A importer against the directory holding the
 * {@code coetzer-unsupported-description-type} archive and expects no interaction
 * to be collected.
 */
@Test
public void importTaxonDescriptionsFromDirUnsupportedDescriptionType() throws StudyImporterException, URISyntaxException {
    URL metaXml = getClass().getResource("/org/globalbioticinteractions/dataset/coetzer-unsupported-description-type/meta.xml");
    // the archive root is the directory that contains meta.xml
    File archiveDir = new File(metaXml.toURI()).getParentFile();
    URI archiveRoot = archiveDir.toURI();
    List<Map<String, String>> collected = new ArrayList<>();

    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);
    DatasetImpl dataset = new DatasetImpl("some/namespace", archiveRoot, inStream -> inStream);
    importer.setDataset(dataset);

    InteractionListener collector = new InteractionListener() {

        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            // any interaction that does arrive must carry these resource types
            String resourceTypes = interaction.get(DatasetImporterForTSV.RESOURCE_TYPES);
            assertThat(resourceTypes, is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence"));
            collected.add(interaction);
        }
    };
    importer.setInteractionListener(collector);

    importer.importStudy();

    assertThat(collected.size(), is(0));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) 
Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) 
DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) ArrayList(java.util.ArrayList) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URI(java.net.URI) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) InteractionListener(org.eol.globi.process.InteractionListener) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Example 14 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method importRecordsFromZip.

/**
 * Imports the {@code dwca.zip} test archive, referenced through the {@code url} entry of
 * the dataset configuration, and verifies each emitted interaction as well as the
 * overall set of resource types seen.
 */
@Test
public void importRecordsFromZip() throws StudyImporterException, IOException {
    // prefix shared by the reference id, citation and url assertions below
    final String occurrencePrefix = "https://symbiota.ccber.ucsb.edu:443/collections/individual/index.php?occid";

    URL archive = getClass().getResource("/org/globalbioticinteractions/dataset/dwca.zip");
    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);
    DatasetImpl dataset = new DatasetImpl("some/namespace", URI.create("file:///some/path/data.zip"), inStream -> inStream);

    String configJson = "{ \"interactionTypeId\": \"http://purl.obolibrary.org/obo/RO_0002437\",\"url\": \""
            + archive.toExternalForm()
            + "\"}";
    JsonNode config = new ObjectMapper().readTree(configJson);
    dataset.setConfig(config);
    importer.setDataset(dataset);

    // citation is read after the config has been applied to the dataset
    String expectedCitation = dataset.getCitation();
    AtomicBoolean sawRecord = new AtomicBoolean(false);
    Set<String> seenResourceTypes = new TreeSet<>();

    InteractionListener verifier = new InteractionListener() {

        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            String associatedTaxa = interaction.get("http://rs.tdwg.org/dwc/terms/associatedTaxa");
            String dynamicProperties = interaction.get("http://rs.tdwg.org/dwc/terms/dynamicProperties");
            boolean hasTaxaOrProperties = StringUtils.isNotBlank(associatedTaxa) || StringUtils.isNotBlank(dynamicProperties);
            assertThat(hasTaxaOrProperties, is(true));

            assertThat(interaction.get(SOURCE_TAXON_NAME), is(not(nullValue())));
            assertThat(interaction.get(TaxonUtil.TARGET_TAXON_NAME), is(not(nullValue())));
            assertThat(interaction.get(INTERACTION_TYPE_NAME), is(not(nullValue())));
            assertThat(interaction.get(DatasetImporterForTSV.DATASET_CITATION), containsString(expectedCitation));

            assertThat(interaction.get(REFERENCE_ID), startsWith(occurrencePrefix));
            assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), startsWith(occurrencePrefix));
            assertThat(interaction.get(REFERENCE_URL), startsWith(occurrencePrefix));

            String[] types = splitByPipes(interaction.get(RESOURCE_TYPES));
            seenResourceTypes.addAll(Arrays.asList(types));
            sawRecord.set(true);
        }
    };
    importer.setInteractionListener(verifier);

    importer.importStudy();

    assertThat(sawRecord.get(), is(true));
    assertThat(seenResourceTypes, containsInAnyOrder("http://rs.tdwg.org/dwc/terms/dynamicProperties", "http://rs.tdwg.org/dwc/terms/Occurrence", "http://rs.tdwg.org/dwc/terms/associatedTaxa"));
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InteractionListener(org.eol.globi.process.InteractionListener) TreeSet(java.util.TreeSet) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 15 with DatasetImpl

use of org.globalbioticinteractions.dataset.DatasetImpl in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method importTaxonDescriptionsFromDir.

/**
 * Runs the DwC-A importer against the directory holding the {@code coetzer} archive and
 * checks the citation, taxon names, interaction type and resource types of the first
 * collected interaction.
 */
@Test
public void importTaxonDescriptionsFromDir() throws StudyImporterException, URISyntaxException {
    URL metaXml = getClass().getResource("/org/globalbioticinteractions/dataset/coetzer/meta.xml");
    // the archive root is the directory that contains meta.xml
    File archiveDir = new File(metaXml.toURI()).getParentFile();
    URI archiveRoot = archiveDir.toURI();
    List<Map<String, String>> collected = new ArrayList<>();

    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);
    DatasetImpl dataset = new DatasetImpl("some/namespace", archiveRoot, inStream -> inStream);
    importer.setDataset(dataset);

    InteractionListener collector = new InteractionListener() {

        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            collected.add(interaction);
        }
    };
    importer.setInteractionListener(collector);

    importer.importStudy();

    assertThat(collected.size() > 0, is(true));

    // only the first interaction is inspected in detail
    Map<String, String> first = collected.get(0);
    assertThat(first.get(DATASET_CITATION), containsString("org/globalbioticinteractions/dataset/coetzer/"));
    assertThat(first.get(REFERENCE_CITATION), is("Cockerell, T.D.A. 1937. African bees of the genera Ceratina, Halictus and Megachile. 254 pp. William Clowes and Sons, London"));
    assertThat(first.get(TARGET_TAXON_NAME), is("Chaetodactylus leleupi"));
    assertThat(first.get(SOURCE_TAXON_NAME), is("Ceratina ruwenzorica Cockerell, 1937"));
    assertThat(first.get(INTERACTION_TYPE_NAME), is("Parasite"));
    assertThat(first.get(RESOURCE_TYPES), is("http://rs.gbif.org/terms/1.0/Reference"));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) 
Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) 
DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) ArrayList(java.util.ArrayList) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URI(java.net.URI) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) InteractionListener(org.eol.globi.process.InteractionListener) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Aggregations

DatasetImpl (org.globalbioticinteractions.dataset.DatasetImpl)83 Test (org.junit.Test)73 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)50 URI (java.net.URI)32 JsonNode (com.fasterxml.jackson.databind.JsonNode)31 Dataset (org.globalbioticinteractions.dataset.Dataset)25 ArrayList (java.util.ArrayList)22 IOException (java.io.IOException)21 MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat)21 InteractionListener (org.eol.globi.process.InteractionListener)19 URL (java.net.URL)18 Map (java.util.Map)17 StudyNode (org.eol.globi.domain.StudyNode)17 HashMap (java.util.HashMap)16 Is.is (org.hamcrest.core.Is.is)16 List (java.util.List)15 StudyImpl (org.eol.globi.domain.StudyImpl)15 StringUtils (org.apache.commons.lang3.StringUtils)14 CoreMatchers.nullValue (org.hamcrest.CoreMatchers.nullValue)14 Is (org.hamcrest.core.Is)14