Search in sources :

Example 1 with Record

Use of org.gbif.dwc.record.Record in the project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCATest, method importRecordsFromMCZ.

/**
 * Imports the MCZ test archive located next to {@code mcz/meta.xml} and verifies that
 * no interaction reaches the listener and that the text collected from severe-level
 * log calls starts with {@code [failed to handle dwc record]}.
 *
 * @throws StudyImporterException if the import itself fails
 * @throws URISyntaxException     if the test resource URL cannot be converted to a URI
 */
@Test
public void importRecordsFromMCZ() throws StudyImporterException, URISyntaxException {
    StringBuilder actualMessage = new StringBuilder();
    URL resource = getClass().getResource("/org/globalbioticinteractions/dataset/mcz/meta.xml");
    URI archiveRoot = new File(resource.toURI()).getParentFile().toURI();
    AtomicInteger recordCounter = new AtomicInteger(0);
    DatasetImporterForDwCA studyImporterForDwCA = new DatasetImporterForDwCA(null, null);
    // Capture everything logged at severe level so it can be asserted on below.
    studyImporterForDwCA.setLogger(new NullImportLogger() {

        @Override
        public void severe(LogContext ctx, String message) {
            actualMessage.append(message);
        }
    });
    studyImporterForDwCA.setDataset(new DatasetImpl("some/namespace", archiveRoot, inStream -> inStream));
    studyImporterForDwCA.setInteractionListener(new InteractionListener() {

        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            // The former loop over an empty array of expected properties never ran,
            // so its assertions were dead code and have been removed.
            assertThat(interaction.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence"));
            recordCounter.incrementAndGet();
        }
    });
    studyImporterForDwCA.importStudy();
    // The archive is expected to yield no interactions, only a severe log message.
    assertThat(recordCounter.get(), is(0));
    assertThat(actualMessage.toString(), startsWith("[failed to handle dwc record]"));
}
Also used : NullImportLogger(org.eol.globi.tool.NullImportLogger) TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) 
REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) 
File(java.io.File) DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) LogContext(org.eol.globi.domain.LogContext) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URI(java.net.URI) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) InteractionListener(org.eol.globi.process.InteractionListener) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) File(java.io.File) Test(org.junit.Test)

Example 2 with Record

Use of org.gbif.dwc.record.Record in the project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCA, method createReferenceEnricher.

/**
 * Creates a listener that merges reference-extension properties into each interaction
 * before forwarding it to {@code interactionListener}.
 *
 * <p>The reference extension of {@code archive} is indexed by record id on the first
 * call to {@code on(...)}. An interaction whose {@code DWC_COREID} value is found in
 * that index is copied and overlaid with the indexed properties; any other interaction
 * is forwarded unchanged. {@code close()} releases the index.
 *
 * @param archive             archive whose reference extension (if any) is indexed
 * @param interactionListener downstream listener receiving the (possibly enriched) interactions
 * @return a closable listener wrapping {@code interactionListener}
 */
private static InteractionListenerClosable createReferenceEnricher(Archive archive, final InteractionListener interactionListener) {
    return new InteractionListenerClosable() {

        // Built on first use; reset to null on close so a later call re-initializes it.
        private BTreeMap<String, Map<String, String>> referenceMap = null;

        @Override
        public void close() {
            if (referenceMap != null) {
                referenceMap.close();
                referenceMap = null;
            }
        }

        private void initIfNeeded() {
            if (referenceMap == null) {
                referenceMap = MapDBUtil.createBigMap();
                ArchiveFile extension = findResourceExtension(archive, EXTENSION_REFERENCE);
                if (extension != null) {
                    for (Record record : extension) {
                        Map<String, String> props = new TreeMap<>();
                        termsToMap(record, props);
                        props.put(REFERENCE_CITATION, CitationUtil.citationFor(props));
                        appendResourceType(props, extension.getRowType());
                        referenceMap.put(record.id(), props);
                    }
                }
            }
        }

        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            initIfNeeded();
            String coreId = interaction.get(DWC_COREID);
            Map<String, String> enrichedLink = interaction;
            if (contains(referenceMap, coreId)) {
                // A plain TreeMap replaces the former double-brace initializer, which
                // created an anonymous subclass holding a reference to this listener.
                enrichedLink = new TreeMap<>(interaction);
                enrichedLink.putAll(referenceMap.get(coreId));
            }
            interactionListener.on(enrichedLink);
        }
    };
}
Also used : BTreeMap(org.mapdb.BTreeMap) InteractionListenerClosable(org.eol.globi.process.InteractionListenerClosable) Record(org.gbif.dwc.record.Record) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) Map(java.util.Map) HashMap(java.util.HashMap) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) ArchiveFile(org.gbif.dwc.ArchiveFile)

Example 3 with Record

Use of org.gbif.dwc.record.Record in the project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCA, method resolveLocalResourceIds.

/**
 * Scans the archive's core file and, when present, its taxon extension, and calls
 * {@code attemptLinkUsingTerm} for every record with each of the given term types.
 *
 * @param archive             archive whose core and taxon extension are scanned
 * @param termIdPropMap       passed through to {@code attemptLinkUsingTerm}
 * @param referencedSourceIds passed through to {@code attemptLinkUsingTerm}
 * @param referencedTargetIds passed through to {@code attemptLinkUsingTerm}
 * @param termTypes           terms tried, in order, for each record
 */
private static void resolveLocalResourceIds(Archive archive, Map<String, Map<String, Map<String, String>>> termIdPropMap, Set<String> referencedSourceIds, Set<String> referencedTargetIds, List<DwcTerm> termTypes) {
    final ArchiveFile coreFile = archive.getCore();
    final ArchiveFile taxonExtension = findResourceExtension(archive, EXTENSION_TAXON);

    // The core file always comes first; the taxon extension is optional.
    final List<ArchiveFile> filesToScan = new ArrayList<>();
    filesToScan.add(coreFile);
    if (taxonExtension != null) {
        filesToScan.add(taxonExtension);
    }

    for (ArchiveFile fileToScan : filesToScan) {
        for (Record row : fileToScan) {
            for (DwcTerm linkTerm : termTypes) {
                attemptLinkUsingTerm(termIdPropMap, referencedSourceIds, referencedTargetIds, row, linkTerm);
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) Record(org.gbif.dwc.record.Record) ArchiveFile(org.gbif.dwc.ArchiveFile) DwcTerm(org.gbif.dwc.terms.DwcTerm)

Example 4 with Record

Use of org.gbif.dwc.record.Record in the project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCA, method importDescriptionExtension.

/**
 * Derives interactions from a taxon description extension.
 *
 * <p>First indexes every extension record by its id in {@code associationsMap}. Then,
 * for each core record with an indexed description, parses the description text into
 * associated-taxa maps, adds the core id, core properties and (when non-blank) the
 * reference citation and URL, and hands each resulting interaction to
 * {@code interactionListener}. Unsupported description types are skipped with an info
 * message.
 *
 * <p>A {@link StudyImporterException} raised while handling a core record does not abort
 * the import: it is reported through {@code logger} (when non-null) and processing
 * continues with the next core record.
 *
 * @param interactionListener receives the generated interactions
 * @param logger              optional logger; may be {@code null}
 * @param extension           description extension file to index
 * @param core                core file whose records are matched against the index
 * @param associationsMap     index from record id to description properties, filled here
 */
private static void importDescriptionExtension(InteractionListener interactionListener, ImportLogger logger, ArchiveFile extension, ArchiveFile core, BTreeMap<String, Map<String, String>> associationsMap) {
    for (Record record : extension) {
        Map<String, String> props = new TreeMap<>();
        termsToMap(record, props);
        associationsMap.put(record.id(), props);
    }
    for (Record coreRecord : core) {
        String id = coreRecord.id();
        if (contains(associationsMap, id)) {
            try {
                Map<String, String> targetProperties = associationsMap.get(id);
                String referenceCitation = targetProperties.get("http://purl.org/dc/terms/source");
                String descriptionType = targetProperties.get("http://purl.org/dc/terms/type");
                if (isUnsupportedDescriptionType(descriptionType)) {
                    if (logger != null) {
                        logger.info(null, "ignoring unsupported taxon description of type [" + descriptionType + "]");
                    }
                } else {
                    String interactionTypeNameDefault = isEcologyDescription(descriptionType) ? null : "";
                    List<Map<String, String>> maps = AssociatedTaxaUtil.parseAssociatedTaxa(targetProperties.get("http://purl.org/dc/terms/description"), interactionTypeNameDefault);
                    for (Map<String, String> map : maps) {
                        TreeMap<String, String> interaction = new TreeMap<>(map);
                        interaction.put(DWC_COREID, id);
                        mapCoreProperties(coreRecord, interaction);
                        if (StringUtils.isNotBlank(referenceCitation)) {
                            interaction.put(REFERENCE_CITATION, referenceCitation);
                            String urlString = ExternalIdUtil.urlForExternalId(referenceCitation);
                            if (ExternalIdUtil.isSupported(urlString)) {
                                interaction.put(REFERENCE_URL, urlString);
                            }
                        }
                        interactionListener.on(interaction);
                    }
                }
            } catch (StudyImporterException e) {
                // Best-effort import: keep going with the next core record, but report
                // the failure instead of dropping it silently.
                if (logger != null) {
                    logger.severe(null, "failed to import taxon description for record [" + id + "]: " + e.getMessage());
                }
            }
        }
    }
}
Also used : Record(org.gbif.dwc.record.Record) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) Map(java.util.Map) HashMap(java.util.HashMap) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap)

Example 5 with Record

Use of org.gbif.dwc.record.Record in the project eol-globi-data by jhpoelen.

From the class DatasetImporterForDwCATest, method nonInteractionRecordMessage.

/**
 * Imports the adjusted MCZ test archive and checks that no interaction reaches the
 * listener while the info-level log output mentions indexing and a one-record scan.
 *
 * @throws StudyImporterException if the import itself fails
 * @throws URISyntaxException     if the test resource URL cannot be converted to a URI
 */
@Test
public void nonInteractionRecordMessage() throws StudyImporterException, URISyntaxException {
    final List<String> infoMessages = new ArrayList<>();

    URL metaXml = getClass().getResource("/org/globalbioticinteractions/dataset/mcz-adjusted/meta.xml");
    File archiveDir = new File(metaXml.toURI()).getParentFile();
    URI archiveRoot = archiveDir.toURI();

    final AtomicInteger interactionCount = new AtomicInteger(0);
    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);

    // Collect info-level messages; other log levels keep the no-op behaviour.
    importer.setLogger(new NullImportLogger() {

        @Override
        public void info(LogContext ctx, String message) {
            infoMessages.add(message);
        }
    });
    importer.setDataset(new DatasetImpl("some/namespace", archiveRoot, stream -> stream));

    final String expectedResourceTypes = "http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence";
    importer.setInteractionListener(new InteractionListener() {

        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            String actualResourceTypes = interaction.get(DatasetImporterForTSV.RESOURCE_TYPES);
            assertThat(actualResourceTypes, is(expectedResourceTypes));
            interactionCount.incrementAndGet();
        }
    });

    importer.importStudy();

    // No interactions are expected; only the log output is inspected.
    assertThat(interactionCount.get(), is(0));
    String allInfo = StringUtils.join(infoMessages, "\n");
    assertThat(allInfo, containsString("]: indexing interaction records"));
    assertThat(allInfo, containsString("]: scanned [1] record(s)"));
}
Also used : NullImportLogger(org.eol.globi.tool.NullImportLogger) TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) 
REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) 
File(java.io.File) DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) ArrayList(java.util.ArrayList) LogContext(org.eol.globi.domain.LogContext) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URI(java.net.URI) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) InteractionListener(org.eol.globi.process.InteractionListener) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) File(java.io.File) Test(org.junit.Test)

Aggregations

Record (org.gbif.dwc.record.Record)10 TreeMap (java.util.TreeMap)6 HashMap (java.util.HashMap)5 Map (java.util.Map)5 ArrayList (java.util.ArrayList)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 JsonNode (com.fasterxml.jackson.databind.JsonNode)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 File (java.io.File)3 IOException (java.io.IOException)3 URI (java.net.URI)3 URISyntaxException (java.net.URISyntaxException)3 URL (java.net.URL)3 Arrays (java.util.Arrays)3 List (java.util.List)3 Set (java.util.Set)3 MapUtils (org.apache.commons.collections4.MapUtils)3 StringUtils (org.apache.commons.lang3.StringUtils)3 DATASET_CITATION (org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION)3 INTERACTION_TYPE_ID (org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID)3