Search in sources :

Example 1 with INTERACTION_TYPE_NAME

use of org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME in project eol-globi-data by jhpoelen.

The method importTaxonDescriptionsFromDir of the class DatasetImporterForDwCATest.

/**
 * Imports the "coetzer" Darwin Core Archive fixture from its directory on the
 * classpath and checks the properties of the first interaction handed to the
 * listener (citations, taxon names, interaction type and resource type).
 */
@Test
public void importTaxonDescriptionsFromDir() throws StudyImporterException, URISyntaxException {
    // Use the directory that contains meta.xml as the archive root.
    URL metaXml = getClass().getResource("/org/globalbioticinteractions/dataset/coetzer/meta.xml");
    File archiveDir = new File(metaXml.toURI()).getParentFile();
    URI archiveRoot = archiveDir.toURI();

    // Every interaction emitted by the importer is collected here, in emission order.
    List<Map<String, String>> interactions = new ArrayList<>();

    DatasetImporterForDwCA importer = new DatasetImporterForDwCA(null, null);
    importer.setDataset(new DatasetImpl("some/namespace", archiveRoot, inStream -> inStream));
    importer.setInteractionListener(interactions::add);
    importer.importStudy();

    assertThat(interactions.isEmpty(), is(false));

    Map<String, String> first = interactions.get(0);
    assertThat(first.get(DATASET_CITATION), containsString("org/globalbioticinteractions/dataset/coetzer/"));
    assertThat(first.get(REFERENCE_CITATION), is("Cockerell, T.D.A. 1937. African bees of the genera Ceratina, Halictus and Megachile. 254 pp. William Clowes and Sons, London"));
    assertThat(first.get(TARGET_TAXON_NAME), is("Chaetodactylus leleupi"));
    assertThat(first.get(SOURCE_TAXON_NAME), is("Ceratina ruwenzorica Cockerell, 1937"));
    assertThat(first.get(INTERACTION_TYPE_NAME), is("Parasite"));
    assertThat(first.get(RESOURCE_TYPES), is("http://rs.gbif.org/terms/1.0/Reference"));
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) StringUtils(org.apache.commons.lang3.StringUtils) NullImportLogger(org.eol.globi.tool.NullImportLogger) InteractType(org.eol.globi.domain.InteractType) Is(org.hamcrest.core.Is) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) DatasetImporterForDwCA.parseAssociatedSequences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedSequences) DatasetImporterForDwCA.importAssociatedTaxaExtension(org.eol.globi.data.DatasetImporterForDwCA.importAssociatedTaxaExtension) DatasetImporterForDwCA.mapReferenceInfo(org.eol.globi.data.DatasetImporterForDwCA.mapReferenceInfo) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) EXTENSION_RESOURCE_RELATIONSHIP(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_RESOURCE_RELATIONSHIP) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) IsNot.not(org.hamcrest.core.IsNot.not) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) DatasetImporterForDwCA.importResourceRelationshipExtension(org.eol.globi.data.DatasetImporterForDwCA.importResourceRelationshipExtension) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) 
Record(org.gbif.dwc.record.Record) Set(java.util.Set) TestCase.assertNull(junit.framework.TestCase.assertNull) Matchers.startsWith(org.hamcrest.Matchers.startsWith) DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly(org.eol.globi.data.DatasetImporterForDwCA.parseDynamicPropertiesForInteractionsOnly) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) EXTENSION_ASSOCIATED_TAXA(org.eol.globi.data.DatasetImporterForDwCA.EXTENSION_ASSOCIATED_TAXA) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) CoreMatchers.anyOf(org.hamcrest.CoreMatchers.anyOf) LogContext(org.eol.globi.domain.LogContext) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) DwcTerm.relatedResourceID(org.gbif.dwc.terms.DwcTerm.relatedResourceID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Assert.assertTrue(org.junit.Assert.assertTrue) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) Test(org.junit.Test) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) 
DatasetImporterForDwCA.parseAssociatedOccurrences(org.eol.globi.data.DatasetImporterForDwCA.parseAssociatedOccurrences) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) TreeMap(java.util.TreeMap) DcTerm(org.gbif.dwc.terms.DcTerm) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) ArrayList(java.util.ArrayList) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URI(java.net.URI) URL(java.net.URL) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) InteractionListener(org.eol.globi.process.InteractionListener) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Example 2 with INTERACTION_TYPE_NAME

use of org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME in project eol-globi-data by jhpoelen.

The method importInteractionsFromResourceRelationships of the class DatasetImporterForDwCA.

/**
 * Walks the records of a resource-relationship extension and hands one
 * property map per usable record to the given listener.
 *
 * <p>For each record a fresh sorted map is created and tagged with the
 * extension's row type. Records whose {@code resourceID} is blank are skipped.
 * For the remaining records the map is filled with the verbatim relationship
 * values, the reference citation (from {@code relationshipAccordingTo}), the
 * interaction type name/id and the event date, and then enriched with the
 * properties registered for the source and target ids under every term type
 * that has an entry in {@code termTypeIdPropMap}.
 *
 * @param interactionListener receiver of the assembled property maps
 * @param resourceExtension   archive file whose records are iterated
 * @param termTypeIdPropMap   lookup keyed by qualified term name, then by id,
 *                            yielding the properties associated with that id
 * @param termTypes           term types to consult in {@code termTypeIdPropMap}
 */
private static void importInteractionsFromResourceRelationships(InteractionListener interactionListener, ArchiveFile resourceExtension, Map<String, Map<String, Map<String, String>>> termTypeIdPropMap, List<DwcTerm> termTypes) {
    for (Record record : resourceExtension) {
        Map<String, String> props = new TreeMap<>();
        appendResourceType(props, resourceExtension.getRowType());

        String sourceId = record.value(DwcTerm.resourceID);
        String relationship = record.value(DwcTerm.relationshipOfResource);

        // relationshipOfResourceID is looked up by simple name; the first matching term wins.
        String relationshipTypeIdValue = null;
        for (Term candidate : record.terms()) {
            if (StringUtils.equals(candidate.simpleName(), "relationshipOfResourceID")) {
                relationshipTypeIdValue = record.value(candidate);
                break;
            }
        }

        String targetId = record.value(DwcTerm.relatedResourceID);

        // Without a source id there is nothing to attach the interaction to.
        if (StringUtils.isBlank(sourceId)) {
            continue;
        }

        // Verbatim values go in first; everything below only fills keys that are still absent.
        appendVerbatimResourceRelationsValues(record, props);

        String relationshipAccordingTo = record.value(DwcTerm.relationshipAccordingTo);
        if (StringUtils.isNotBlank(relationshipAccordingTo)) {
            props.putIfAbsent(REFERENCE_CITATION, relationshipAccordingTo);
        }

        putIfAbsentAndNotBlank(props, INTERACTION_TYPE_NAME, relationship);
        putIfAbsentAndNotBlank(props, INTERACTION_TYPE_ID, relationshipTypeIdValue);
        putIfAbsentAndNotBlank(props, DatasetImporterForMetaTable.EVENT_DATE, record.value(DwcTerm.relationshipEstablishedDate));

        for (DwcTerm termType : termTypes) {
            String qualifiedName = termType.qualifiedName();
            if (StringUtils.isBlank(qualifiedName) || !termTypeIdPropMap.containsKey(qualifiedName)) {
                continue;
            }
            Map<String, Map<String, String>> propsById = termTypeIdPropMap.get(qualifiedName);

            // Source side first, then a target name taken from the remarks (if any),
            // then the target side; this order is kept as-is.
            populatePropertiesAssociatedWithId(props, sourceId, true, propsById.get(sourceId), labelPairFor(termType));
            extractNameFromRelationshipRemarks(record).ifPresent(targetName -> props.put(TARGET_TAXON_NAME, targetName));
            populatePropertiesAssociatedWithId(props, targetId, false, propsById.get(targetId), labelPairFor(termType));
        }

        try {
            interactionListener.on(props);
        } catch (StudyImporterException ignored) {
            // NOTE(review): a listener failure is dropped here and the loop moves on
            // to the next record; confirm that this best-effort behavior is intended.
        }
    }
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) CacheUtil(org.globalbioticinteractions.cache.CacheUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) ExternalIdUtil(org.eol.globi.util.ExternalIdUtil) SOURCE_TAXON_SUBGENUS(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_SUBGENUS) StringUtils(org.apache.commons.lang3.StringUtils) SOURCE_TAXON_ORDER(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_ORDER) InteractType(org.eol.globi.domain.InteractType) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) Matcher(java.util.regex.Matcher) Pair(org.apache.commons.lang3.tuple.Pair) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DECIMAL_LONGITUDE(org.eol.globi.data.DatasetImporterForTSV.DECIMAL_LONGITUDE) TARGET_TAXON_ID(org.eol.globi.service.TaxonUtil.TARGET_TAXON_ID) Map(java.util.Map) SOURCE_TAXON_ID(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_ID) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) TARGET_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_LIFE_STAGE_NAME) MapDBUtil(org.globalbioticinteractions.util.MapDBUtil) Path(java.nio.file.Path) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) ArchiveFile(org.gbif.dwc.ArchiveFile) LOCALITY_NAME(org.eol.globi.data.DatasetImporterForTSV.LOCALITY_NAME) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) Record(org.gbif.dwc.record.Record) Set(java.util.Set) 
StandardCharsets(java.nio.charset.StandardCharsets) IOUtils(org.apache.commons.io.IOUtils) TARGET_TAXON_PATH_NAMES(org.eol.globi.service.TaxonUtil.TARGET_TAXON_PATH_NAMES) List(java.util.List) SOURCE_TAXON_GENUS(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_GENUS) SOURCE_TAXON_SPECIFIC_EPITHET(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_SPECIFIC_EPITHET) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) TARGET_TAXON_GENUS(org.eol.globi.service.TaxonUtil.TARGET_TAXON_GENUS) CitationUtil(org.globalbioticinteractions.dataset.CitationUtil) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) DatasetConstant(org.globalbioticinteractions.dataset.DatasetConstant) HashMap(java.util.HashMap) SOURCE_TAXON_CLASS(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_CLASS) TARGET_TAXON_CLASS(org.eol.globi.service.TaxonUtil.TARGET_TAXON_CLASS) ArrayList(java.util.ArrayList) SOURCE_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_SEX_NAME) TARGET_TAXON_KINGDOM(org.eol.globi.service.TaxonUtil.TARGET_TAXON_KINGDOM) TARGET_TAXON_SPECIFIC_EPITHET(org.eol.globi.service.TaxonUtil.TARGET_TAXON_SPECIFIC_EPITHET) InteractionListenerClosable(org.eol.globi.process.InteractionListenerClosable) SOURCE_TAXON_KINGDOM(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_KINGDOM) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) BASIS_OF_RECORD_NAME(org.eol.globi.data.DatasetImporterForTSV.BASIS_OF_RECORD_NAME) ClosableIterator(org.gbif.utils.file.ClosableIterator) TARGET_TAXON_ORDER(org.eol.globi.service.TaxonUtil.TARGET_TAXON_ORDER) 
Iterator(java.util.Iterator) MalformedURLException(java.net.MalformedURLException) Files(java.nio.file.Files) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileUtils(org.apache.commons.io.FileUtils) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) LOCALITY_ID(org.eol.globi.data.DatasetImporterForTSV.LOCALITY_ID) SOURCE_TAXON_PHYLUM(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_PHYLUM) TARGET_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.TARGET_TAXON_FAMILY) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) DECIMAL_LATITUDE(org.eol.globi.data.DatasetImporterForTSV.DECIMAL_LATITUDE) DcTerm(org.gbif.dwc.terms.DcTerm) TARGET_TAXON_PATH(org.eol.globi.service.TaxonUtil.TARGET_TAXON_PATH) TARGET_BODY_PART_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_ID) TARGET_TAXON_PHYLUM(org.eol.globi.service.TaxonUtil.TARGET_TAXON_PHYLUM) DB(org.mapdb.DB) Collections(java.util.Collections) InputStream(java.io.InputStream) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) Record(org.gbif.dwc.record.Record) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) DcTerm(org.gbif.dwc.terms.DcTerm) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) Map(java.util.Map) HashMap(java.util.HashMap) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) DwcTerm(org.gbif.dwc.terms.DwcTerm)

Aggregations

JsonNode (com.fasterxml.jackson.databind.JsonNode)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 File (java.io.File)2 IOException (java.io.IOException)2 URI (java.net.URI)2 URISyntaxException (java.net.URISyntaxException)2 URL (java.net.URL)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2 TreeMap (java.util.TreeMap)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 MapUtils (org.apache.commons.collections4.MapUtils)2 StringUtils (org.apache.commons.lang3.StringUtils)2 DATASET_CITATION (org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION)2 INTERACTION_TYPE_ID (org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID)2 INTERACTION_TYPE_NAME (org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME)2