Search in sources:

Example 6 with Record

use of org.gbif.dwc.record.Record in project eol-globi-data by jhpoelen.

the class DwCAUtilTest method assertHasRecords.

/**
 * Asserts that the core of the given archive yields at least one record.
 *
 * @param dwcArchive archive whose core is probed for a first record
 */
private void assertHasRecords(Archive dwcArchive) {
    // Only the presence of a first core record matters; no record content is read or displayed.
    boolean hasRecords = dwcArchive.getCore().iterator().hasNext();
    assertThat(hasRecords, is(true));
}
Also used : Record(org.gbif.dwc.record.Record)

Example 7 with Record

use of org.gbif.dwc.record.Record in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCA method importTaxaExtension.

/**
 * Matches extension records to core records by record id and hands one merged
 * property map per match to the listener.
 *
 * @param interactionListener receives the merged properties of every matched core record
 * @param extension           extension file; each record is stored in {@code associationsMap} under its id
 * @param core                core file; each record whose id is present in {@code associationsMap} is emitted
 * @param associationsMap     id-keyed store that is filled from {@code extension} and then queried per core record
 */
private static void importTaxaExtension(InteractionListener interactionListener, ArchiveFile extension, ArchiveFile core, BTreeMap<String, Map<String, String>> associationsMap) {
    // Pass 1: index every extension record's properties under its record id.
    for (Record extensionRecord : extension) {
        Map<String, String> extensionProperties = new TreeMap<>();
        termsToMap(extensionRecord, extensionProperties);
        associationsMap.put(extensionRecord.id(), extensionProperties);
    }

    // Pass 2: for each core record with an indexed counterpart, merge both sides and emit.
    for (Record coreRecord : core) {
        String coreId = coreRecord.id();
        if (!contains(associationsMap, coreId)) {
            continue;
        }
        try {
            Map<String, String> associatedProperties = associationsMap.get(coreId);
            TreeMap<String, String> interaction = new TreeMap<>();
            mapAssociationProperties(associatedProperties, interaction);
            mapCoreProperties(coreRecord, interaction);
            // Record which row types (core first, then extension) contributed to this interaction.
            String coreRowType = core.getRowType().qualifiedName();
            String extensionRowType = extension.getRowType().qualifiedName();
            String resourceTypes = StringUtils.join(Arrays.asList(coreRowType, extensionRowType), CharsetConstant.SEPARATOR);
            interaction.put(RESOURCE_TYPES, resourceTypes);
            interactionListener.on(interaction);
        } catch (StudyImporterException e) {
            // NOTE(review): the exception is dropped without logging and the loop moves on
            // to the next core record; confirm that this best-effort behaviour is intended.
        }
    }
}
Also used : Record(org.gbif.dwc.record.Record) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap)

Example 8 with Record

use of org.gbif.dwc.record.Record in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCA method importCore.

/**
 * Passes every record of the archive's core to {@code handleRecord} and counts the
 * records that were handled without an {@link IllegalStateException}.
 *
 * @param archive             archive whose core records are iterated
 * @param interactionListener listener forwarded to {@code handleRecord} for each record
 * @return number of records for which {@code handleRecord} returned normally
 * @throws StudyImporterException declared by this method; not caught by the loop below
 */
private int importCore(Archive archive, InteractionListener interactionListener) throws StudyImporterException {
    final AtomicInteger handledCount = new AtomicInteger(0);
    // NOTE(review): this ClosableIterator is never closed here; confirm whether it holds
    // a resource that should be released once iteration is done.
    final ClosableIterator<Record> records = archive.getCore().iterator();
    boolean exhausted = false;
    while (!exhausted) {
        try {
            if (records.hasNext()) {
                handleRecord(interactionListener, records.next());
                handledCount.incrementAndGet();
            } else {
                exhausted = true;
            }
        } catch (IllegalStateException ex) {
            // An IllegalStateException from hasNext/next/handleRecord is logged and the loop
            // keeps going; the failing record is not counted.
            LogUtil.logError(getLogger(), "failed to handle dwc record", ex);
        }
    }
    return handledCount.get();
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Record(org.gbif.dwc.record.Record)

Example 9 with Record

use of org.gbif.dwc.record.Record in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCA method collectRelatedResourceIds.

/**
 * Collects the source and target ids referenced by the records of a resource extension.
 *
 * <p>A record with a blank {@code resourceID} is skipped. Otherwise, when
 * {@code relatedResourceID} is non-blank both ids are collected; when it is blank, the
 * source id alone is collected if {@code relationshipRemarks} contains
 * {@code "scientificName:"}.
 *
 * @param resourceExtension   extension file whose records are scanned
 * @param referencedSourceIds receives the collected {@code resourceID} values
 * @param referencedTargetIds receives the collected {@code relatedResourceID} values
 */
private static void collectRelatedResourceIds(ArchiveFile resourceExtension, Set<String> referencedSourceIds, Set<String> referencedTargetIds) {
    for (Record relation : resourceExtension) {
        String relatedId = relation.value(DwcTerm.relatedResourceID);
        String resourceId = relation.value(DwcTerm.resourceID);
        String remarks = relation.value(DwcTerm.relationshipRemarks);

        if (StringUtils.isBlank(resourceId)) {
            continue;
        }

        if (StringUtils.isNotBlank(relatedId)) {
            referencedSourceIds.add(resourceId);
            referencedTargetIds.add(relatedId);
            continue;
        }

        // No target id: keep the source only when the remarks carry a scientificName marker.
        if (StringUtils.contains(remarks, "scientificName:")) {
            referencedSourceIds.add(resourceId);
        }
    }
}
Also used : Record(org.gbif.dwc.record.Record)

Example 10 with Record

use of org.gbif.dwc.record.Record in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCA method importInteractionsFromResourceRelationships.

/**
 * Builds one property map per resource-relationship record with a non-blank
 * {@code resourceID} and hands it to the listener.
 *
 * <p>Values are added in a fixed order, mostly with put-if-absent semantics, so earlier
 * steps take precedence over later ones for the same key.
 *
 * @param interactionListener receives the property map built for each qualifying record
 * @param resourceExtension   extension file whose records are processed
 * @param termTypeIdPropMap   lookup keyed by term qualified name, then by record id, giving that record's properties
 * @param termTypes           term types to consult, in order, in {@code termTypeIdPropMap}
 */
private static void importInteractionsFromResourceRelationships(InteractionListener interactionListener, ArchiveFile resourceExtension, Map<String, Map<String, Map<String, String>>> termTypeIdPropMap, List<DwcTerm> termTypes) {
    for (Record relation : resourceExtension) {
        Map<String, String> props = new TreeMap<>();
        appendResourceType(props, resourceExtension.getRowType());

        String sourceId = relation.value(DwcTerm.resourceID);
        String relationshipName = relation.value(DwcTerm.relationshipOfResource);
        // relationshipOfResourceID is looked up by simple name among the record's own terms.
        String relationshipId = relation.terms()
                .stream()
                .filter(term -> StringUtils.equals(term.simpleName(), "relationshipOfResourceID"))
                .findFirst()
                .map(relation::value)
                .orElse(null);
        String targetId = relation.value(DwcTerm.relatedResourceID);

        if (StringUtils.isBlank(sourceId)) {
            continue;
        }

        appendVerbatimResourceRelationsValues(relation, props);

        String accordingTo = relation.value(DwcTerm.relationshipAccordingTo);
        if (StringUtils.isNotBlank(accordingTo)) {
            props.putIfAbsent(REFERENCE_CITATION, accordingTo);
        }

        putIfAbsentAndNotBlank(props, INTERACTION_TYPE_NAME, relationshipName);
        putIfAbsentAndNotBlank(props, INTERACTION_TYPE_ID, relationshipId);
        putIfAbsentAndNotBlank(props, DatasetImporterForMetaTable.EVENT_DATE, relation.value(DwcTerm.relationshipEstablishedDate));

        for (DwcTerm termType : termTypes) {
            String typeKey = termType.qualifiedName();
            if (StringUtils.isBlank(typeKey) || !termTypeIdPropMap.containsKey(typeKey)) {
                continue;
            }
            Map<String, Map<String, String>> propsById = termTypeIdPropMap.get(typeKey);
            // Source side first, then a name taken from the remarks (if any), then the target side.
            populatePropertiesAssociatedWithId(props, sourceId, true, propsById.get(sourceId), labelPairFor(termType));
            extractNameFromRelationshipRemarks(relation).ifPresent(name -> props.put(TARGET_TAXON_NAME, name));
            populatePropertiesAssociatedWithId(props, targetId, false, propsById.get(targetId), labelPairFor(termType));
        }

        try {
            interactionListener.on(props);
        } catch (StudyImporterException e) {
            // NOTE(review): the listener's failure is dropped without logging and the next
            // record is processed; confirm that this best-effort behaviour is intended.
        }
    }
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) CacheUtil(org.globalbioticinteractions.cache.CacheUtil) Arrays(java.util.Arrays) INTERACTION_TYPE_NAME(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME) URL(java.net.URL) URISyntaxException(java.net.URISyntaxException) INTERACTION_TYPE_ID(org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID) InteractionListener(org.eol.globi.process.InteractionListener) ExternalIdUtil(org.eol.globi.util.ExternalIdUtil) SOURCE_TAXON_SUBGENUS(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_SUBGENUS) StringUtils(org.apache.commons.lang3.StringUtils) SOURCE_TAXON_ORDER(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_ORDER) InteractType(org.eol.globi.domain.InteractType) REFERENCE_URL(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL) Matcher(java.util.regex.Matcher) Pair(org.apache.commons.lang3.tuple.Pair) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DECIMAL_LONGITUDE(org.eol.globi.data.DatasetImporterForTSV.DECIMAL_LONGITUDE) TARGET_TAXON_ID(org.eol.globi.service.TaxonUtil.TARGET_TAXON_ID) Map(java.util.Map) SOURCE_TAXON_ID(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_ID) SOURCE_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) TARGET_LIFE_STAGE_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_LIFE_STAGE_NAME) MapDBUtil(org.globalbioticinteractions.util.MapDBUtil) Path(java.nio.file.Path) TARGET_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID) SOURCE_TAXON_NAME(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME) DwcTerm(org.gbif.dwc.terms.DwcTerm) TARGET_FIELD_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_FIELD_NUMBER) ArchiveFile(org.gbif.dwc.ArchiveFile) LOCALITY_NAME(org.eol.globi.data.DatasetImporterForTSV.LOCALITY_NAME) REFERENCE_ID(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_ID) Record(org.gbif.dwc.record.Record) Set(java.util.Set) 
StandardCharsets(java.nio.charset.StandardCharsets) IOUtils(org.apache.commons.io.IOUtils) TARGET_TAXON_PATH_NAMES(org.eol.globi.service.TaxonUtil.TARGET_TAXON_PATH_NAMES) List(java.util.List) SOURCE_TAXON_GENUS(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_GENUS) SOURCE_TAXON_SPECIFIC_EPITHET(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_SPECIFIC_EPITHET) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) TARGET_BODY_PART_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME) DATASET_CITATION(org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION) TARGET_TAXON_GENUS(org.eol.globi.service.TaxonUtil.TARGET_TAXON_GENUS) CitationUtil(org.globalbioticinteractions.dataset.CitationUtil) TARGET_CATALOG_NUMBER(org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER) SOURCE_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_FAMILY) DatasetConstant(org.globalbioticinteractions.dataset.DatasetConstant) HashMap(java.util.HashMap) SOURCE_TAXON_CLASS(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_CLASS) TARGET_TAXON_CLASS(org.eol.globi.service.TaxonUtil.TARGET_TAXON_CLASS) ArrayList(java.util.ArrayList) SOURCE_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.SOURCE_SEX_NAME) TARGET_TAXON_KINGDOM(org.eol.globi.service.TaxonUtil.TARGET_TAXON_KINGDOM) TARGET_TAXON_SPECIFIC_EPITHET(org.eol.globi.service.TaxonUtil.TARGET_TAXON_SPECIFIC_EPITHET) InteractionListenerClosable(org.eol.globi.process.InteractionListenerClosable) SOURCE_TAXON_KINGDOM(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_KINGDOM) RESOURCE_TYPES(org.eol.globi.data.DatasetImporterForTSV.RESOURCE_TYPES) SOURCE_OCCURRENCE_ID(org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID) Term(org.gbif.dwc.terms.Term) MapUtils(org.apache.commons.collections4.MapUtils) BASIS_OF_RECORD_NAME(org.eol.globi.data.DatasetImporterForTSV.BASIS_OF_RECORD_NAME) ClosableIterator(org.gbif.utils.file.ClosableIterator) TARGET_TAXON_ORDER(org.eol.globi.service.TaxonUtil.TARGET_TAXON_ORDER) 
Iterator(java.util.Iterator) MalformedURLException(java.net.MalformedURLException) Files(java.nio.file.Files) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileUtils(org.apache.commons.io.FileUtils) Archive(org.gbif.dwc.Archive) DwCAUtil(org.globalbioticinteractions.dataset.DwCAUtil) IOException(java.io.IOException) TARGET_TAXON_NAME(org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME) File(java.io.File) LOCALITY_ID(org.eol.globi.data.DatasetImporterForTSV.LOCALITY_ID) SOURCE_TAXON_PHYLUM(org.eol.globi.service.TaxonUtil.SOURCE_TAXON_PHYLUM) TARGET_TAXON_FAMILY(org.eol.globi.service.TaxonUtil.TARGET_TAXON_FAMILY) TARGET_SEX_NAME(org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) DECIMAL_LATITUDE(org.eol.globi.data.DatasetImporterForTSV.DECIMAL_LATITUDE) DcTerm(org.gbif.dwc.terms.DcTerm) TARGET_TAXON_PATH(org.eol.globi.service.TaxonUtil.TARGET_TAXON_PATH) TARGET_BODY_PART_ID(org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_ID) TARGET_TAXON_PHYLUM(org.eol.globi.service.TaxonUtil.TARGET_TAXON_PHYLUM) DB(org.mapdb.DB) Collections(java.util.Collections) InputStream(java.io.InputStream) REFERENCE_CITATION(org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION) Record(org.gbif.dwc.record.Record) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) DcTerm(org.gbif.dwc.terms.DcTerm) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) Map(java.util.Map) HashMap(java.util.HashMap) BTreeMap(org.mapdb.BTreeMap) TreeMap(java.util.TreeMap) DwcTerm(org.gbif.dwc.terms.DwcTerm)

Aggregations

Record (org.gbif.dwc.record.Record)10 TreeMap (java.util.TreeMap)6 HashMap (java.util.HashMap)5 Map (java.util.Map)5 ArrayList (java.util.ArrayList)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 JsonNode (com.fasterxml.jackson.databind.JsonNode)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 File (java.io.File)3 IOException (java.io.IOException)3 URI (java.net.URI)3 URISyntaxException (java.net.URISyntaxException)3 URL (java.net.URL)3 Arrays (java.util.Arrays)3 List (java.util.List)3 Set (java.util.Set)3 MapUtils (org.apache.commons.collections4.MapUtils)3 StringUtils (org.apache.commons.lang3.StringUtils)3 DATASET_CITATION (org.eol.globi.data.DatasetImporterForTSV.DATASET_CITATION)3 INTERACTION_TYPE_ID (org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID)3