Example 1 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class ExtensionManagerImpl method migrateResourceToNewExtensionVersion.

/**
 * Migrate a resource's extension mappings from the current version of an extension to a newer version of that
 * extension.
 *
 * @param r       resource whose mappings must be migrated
 * @param current current version of the extension
 * @param newer   newer version of the extension to migrate mappings to
 */
protected void migrateResourceToNewExtensionVersion(Resource r, Extension current, Extension newer) {
    // sanity check that the current and newer extensions share the same rowType
    if (!current.getRowType().equalsIgnoreCase(newer.getRowType()) || r.getMappings(current.getRowType()).isEmpty()) {
        throw new IllegalStateException();
    }
    LOG.info("Migrating " + r.getShortname() + " mappings to extension " + current.getRowType() + " to latest extension version");
    // populate various sets to keep track of how many terms were deprecated, how many terms' vocabularies were updated, etc.
    Set<ExtensionProperty> deprecated = new HashSet<>();
    Set<ExtensionProperty> vocabulariesRemoved = new HashSet<>();
    Set<ExtensionProperty> vocabulariesUnchanged = new HashSet<>();
    Set<ExtensionProperty> vocabulariesUpdated = new HashSet<>();
    for (ExtensionProperty property : current.getProperties()) {
        // does the newer extension still contain this property?
        if (!newer.hasProperty(property.qualifiedName())) {
            deprecated.add(property);
        } else if (property.getVocabulary() != null) {
            // if so, check whether this property uses a vocabulary, and whether the newer extension uses a newer version of it
            Vocabulary v1 = property.getVocabulary();
            Vocabulary v2 = newer.getProperty(property.qualifiedName()).getVocabulary();
            if (v2 == null) {
                // case 1: vocabulary removed in the newer version
                vocabulariesRemoved.add(property);
            } else if (v1.getUriString().equalsIgnoreCase(v2.getUriString())) {
                // case 2: vocabulary is unchanged between versions
                vocabulariesUnchanged.add(property);
            } else {
                // case 3: vocabulary has been updated in the newer version
                vocabulariesUpdated.add(property);
            }
        }
    }
    LOG.debug(deprecated.size() + " properties have been deprecated in the newer version");
    LOG.debug(vocabulariesRemoved.size() + " properties in the newer version of extension no longer use a vocabulary");
    LOG.debug(vocabulariesUnchanged.size() + " properties in the newer version of extension use the same vocabulary");
    LOG.debug(vocabulariesUpdated.size() + " properties in the newer version of extension use a newer vocabulary");
    // set of new terms (terms to add)
    Set<ExtensionProperty> added = new HashSet<>();
    for (ExtensionProperty property : newer.getProperties()) {
        // does the older extension contain this property?
        if (!current.hasProperty(property.qualifiedName())) {
            added.add(property);
        }
    }
    LOG.debug("Newer version of extension has " + added.size() + " new properties");
    for (ExtensionMapping extensionMapping : r.getMappings(current.getRowType())) {
        migrateExtensionMapping(extensionMapping, newer, deprecated);
    }
}
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) Vocabulary(org.gbif.ipt.model.Vocabulary) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) HashSet(java.util.HashSet)
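To see the deprecation check above in isolation, here is a minimal, self-contained sketch (not IPT code) that partitions a current extension's properties into deprecated and retained sets by qualified name. The Prop record and the two lists are hypothetical stand-ins for ExtensionProperty and Extension.getProperties().

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class PropertyDiffSketch {

    // hypothetical stand-in for org.gbif.ipt.model.ExtensionProperty
    record Prop(String qualifiedName) {}

    public static void main(String[] args) {
        List<Prop> current = List.of(new Prop("dwc:occurrenceID"), new Prop("dwc:individualID"));
        List<Prop> newer = List.of(new Prop("dwc:occurrenceID"), new Prop("dwc:organismID"));

        Set<String> newerNames = new HashSet<>();
        newer.forEach(p -> newerNames.add(p.qualifiedName()));

        // properties present in the current version but missing from the newer one are deprecated
        Set<Prop> deprecated = new HashSet<>();
        for (Prop p : current) {
            if (!newerNames.contains(p.qualifiedName())) {
                deprecated.add(p);
            }
        }
        // prints the single deprecated property, dwc:individualID
        System.out.println("Deprecated: " + deprecated);
    }
}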

Example 2 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class ExtensionManagerImpl method migrateExtensionMapping.

/**
 * Migrate an ExtensionMapping to use a newer version of that extension:
 * 1. Migrate property mappings for deprecated terms that have been replaced by another term. Note that the replacing
 * term must be included in the newer extension version and cannot already be mapped.
 * 2. Remove property mappings for deprecated terms that have NOT been replaced by another term.
 *
 * @param extensionMapping ExtensionMapping to migrate to use newer version of Extension
 * @param newer            newer version of Extension ExtensionMapping is based on
 * @param deprecated       set of ExtensionProperty deprecated in newer version of Extension
 */
private ExtensionMapping migrateExtensionMapping(ExtensionMapping extensionMapping, Extension newer, Set<ExtensionProperty> deprecated) {
    LOG.debug("Migrating extension mapping...");
    // update Extension
    extensionMapping.setExtension(newer);
    // migrate or remove property mappings to deprecated terms
    for (ExtensionProperty deprecatedProperty : deprecated) {
        Term replacedBy = TERMS_REPLACED_BY_ANOTHER_TERM.get(deprecatedProperty.qualifiedName());
        // replacing term must exist in new extension, and it cannot already be mapped!
        if (replacedBy != null && newer.getProperty(replacedBy) != null && !extensionMapping.isMapped(replacedBy)) {
            PropertyMapping pm = extensionMapping.getField(deprecatedProperty.qualifiedName());
            ExtensionProperty ep = newer.getProperty(replacedBy);
            if (pm != null && ep != null) {
                pm.setTerm(ep);
                LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName() + " has been migrated to term " + replacedBy.qualifiedName());
            }
        } else {
            // otherwise simply remove the property mapping
            LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName() + " cannot be migrated therefore it is being removed!");
            removePropertyMapping(extensionMapping, deprecatedProperty.qualifiedName());
        }
    }
    return extensionMapping;
}
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) PropertyMapping(org.gbif.ipt.model.PropertyMapping) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) DcTerm(org.gbif.dwc.terms.DcTerm)
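The migration above depends on a static lookup, TERMS_REPLACED_BY_ANOTHER_TERM, that maps the qualified name of a deprecated term to its replacement Term. A minimal sketch of what such a lookup could look like, assuming the dwc-api DwcTerm enum; the entry shown (individualID replaced by organismID) is illustrative and may not match the map's actual contents in ExtensionManagerImpl.

import java.util.Map;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.Term;

class ReplacementLookupSketch {

    // illustrative contents only; the real map in ExtensionManagerImpl may differ
    static final Map<String, Term> TERMS_REPLACED_BY_ANOTHER_TERM =
        Map.of("http://rs.tdwg.org/dwc/terms/individualID", DwcTerm.organismID);

    // returns null when the deprecated term has no replacement, in which case its mapping is removed
    static Term replacementFor(String deprecatedQualifiedName) {
        return TERMS_REPLACED_BY_ANOTHER_TERM.get(deprecatedQualifiedName);
    }
}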

Example 3 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class GenerateDwca method assignIndexesOrderedByExtension.

/**
 * Iterate through the ordered list of mapped ExtensionProperty and reassign the ArchiveFile's ArchiveField indexes
 * based on the order of their appearance in that list, taking care to reserve index 0 for the ID column.
 *
 * @param propertyList ordered list of those ExtensionProperty that have been mapped
 * @param af ArchiveFile
 */
private void assignIndexesOrderedByExtension(List<ExtensionProperty> propertyList, ArchiveFile af) {
    for (int propertyIndex = 0; propertyIndex < propertyList.size(); propertyIndex++) {
        ExtensionProperty extensionProperty = propertyList.get(propertyIndex);
        // retrieve the dwc-api Term corresponding to ExtensionProperty
        Term term = TERM_FACTORY.findTerm(extensionProperty.getQualname());
        // lookup ArchiveField using dwc-api Term
        ArchiveField f = af.getField(term);
        if (f != null && f.getIndex() == null) {
            // create new field index corresponding to its position in ordered list of columns indexed
            // +1 because index 0 is reserved for ID column
            int fieldIndex = propertyIndex + 1;
            // assign ArchiveField new index so that meta.xml file mirrors the ordered field order
            f.setIndex(fieldIndex);
        } else {
            log.warn("Skipping ExtensionProperty: " + extensionProperty.getQualname());
        }
    }
}
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) ArchiveField(org.gbif.dwc.ArchiveField)
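The +1 offset is easiest to see with a tiny standalone example. The following sketch (hypothetical names, not IPT code) prints the column index each mapped property would receive, with index 0 left for the ID column.

import java.util.List;

class IndexAssignmentSketch {

    public static void main(String[] args) {
        // hypothetical ordered list of mapped property names
        List<String> orderedProperties = List.of("dwc:scientificName", "dwc:taxonRank", "dwc:kingdom");
        // index 0 is reserved for the ID column, so mapped columns start at index 1
        for (int i = 0; i < orderedProperties.size(); i++) {
            System.out.println("column " + (i + 1) + " -> " + orderedProperties.get(i));
        }
    }
}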

Example 4 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class GenerateDwca method addFieldsToArchive.

/**
 * First we need to find the union of all terms mapped (in all files) for a single Extension. Then make each mapped
 * term a field in the final archive. Static/default mappings are not stored for a field, since they are not
 * expressed in meta.xml but instead get written to the data file.
 *
 * @param mappings list of ExtensionMapping
 * @param af ArchiveFile
 *
 * @return set of conceptTerms that have been mapped (in all files) for a single Extension
 */
private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFile af) throws GeneratorException {
    Set<Term> mappedConceptTerms = new HashSet<>();
    for (ExtensionMapping m : mappings) {
        // multi-value field delimiter, part of each source data configuration
        String delimitedBy = StringUtils.trimToNull(m.getSource().getMultiValueFieldsDelimitedBy());
        for (PropertyMapping pm : m.getFields()) {
            Term term = TERM_FACTORY.findTerm(pm.getTerm().qualifiedName());
            // ensure Extension has concept term
            if (term != null && m.getExtension().getProperty(term) != null) {
                if (af.hasTerm(term)) {
                    ArchiveField field = af.getField(term);
                    mappedConceptTerms.add(term);
                    // multi-value delimiter must be same across all sources
                    if (field.getDelimitedBy() != null && !field.getDelimitedBy().equals(delimitedBy)) {
                        throw new GeneratorException("More than one type of multi-value field delimiter is being used in the source files mapped to the " + m.getExtension().getName() + " extension. Please either ensure all source files mapped to this extension use the same delimiter, otherwise just leave the delimiter blank.");
                    }
                } else {
                    if ((pm.getIndex() != null && pm.getIndex() >= 0) || pm.getIndex() == null) {
                        log.debug("Handling property mapping for term: " + term.qualifiedName() + " (index " + pm.getIndex() + ")");
                        af.addField(buildField(term, delimitedBy));
                        mappedConceptTerms.add(term);
                    }
                }
            }
        }
        // if Extension has datasetID concept term, check if resource DOI should be used as value for mapping
        ExtensionProperty ep = m.getExtension().getProperty(DwcTerm.datasetID.qualifiedName());
        if (ep != null && m.isDoiUsedForDatasetId()) {
            log.debug("Detected that resource DOI to be used as value for datasetID mapping..");
            // include datasetID field in ArchiveFile
            ArchiveField f = buildField(DwcTerm.datasetID, null);
            af.addField(f);
            // include datasetID field mapping in ExtensionMapping
            PropertyMapping pm = new PropertyMapping(f);
            pm.setTerm(ep);
            m.getFields().add(pm);
            // include datasetID in set of all terms mapped for Extension
            mappedConceptTerms.add(DwcTerm.datasetID);
        }
    }
    return mappedConceptTerms;
}
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) PropertyMapping(org.gbif.ipt.model.PropertyMapping) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) ArchiveField(org.gbif.dwc.ArchiveField) HashSet(java.util.HashSet)
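The delimiter-consistency rule enforced above can be illustrated with simplified stand-ins. In this sketch (TermMapping is a hypothetical record, not an IPT type), the first delimiter seen for a term is remembered, and any later mapping of the same term with a different delimiter fails, mirroring in spirit the GeneratorException thrown in addFieldsToArchive; delimiters are assumed non-null here.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

class DelimiterCheckSketch {

    // hypothetical stand-in for a (term, multi-value delimiter) pair; "" means no delimiter
    record TermMapping(String term, String delimitedBy) {}

    static void check(List<TermMapping> mappings) {
        Map<String, String> delimiterByTerm = new HashMap<>();
        for (TermMapping m : mappings) {
            String previous = delimiterByTerm.putIfAbsent(m.term(), m.delimitedBy());
            if (previous != null && !previous.equals(m.delimitedBy())) {
                throw new IllegalStateException("Conflicting multi-value delimiters for term " + m.term());
            }
        }
    }

    public static void main(String[] args) {
        try {
            // the same term mapped twice with different delimiters triggers the failure
            check(List.of(new TermMapping("dwc:associatedMedia", "|"), new TermMapping("dwc:associatedMedia", ";")));
        } catch (IllegalStateException e) {
            System.out.println(e.getMessage());
        }
    }
}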

Example 5 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class GenerateDwca method addDataFile.

/**
 * Adds a single data file for a list of extension mappings that must all be mapped to the same extension.
 * <br/>
 * The ID column is always the 1st column (index 0) and is always equal to the core record identifier that has been
 * mapped (e.g. occurrenceID, taxonID, etc).
 *
 * @param mappings list of ExtensionMapping
 * @param rowLimit maximum number of rows to write
 * @throws IllegalArgumentException if not all mappings are mapped to the same extension
 * @throws InterruptedException if the thread was interrupted
 * @throws IOException if problems occurred while persisting new data files
 * @throws GeneratorException if any problem was encountered writing data file
 */
public void addDataFile(List<ExtensionMapping> mappings, @Nullable Integer rowLimit) throws IOException, IllegalArgumentException, InterruptedException, GeneratorException {
    checkForInterruption();
    if (mappings == null || mappings.isEmpty()) {
        return;
    }
    // update reporting
    currRecords = 0;
    currRecordsSkipped = 0;
    Extension ext = mappings.get(0).getExtension();
    currExtension = ext.getTitle();
    // verify that all mappings share this extension
    for (ExtensionMapping m : mappings) {
        if (!ext.equals(m.getExtension())) {
            throw new IllegalArgumentException("All mappings for a single data file need to be mapped to the same extension: " + ext.getRowType());
        }
    }
    // create new tab file with the help of the Archive class representing the core file or an extension
    ArchiveFile af = ArchiveFile.buildTabFile();
    af.setRowType(TERM_FACTORY.findTerm(ext.getRowType()));
    af.setEncoding(CHARACTER_ENCODING);
    af.setDateFormat("YYYY-MM-DD");
    // in the generated file column 0 will be the id column
    ArchiveField idField = new ArchiveField();
    idField.setIndex(ID_COLUMN_INDEX);
    af.setId(idField);
    // find the union of all terms mapped and make them a field in the final archive
    Set<Term> mappedConceptTerms = addFieldsToArchive(mappings, af);
    // retrieve the ordered list of mapped ExtensionProperty
    List<ExtensionProperty> propertyList = getOrderedMappedExtensionProperties(ext, mappedConceptTerms);
    // reassign indexes ordered by Extension
    assignIndexesOrderedByExtension(propertyList, af);
    // total column count is equal to id column + mapped columns
    int totalColumns = 1 + propertyList.size();
    // create file name from extension name, with incremental suffix to resolve name conflicts (e.g. taxon.txt,
    // taxon2.txt, taxon3.txt)
    String extensionName = (ext.getName() == null) ? "f" : ext.getName().toLowerCase().replaceAll("\\s", "_");
    String fn = createFileName(dwcaFolder, extensionName);
    // open new file writer for single data file
    File dataFile = new File(dwcaFolder, fn);
    // ready to go through each mapping and dump the data
    try (Writer writer = org.gbif.utils.file.FileUtils.startNewUtf8File(dataFile)) {
        af.addLocation(dataFile.getName());
        addMessage(Level.INFO, "Start writing data file for " + currExtension);
        boolean headerWritten = false;
        for (ExtensionMapping m : mappings) {
            // prepare index ordered list of all output columns apart from id column
            PropertyMapping[] inCols = new PropertyMapping[totalColumns];
            for (ArchiveField f : af.getFields().values()) {
                if (f.getIndex() != null && f.getIndex() > ID_COLUMN_INDEX) {
                    inCols[f.getIndex()] = m.getField(f.getTerm().qualifiedName());
                }
            }
            // write header line 1 time only to file
            if (!headerWritten) {
                writeHeaderLine(propertyList, totalColumns, af, writer);
                headerWritten = true;
            }
            // write data (records) to file
            dumpData(writer, inCols, m, totalColumns, rowLimit, resource.getDoi());
            // store record number by extension rowType
            recordsByExtension.put(ext.getRowType(), currRecords);
        }
    } catch (IOException e) {
        // some error writing this file, report
        log.error("Fatal DwC-A Generator Error encountered while writing header line to data file", e);
        // set last error report!
        setState(e);
        throw new GeneratorException("Error writing header line to data file", e);
    }
    // add archive file to archive
    if (resource.getCoreRowType() != null && resource.getCoreRowType().equalsIgnoreCase(ext.getRowType())) {
        archive.setCore(af);
    } else {
        archive.addExtension(af);
    }
    // final reporting
    addMessage(Level.INFO, "Data file written for " + currExtension + " with " + currRecords + " records and " + totalColumns + " columns");
    // how many records were skipped?
    if (currRecordsSkipped > 0) {
        addMessage(Level.WARN, "!!! " + currRecordsSkipped + " records were skipped for " + currExtension + " due to errors interpreting line, or because the line was empty");
    }
}
Also used : DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) IOException(java.io.IOException) ArchiveFile(org.gbif.dwc.ArchiveFile) Extension(org.gbif.ipt.model.Extension) ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) PropertyMapping(org.gbif.ipt.model.PropertyMapping) File(java.io.File) ArchiveField(org.gbif.dwc.ArchiveField) MetaDescriptorWriter(org.gbif.dwc.MetaDescriptorWriter) PrintWriter(java.io.PrintWriter) Writer(java.io.Writer) StringWriter(java.io.StringWriter)
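Stripped of the mapping and writing logic, the archive-assembly calls in addDataFile reduce to a small pattern: build a tab file, set its row type, reserve column 0 for the record identifier, and attach the file to the archive. A minimal sketch using only calls that appear above, under the assumption that org.gbif.dwc.Archive has a public no-argument constructor and that the Occurrence row-type URI shown is the one wanted.

import org.gbif.dwc.Archive;
import org.gbif.dwc.ArchiveField;
import org.gbif.dwc.ArchiveFile;
import org.gbif.dwc.terms.TermFactory;

class ArchiveAssemblySketch {

    static Archive buildMinimalArchive() {
        Archive archive = new Archive();
        ArchiveFile core = ArchiveFile.buildTabFile();
        // the row type identifies what each record represents, e.g. an Occurrence
        core.setRowType(TermFactory.instance().findTerm("http://rs.tdwg.org/dwc/terms/Occurrence"));
        // column 0 is reserved for the record identifier
        ArchiveField id = new ArchiveField();
        id.setIndex(0);
        core.setId(id);
        // the file holding the core record type becomes the archive core
        archive.setCore(core);
        return archive;
    }
}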

Aggregations

ExtensionProperty (org.gbif.ipt.model.ExtensionProperty) 18
Extension (org.gbif.ipt.model.Extension) 9
ExtensionMapping (org.gbif.ipt.model.ExtensionMapping) 9
PropertyMapping (org.gbif.ipt.model.PropertyMapping) 9
HashSet (java.util.HashSet) 6
DwcTerm (org.gbif.dwc.terms.DwcTerm) 5
Term (org.gbif.dwc.terms.Term) 5
ArrayList (java.util.ArrayList) 4
Test (org.junit.jupiter.api.Test) 4
ArchiveField (org.gbif.dwc.ArchiveField) 3
Resource (org.gbif.ipt.model.Resource) 3
Vocabulary (org.gbif.ipt.model.Vocabulary) 3
BeforeEach (org.junit.jupiter.api.BeforeEach) 3
HttpServletRequest (javax.servlet.http.HttpServletRequest) 2
ResourceManager (org.gbif.ipt.service.manage.ResourceManager) 2
ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString) 2
DefaultLocaleProviderFactory (com.opensymphony.xwork2.DefaultLocaleProviderFactory) 1
LocaleProviderFactory (com.opensymphony.xwork2.LocaleProviderFactory) 1
Container (com.opensymphony.xwork2.inject.Container) 1
File (java.io.File) 1