Search in sources :

Example 1 with PropertyMapping

use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.

In the class ExtensionManagerImpl, the method removePropertyMapping:

/**
 * Remove a PropertyMapping from an ExtensionMapping.
 *
 * @param extensionMapping ExtensionMapping to remove the field mapping from
 * @param qualifiedName    qualified name of the PropertyMapping term to remove
 */
private void removePropertyMapping(ExtensionMapping extensionMapping, String qualifiedName) {
    PropertyMapping pm = extensionMapping.getField(qualifiedName);
    // Set.remove already reports whether the element was present, so a separate
    // contains() lookup before removing is redundant
    if (pm != null && extensionMapping.getFields().remove(pm)) {
        LOG.debug("Removed mapping to term " + pm.getTerm().qualifiedName());
    }
}
Also used : PropertyMapping(org.gbif.ipt.model.PropertyMapping)

Example 2 with PropertyMapping

use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.

In the class ExtensionManagerImpl, the method migrateExtensionMapping:

/**
 * Migrate an ExtensionMapping to use a newer version of that extension:
 * 1. Migrate property mappings for deprecated terms that have been replaced by another term. Careful, the replacing
 * term must be included in the newer extension version, and cannot already be mapped
 * 2. Remove property mappings for deprecated terms that have NOT been replaced by another term
 *
 * @param extensionMapping ExtensionMapping to migrate to use newer version of Extension
 * @param newer            newer version of Extension ExtensionMapping is based on
 * @param deprecated       set of ExtensionProperty deprecated in newer version of Extension
 *
 * @return the migrated ExtensionMapping (same instance, mutated in place)
 */
private ExtensionMapping migrateExtensionMapping(ExtensionMapping extensionMapping, Extension newer, Set<ExtensionProperty> deprecated) {
    LOG.debug("Migrating extension mapping...");
    // update Extension
    extensionMapping.setExtension(newer);
    // migrate or remove property mappings to deprecated terms
    for (ExtensionProperty deprecatedProperty : deprecated) {
        Term replacedBy = TERMS_REPLACED_BY_ANOTHER_TERM.get(deprecatedProperty.qualifiedName());
        // look up the replacing property once — it is non-null only when the newer
        // extension version actually includes the replacing term
        ExtensionProperty replacement = (replacedBy == null) ? null : newer.getProperty(replacedBy);
        // replacing term must exist in new extension, and it cannot already be mapped!
        if (replacement != null && !extensionMapping.isMapped(replacedBy)) {
            PropertyMapping pm = extensionMapping.getField(deprecatedProperty.qualifiedName());
            // pm is null when the deprecated term was never mapped — nothing to migrate
            if (pm != null) {
                pm.setTerm(replacement);
                LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName() + " has been migrated to term " + replacedBy.qualifiedName());
            }
        } else // otherwise simply remove the property mapping
        {
            LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName() + " cannot be migrated therefore it is being removed!");
            removePropertyMapping(extensionMapping, deprecatedProperty.qualifiedName());
        }
    }
    return extensionMapping;
}
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) PropertyMapping(org.gbif.ipt.model.PropertyMapping) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) DcTerm(org.gbif.dwc.terms.DcTerm)

Example 3 with PropertyMapping

use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.

In the class GenerateDwca, the method applyTranslations:

/**
 * Apply translations or default values to row, for all mapped properties.
 * </br>
 * Each mapped property is processed in turn: a translated value replaces the raw source value (both in the
 * output record and in the original row), a configured default fills any remaining null, and the resource DOI
 * overrides the datasetID column when that option is enabled.
 *
 * @param inCols values array, of columns in row that have been mapped
 * @param in values array, of all columns in row
 * @param doiUsedForDatasetId true if mapping should use resource DOI as datasetID, false otherwise
 * @param doi DOI assigned to resource
 */
private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] record, boolean doiUsedForDatasetId, DOI doi) {
    // column 0 is the id column, so mapped properties start at index 1
    for (int col = 1; col < inCols.length; col++) {
        PropertyMapping mapping = inCols[col];
        if (mapping == null) {
            // unmapped output column — emit null
            record[col] = null;
            continue;
        }
        String value = null;
        if (mapping.getIndex() != null) {
            value = in[mapping.getIndex()];
            // apply a translation if one exists for this raw value
            if (mapping.getTranslation() != null && mapping.getTranslation().containsKey(value)) {
                value = mapping.getTranslation().get(value);
                // keep the original record in sync with the translated value
                in[mapping.getIndex()] = value;
            }
        }
        // fall back to the configured default when no value was found
        if (value == null) {
            value = mapping.getDefaultValue();
        }
        // substitute the resource DOI for the datasetID property when requested
        if (mapping.getTerm().qualifiedName().equalsIgnoreCase(Constants.DWC_DATASET_ID) && doiUsedForDatasetId && doi != null) {
            value = doi.getDoiString();
        }
        // add value to data file record
        record[col] = value;
    }
}
Also used : PropertyMapping(org.gbif.ipt.model.PropertyMapping)

Example 4 with PropertyMapping

use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.

In the class GenerateDwca, the method addFieldsToArchive:

/**
 * First we need to find the union of all terms mapped (in all files) for a single Extension. Then make each mapped
 * term a field in the final archive. Static/default mappings are not stored for a field, since they are not
 * expressed in meta.xml but instead get written to the data file.
 *
 * @param mappings list of ExtensionMapping
 * @param af ArchiveFile
 *
 * @return set of conceptTerms that have been mapped (in all files) for a single Extension
 *
 * @throws GeneratorException if sources mapped to the same extension use different multi-value delimiters
 */
private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFile af) throws GeneratorException {
    Set<Term> mappedConceptTerms = new HashSet<>();
    for (ExtensionMapping m : mappings) {
        // multi-value field delimiter, part of each source data configuration
        String delimitedBy = StringUtils.trimToNull(m.getSource().getMultiValueFieldsDelimitedBy());
        for (PropertyMapping pm : m.getFields()) {
            Term term = TERM_FACTORY.findTerm(pm.getTerm().qualifiedName());
            // ensure Extension has concept term
            if (term != null && m.getExtension().getProperty(term) != null) {
                if (af.hasTerm(term)) {
                    ArchiveField field = af.getField(term);
                    mappedConceptTerms.add(term);
                    // multi-value delimiter must be same across all sources
                    if (field.getDelimitedBy() != null && !field.getDelimitedBy().equals(delimitedBy)) {
                        throw new GeneratorException("More than one type of multi-value field delimiter is being used in the source files mapped to the " + m.getExtension().getName() + " extension. Please either ensure all source files mapped to this extension use the same delimiter, otherwise just leave the delimiter blank.");
                    }
                } else {
                    // include the field unless it has an explicit negative index;
                    // a null index means a static/default mapping, which is still added
                    // (simplified from the equivalent "(idx != null && idx >= 0) || idx == null")
                    if (pm.getIndex() == null || pm.getIndex() >= 0) {
                        log.debug("Handling property mapping for term: " + term.qualifiedName() + " (index " + pm.getIndex() + ")");
                        af.addField(buildField(term, delimitedBy));
                        mappedConceptTerms.add(term);
                    }
                }
            }
        }
        // if Extension has datasetID concept term, check if resource DOI should be used as value for mapping
        ExtensionProperty ep = m.getExtension().getProperty(DwcTerm.datasetID.qualifiedName());
        if (ep != null && m.isDoiUsedForDatasetId()) {
            log.debug("Detected that resource DOI to be used as value for datasetID mapping..");
            // include datasetID field in ArchiveFile
            ArchiveField f = buildField(DwcTerm.datasetID, null);
            af.addField(f);
            // include datasetID field mapping in ExtensionMapping
            PropertyMapping pm = new PropertyMapping(f);
            pm.setTerm(ep);
            m.getFields().add(pm);
            // include datasetID in set of all terms mapped for Extension
            mappedConceptTerms.add(DwcTerm.datasetID);
        }
    }
    return mappedConceptTerms;
}
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) PropertyMapping(org.gbif.ipt.model.PropertyMapping) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) ArchiveField(org.gbif.dwc.ArchiveField) HashSet(java.util.HashSet)

Example 5 with PropertyMapping

use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.

In the class GenerateDwca, the method addDataFile:

/**
 * Adds a single data file for a list of extension mappings that must all be mapped to the same extension.
 * </br>
 * The ID column is always the 1st column (index 0) and is always equal to the core record identifier that has been
 * mapped (e.g. occurrenceID, taxonID, etc).
 *
 * @param mappings list of ExtensionMapping
 * @param rowLimit maximum number of rows to write
 * @throws IllegalArgumentException if not all mappings are mapped to the same extension
 * @throws InterruptedException if the thread was interrupted
 * @throws IOException if problems occurred while persisting new data files
 * @throws GeneratorException if any problem was encountered writing data file
 */
public void addDataFile(List<ExtensionMapping> mappings, @Nullable Integer rowLimit) throws IOException, IllegalArgumentException, InterruptedException, GeneratorException {
    checkForInterruption();
    // nothing to do when there are no mappings
    if (mappings == null || mappings.isEmpty()) {
        return;
    }
    // update reporting
    currRecords = 0;
    currRecordsSkipped = 0;
    // the first mapping's extension is the reference all others must match
    Extension ext = mappings.get(0).getExtension();
    currExtension = ext.getTitle();
    // verify that all mappings share this extension
    for (ExtensionMapping m : mappings) {
        if (!ext.equals(m.getExtension())) {
            throw new IllegalArgumentException("All mappings for a single data file need to be mapped to the same extension: " + ext.getRowType());
        }
    }
    // create new tab file with the help of the Archive class representing the core file or an extension
    ArchiveFile af = ArchiveFile.buildTabFile();
    af.setRowType(TERM_FACTORY.findTerm(ext.getRowType()));
    af.setEncoding(CHARACTER_ENCODING);
    af.setDateFormat("YYYY-MM-DD");
    // in the generated file column 0 will be the id column
    ArchiveField idField = new ArchiveField();
    idField.setIndex(ID_COLUMN_INDEX);
    af.setId(idField);
    // find the union of all terms mapped and make them a field in the final archive
    Set<Term> mappedConceptTerms = addFieldsToArchive(mappings, af);
    // retrieve the ordered list of mapped ExtensionProperty
    List<ExtensionProperty> propertyList = getOrderedMappedExtensionProperties(ext, mappedConceptTerms);
    // reassign indexes ordered by Extension
    assignIndexesOrderedByExtension(propertyList, af);
    // total column count is equal to id column + mapped columns
    int totalColumns = 1 + propertyList.size();
    // create file name from extension name, with incremental suffix to resolve name conflicts (e.g. taxon.txt,
    // taxon2.txt, taxon3.txt)
    String extensionName = (ext.getName() == null) ? "f" : ext.getName().toLowerCase().replaceAll("\\s", "_");
    String fn = createFileName(dwcaFolder, extensionName);
    // open new file writer for single data file
    File dataFile = new File(dwcaFolder, fn);
    // ready to go though each mapping and dump the data
    try (Writer writer = org.gbif.utils.file.FileUtils.startNewUtf8File(dataFile)) {
        af.addLocation(dataFile.getName());
        addMessage(Level.INFO, "Start writing data file for " + currExtension);
        boolean headerWritten = false;
        for (ExtensionMapping m : mappings) {
            // prepare index ordered list of all output columns apart from id column
            PropertyMapping[] inCols = new PropertyMapping[totalColumns];
            for (ArchiveField f : af.getFields().values()) {
                // only columns after the id column carry mapped terms
                if (f.getIndex() != null && f.getIndex() > ID_COLUMN_INDEX) {
                    inCols[f.getIndex()] = m.getField(f.getTerm().qualifiedName());
                }
            }
            // write header line 1 time only to file
            if (!headerWritten) {
                writeHeaderLine(propertyList, totalColumns, af, writer);
                headerWritten = true;
            }
            // write data (records) to file
            dumpData(writer, inCols, m, totalColumns, rowLimit, resource.getDoi());
            // store record number by extension rowType
            recordsByExtension.put(ext.getRowType(), currRecords);
        }
    } catch (IOException e) {
        // some error writing this file, report
        log.error("Fatal DwC-A Generator Error encountered while writing header line to data file", e);
        // set last error report!
        setState(e);
        throw new GeneratorException("Error writing header line to data file", e);
    }
    // add archive file to archive: the file whose rowType matches the resource core becomes
    // the archive core; every other rowType is added as an extension
    if (resource.getCoreRowType() != null && resource.getCoreRowType().equalsIgnoreCase(ext.getRowType())) {
        archive.setCore(af);
    } else {
        archive.addExtension(af);
    }
    // final reporting
    addMessage(Level.INFO, "Data file written for " + currExtension + " with " + currRecords + " records and " + totalColumns + " columns");
    // how many records were skipped?
    if (currRecordsSkipped > 0) {
        addMessage(Level.WARN, "!!! " + currRecordsSkipped + " records were skipped for " + currExtension + " due to errors interpreting line, or because the line was empty");
    }
}
Also used : DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) IOException(java.io.IOException) ArchiveFile(org.gbif.dwc.ArchiveFile) Extension(org.gbif.ipt.model.Extension) ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) PropertyMapping(org.gbif.ipt.model.PropertyMapping) ArchiveFile(org.gbif.dwc.ArchiveFile) File(java.io.File) ArchiveField(org.gbif.dwc.ArchiveField) MetaDescriptorWriter(org.gbif.dwc.MetaDescriptorWriter) PrintWriter(java.io.PrintWriter) Writer(java.io.Writer) StringWriter(java.io.StringWriter)

Aggregations

PropertyMapping (org.gbif.ipt.model.PropertyMapping)25 ExtensionMapping (org.gbif.ipt.model.ExtensionMapping)14 Extension (org.gbif.ipt.model.Extension)10 ExtensionProperty (org.gbif.ipt.model.ExtensionProperty)9 Resource (org.gbif.ipt.model.Resource)9 Test (org.junit.jupiter.api.Test)7 HashSet (java.util.HashSet)6 File (java.io.File)5 ArrayList (java.util.ArrayList)5 DwcTerm (org.gbif.dwc.terms.DwcTerm)5 Term (org.gbif.dwc.terms.Term)5 ResourceManager (org.gbif.ipt.service.manage.ResourceManager)5 ArchiveField (org.gbif.dwc.ArchiveField)4 TextFileSource (org.gbif.ipt.model.TextFileSource)4 TreeSet (java.util.TreeSet)3 BeforeEach (org.junit.jupiter.api.BeforeEach)3 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 HttpServletRequest (javax.servlet.http.HttpServletRequest)2