Example 1 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class ExtensionManagerImpl method migrateResourceToNewExtensionVersion.

 * Migrate a resource's mappings to an extension so that they use a newer version of that extension.
 * @param r       resource whose mappings must be migrated
 * @param current extension
 * @param newer   newer version of extension to migrate mappings to
// Compares the current and newer versions of an extension property by property, logs the differences, and then
// hands each of the resource's mappings for that rowType to migrateExtensionMapping.
// NOTE(review): this excerpt is truncated - several branch bodies and most closing braces are missing.
protected void migrateResourceToNewExtensionVersion(Resource r, Extension current, Extension newer) {
    // sanity check: both extension versions must share the same rowType (compared case-insensitively) and the
    // resource must have at least one mapping to that rowType, otherwise an IllegalStateException is thrown
    if (!current.getRowType().equalsIgnoreCase(newer.getRowType()) || r.getMappings(current.getRowType()).isEmpty()) {
        throw new IllegalStateException();
        // NOTE(review): the next line looks like the tail of a logging call whose beginning was lost in extraction
        // (presumably a LOG.debug or LOG.info call) - confirm against the original source
    }"Migrating " + r.getShortname() + " mappings to extension " + current.getRowType() + " to latest extension version");
    // sets used to keep track of which terms were deprecated and how each term's vocabulary changed
    Set<ExtensionProperty> deprecated = new HashSet<>();
    Set<ExtensionProperty> vocabulariesRemoved = new HashSet<>();
    Set<ExtensionProperty> vocabulariesUnchanged = new HashSet<>();
    Set<ExtensionProperty> vocabulariesUpdated = new HashSet<>();
    for (ExtensionProperty property : current.getProperties()) {
        // does the newer extension still contain this property (looked up by qualified name)?
        // NOTE(review): the body of this branch is empty here; presumably it adds the property to the deprecated set
        if (!newer.hasProperty(property.qualifiedName())) {
        } else // if so, check if this property uses a vocabulary, and whether the newer extension uses a newer version of it
            if (property.getVocabulary() != null) {
                // v1 is the vocabulary of the current property, v2 that of the same property in the newer extension
                Vocabulary v1 = property.getVocabulary();
                Vocabulary v2 = newer.getProperty(property.qualifiedName()).getVocabulary();
                // case 1: vocabulary removed in newer version
                // NOTE(review): the bodies of all three cases are empty in this excerpt; presumably each one adds the
                // property to the matching vocabularies set declared above
                if (v2 == null) {
                } else // case 2: vocabulary versions are unchanged between versions (URIs equal ignoring case)
                if (v1.getUriString().equalsIgnoreCase(v2.getUriString())) {
                } else // case 3: vocabulary has been updated in newer version (exact negation of the case 2 test)
                if (!v1.getUriString().equalsIgnoreCase(v2.getUriString())) {
    // report the size of each tracking set
    LOG.debug(deprecated.size() + " properties have been deprecated in the newer version");
    LOG.debug(vocabulariesRemoved.size() + " properties in the newer version of extension no longer use a vocabulary");
    LOG.debug(vocabulariesUnchanged.size() + " properties in the newer version of extension use the same vocabulary");
    LOG.debug(vocabulariesUpdated.size() + " properties in the newer version of extension use a newer vocabulary");
    // set of new terms (terms to add); in the visible code it is only used for the log message below
    Set<ExtensionProperty> added = new HashSet<>();
    for (ExtensionProperty property : newer.getProperties()) {
        // does the older extension contain this property?
        // NOTE(review): branch body missing in this excerpt; presumably it adds the property to the added set
        if (!current.hasProperty(property.qualifiedName())) {
    LOG.debug("Newer version of extension has " + added.size() + " new properties");
    // migrate each mapping of the resource to this rowType; only the deprecated set is passed along
    for (ExtensionMapping extensionMapping : r.getMappings(current.getRowType())) {
        migrateExtensionMapping(extensionMapping, newer, deprecated);
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) Vocabulary(org.gbif.ipt.model.Vocabulary) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) HashSet(java.util.HashSet)

Example 2 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class ExtensionManagerImpl method migrateExtensionMapping.

 * Migrate an ExtensionMapping to use a newer version of that extension:
 * 1. Migrate property mappings for deprecated terms that have been replaced by another term. Careful, the replacing
 * term must be included in the newer extension version, and cannot already be mapped
 * 2. Remove property mappings for deprecated terms that have NOT been replaced by another term
 * @param extensionMapping ExtensionMapping to migrate to use newer version of Extension
 * @param newer            newer version of Extension ExtensionMapping is based on
 * @param deprecated       set of ExtensionProperty deprecated in newer version of Extension
// Walks the deprecated properties and, for each one, either migrates its property mapping to the replacing term or
// removes the mapping; returns the same ExtensionMapping instance that was passed in.
// NOTE(review): this excerpt is truncated - some statements and most closing braces are missing.
private ExtensionMapping migrateExtensionMapping(ExtensionMapping extensionMapping, Extension newer, Set<ExtensionProperty> deprecated) {
    LOG.debug("Migrating extension mapping...");
    // update Extension
    // NOTE(review): no statement follows the comment above in this excerpt; presumably the mapping is pointed at
    // the newer extension here - confirm against the original source
    // migrate or remove property mappings to deprecated terms
    for (ExtensionProperty deprecatedProperty : deprecated) {
        // look up the replacing term by the deprecated term's qualified name; the null check below handles the
        // case where no replacement is registered
        Term replacedBy = TERMS_REPLACED_BY_ANOTHER_TERM.get(deprecatedProperty.qualifiedName());
        // replacing term must exist in new extension, and it cannot already be mapped!
        if (replacedBy != null && newer.getProperty(replacedBy) != null && !extensionMapping.isMapped(replacedBy)) {
            // pm is the existing mapping to the deprecated term, ep the replacing property in the newer extension
            PropertyMapping pm = extensionMapping.getField(deprecatedProperty.qualifiedName());
            ExtensionProperty ep = newer.getProperty(replacedBy);
            // NOTE(review): only the log call is visible in this branch; the statement that actually re-targets
            // pm to the replacing term appears to be missing from this excerpt
            if (pm != null && ep != null) {
                LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName() + " has been migrated to term " + replacedBy.qualifiedName());
        } else // otherwise simply remove the property mapping
            LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName() + " cannot be migrated therefore it is being removed!");
            removePropertyMapping(extensionMapping, deprecatedProperty.qualifiedName());
    return extensionMapping;
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) PropertyMapping(org.gbif.ipt.model.PropertyMapping) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) DcTerm(org.gbif.dwc.terms.DcTerm)

Example 3 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class GenerateDwca method assignIndexesOrderedByExtension.

 * Iterate through the ordered list of those ExtensionProperty that have been mapped, and reassign the ArchiveFile
 * ArchiveField indexes based on the order of their appearance in the ordered list. Be careful to reserve index 0 for
 * the ID column.
 * @param propertyList ordered list of those ExtensionProperty that have been mapped
 * @param af ArchiveFile
// Visits the mapped properties in list order and derives, for each one, the column index its ArchiveField should
// get in the archive file (list position + 1).
// NOTE(review): this excerpt is truncated - the index assignment itself and the closing braces are missing.
private void assignIndexesOrderedByExtension(List<ExtensionProperty> propertyList, ArchiveFile af) {
    for (int propertyIndex = 0; propertyIndex < propertyList.size(); propertyIndex++) {
        ExtensionProperty extensionProperty = propertyList.get(propertyIndex);
        // retrieve the dwc-api Term corresponding to ExtensionProperty
        Term term = TERM_FACTORY.findTerm(extensionProperty.getQualname());
        // lookup ArchiveField using dwc-api Term
        ArchiveField f = af.getField(term);
        // only fields that exist in the archive file and have no index yet are handled; anything else is skipped
        // with a warning in the else branch
        if (f != null && f.getIndex() == null) {
            // create new field index corresponding to its position in ordered list of columns indexed
            // +1 because index 0 is reserved for ID column
            int fieldIndex = propertyIndex + 1;
            // assign ArchiveField new index so that meta.xml file mirrors the ordered field order
            // NOTE(review): the statement applying fieldIndex to f is not present in this excerpt
        } else {
            log.warn("Skipping ExtensionProperty: " + extensionProperty.getQualname());
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) ArchiveField(org.gbif.dwc.ArchiveField)

Example 4 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class GenerateDwca method addFieldsToArchive.

 * First we need to find the union of all terms mapped (in all files) for a single Extension. Then make each mapped
 * term a field in the final archive. Static/default mappings are not stored for a field, since they are not
 * expressed in meta.xml but instead get written to the data file.
 * @param mappings list of ExtensionMapping
 * @param af ArchiveFile
 * @return set of conceptTerms that have been mapped (in all files) for a single Extension
// Registers an ArchiveField in af for each mapped term that the mapping's extension defines, checks that sources
// agree on the multi-value delimiter, and returns the set of mapped concept terms.
// NOTE(review): this excerpt is truncated - statements that fill mappedConceptTerms and most closing braces are
// missing, so the set appears never to be populated in the visible code.
private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFile af) throws GeneratorException {
    Set<Term> mappedConceptTerms = new HashSet<>();
    for (ExtensionMapping m : mappings) {
        // multi-value field delimiter, part of each source data configuration (blank values become null)
        String delimitedBy = StringUtils.trimToNull(m.getSource().getMultiValueFieldsDelimitedBy());
        for (PropertyMapping pm : m.getFields()) {
            // resolve the mapped term through the term factory using its qualified name
            Term term = TERM_FACTORY.findTerm(pm.getTerm().qualifiedName());
            // ensure Extension has concept term
            if (term != null && m.getExtension().getProperty(term) != null) {
                // term already registered in the archive file by an earlier mapping: only validate the delimiter
                if (af.hasTerm(term)) {
                    ArchiveField field = af.getField(term);
                    // multi-value delimiter must be same across all sources; note the check only fires when the
                    // already registered field has a non-null delimiter that differs from this source's delimiter
                    if (field.getDelimitedBy() != null && !field.getDelimitedBy().equals(delimitedBy)) {
                        throw new GeneratorException("More than one type of multi-value field delimiter is being used in the source files mapped to the " + m.getExtension().getName() + " extension. Please either ensure all source files mapped to this extension use the same delimiter, otherwise just leave the delimiter blank.");
                } else {
                    // first time this term is seen: add a field unless the mapping index is negative
                    // (the condition is equivalent to: index is null, or index is zero or greater)
                    if ((pm.getIndex() != null && pm.getIndex() >= 0) || pm.getIndex() == null) {
                        log.debug("Handling property mapping for term: " + term.qualifiedName() + " (index " + pm.getIndex() + ")");
                        af.addField(buildField(term, delimitedBy));
        // if Extension has datasetID concept term, check if resource DOI should be used as value for mapping
        ExtensionProperty ep = m.getExtension().getProperty(DwcTerm.datasetID.qualifiedName());
        if (ep != null && m.isDoiUsedForDatasetId()) {
            log.debug("Detected that resource DOI to be used as value for datasetID mapping..");
            // include datasetID field in ArchiveFile
            // NOTE(review): f is built with a null delimiter, but the statement adding it to af is not visible here
            ArchiveField f = buildField(DwcTerm.datasetID, null);
            // include datasetID field mapping in ExtensionMapping
            // NOTE(review): the statement attaching pm to the mapping m is not visible in this excerpt
            PropertyMapping pm = new PropertyMapping(f);
            // include datasetID in set of all terms mapped for Extension
            // NOTE(review): no statement follows the comment above in this excerpt
    return mappedConceptTerms;
Also used : ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) PropertyMapping(org.gbif.ipt.model.PropertyMapping) DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) ArchiveField(org.gbif.dwc.ArchiveField) HashSet(java.util.HashSet)

Example 5 with ExtensionProperty

use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.

the class GenerateDwca method addDataFile.

 * Adds a single data file for a list of extension mappings that must all be mapped to the same extension.
 * <br>
 * The ID column is always the 1st column (index 0) and is always equal to the core record identifier that has been
 * mapped (e.g. occurrenceID, taxonID, etc).
 * @param mappings list of ExtensionMapping
 * @param rowLimit maximum number of rows to write
 * @throws IllegalArgumentException if not all mappings are mapped to the same extension
 * @throws InterruptedException if the thread was interrupted
 * @throws IOException if problems occurred while persisting new data files
 * @throws GeneratorException if any problem was encountered writing data file
// Writes one tab-delimited data file for a group of mappings that share an extension: builds the ArchiveFile
// description, orders the columns, writes the header once, and then dumps the records of every mapping.
// NOTE(review): this excerpt is truncated - several statements and most closing braces are missing.
public void addDataFile(List<ExtensionMapping> mappings, @Nullable Integer rowLimit) throws IOException, IllegalArgumentException, InterruptedException, GeneratorException {
    // NOTE(review): the body of this guard is missing; presumably it returns early when there is nothing to write
    if (mappings == null || mappings.isEmpty()) {
    // update reporting: reset the per-file counters and remember the extension title used in messages
    currRecords = 0;
    currRecordsSkipped = 0;
    Extension ext = mappings.get(0).getExtension();
    currExtension = ext.getTitle();
    // verify that all mappings share this extension (the first mapping's extension is the reference)
    for (ExtensionMapping m : mappings) {
        if (!ext.equals(m.getExtension())) {
            throw new IllegalArgumentException("All mappings for a single data file need to be mapped to the same extension: " + ext.getRowType());
    // create new tab file with the help of the Archive class representing the core file or an extension
    ArchiveFile af = ArchiveFile.buildTabFile();
    // in the generated file column 0 will be the id column
    // NOTE(review): idField is created but its configuration and registration in af are not visible here
    ArchiveField idField = new ArchiveField();
    // find the union of all terms mapped and make them a field in the final archive
    Set<Term> mappedConceptTerms = addFieldsToArchive(mappings, af);
    // retrieve the ordered list of mapped ExtensionProperty
    List<ExtensionProperty> propertyList = getOrderedMappedExtensionProperties(ext, mappedConceptTerms);
    // reassign indexes ordered by Extension
    assignIndexesOrderedByExtension(propertyList, af);
    // total column count is equal to id column + mapped columns
    int totalColumns = 1 + propertyList.size();
    // create file name from extension name, with incremental suffix to resolve name conflicts (e.g. taxon.txt,
    // taxon2.txt, taxon3.txt); a null name falls back to f and whitespace characters are replaced by underscores
    // NOTE(review): toLowerCase() without a Locale depends on the JVM default locale - consider Locale.ROOT
    String extensionName = (ext.getName() == null) ? "f" : ext.getName().toLowerCase().replaceAll("\\s", "_");
    String fn = createFileName(dwcaFolder, extensionName);
    // open new file writer for single data file
    File dataFile = new File(dwcaFolder, fn);
    // ready to go through each mapping and dump the data; try-with-resources closes the writer
    try (Writer writer = org.gbif.utils.file.FileUtils.startNewUtf8File(dataFile)) {
        addMessage(Level.INFO, "Start writing data file for " + currExtension);
        boolean headerWritten = false;
        for (ExtensionMapping m : mappings) {
            // prepare index ordered list of all output columns apart from id column: the array is sized for all
            // columns and each slot at a field's archive index holds this mapping's PropertyMapping for that term
            PropertyMapping[] inCols = new PropertyMapping[totalColumns];
            for (ArchiveField f : af.getFields().values()) {
                // skip fields without an index and anything at or below the id column index
                if (f.getIndex() != null && f.getIndex() > ID_COLUMN_INDEX) {
                    inCols[f.getIndex()] = m.getField(f.getTerm().qualifiedName());
            // write header line 1 time only to file
            if (!headerWritten) {
                writeHeaderLine(propertyList, totalColumns, af, writer);
                headerWritten = true;
            // write data (records) to file
            dumpData(writer, inCols, m, totalColumns, rowLimit, resource.getDoi());
            // store record number by extension rowType
            recordsByExtension.put(ext.getRowType(), currRecords);
    } catch (IOException e) {
        // some error writing this file, report
        // NOTE(review): the messages mention only the header line although the try block also covers dumpData
        log.error("Fatal DwC-A Generator Error encountered while writing header line to data file", e);
        // set last error report!
        // NOTE(review): no statement follows the comment above in this excerpt
        throw new GeneratorException("Error writing header line to data file", e);
    // add archive file to archive
    // NOTE(review): both branch bodies are missing; the condition tests whether ext is the resource's core rowType
    if (resource.getCoreRowType() != null && resource.getCoreRowType().equalsIgnoreCase(ext.getRowType())) {
    } else {
    // final reporting
    addMessage(Level.INFO, "Data file written for " + currExtension + " with " + currRecords + " records and " + totalColumns + " columns");
    // how many records were skipped?
    if (currRecordsSkipped > 0) {
        addMessage(Level.WARN, "!!! " + currRecordsSkipped + " records were skipped for " + currExtension + " due to errors interpreting line, or because the line was empty");
Also used : DwcTerm(org.gbif.dwc.terms.DwcTerm) Term(org.gbif.dwc.terms.Term) IOException(java.io.IOException) ArchiveFile(org.gbif.dwc.ArchiveFile) Extension(org.gbif.ipt.model.Extension) ExtensionProperty(org.gbif.ipt.model.ExtensionProperty) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) PropertyMapping(org.gbif.ipt.model.PropertyMapping) ArchiveFile(org.gbif.dwc.ArchiveFile) File(java.io.File) ArchiveField(org.gbif.dwc.ArchiveField) MetaDescriptorWriter(org.gbif.dwc.MetaDescriptorWriter) PrintWriter(java.io.PrintWriter) Writer(java.io.Writer) StringWriter(java.io.StringWriter)


