use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.
the class ExtensionManagerImpl method migrateResourceToNewExtensionVersion.
/**
 * Migrates all of a resource's mappings to an extension over to a newer version of that extension.
 * <p>
 * The properties of both extension versions are compared to work out which properties were deprecated (present in
 * the current version only) and which were added (present in the newer version only). For properties kept in both
 * versions, the vocabularies are compared by URI (ignoring case). Only the set of deprecated properties drives the
 * migration; the remaining sets are used for debug logging.
 *
 * @param r       resource whose mappings must be migrated
 * @param current extension version the resource is currently mapped to
 * @param newer   newer version of extension to migrate mappings to
 *
 * @throws IllegalStateException if the two extensions do not share the same rowType (ignoring case), or if the
 *                               resource has no mappings to the current extension's rowType
 */
protected void migrateResourceToNewExtensionVersion(Resource r, Extension current, Extension newer) {
  // sanity checks, reported separately so the failure can be diagnosed from the exception message
  if (!current.getRowType().equalsIgnoreCase(newer.getRowType())) {
    throw new IllegalStateException("Cannot migrate mappings of resource " + r.getShortname()
      + ": current extension rowType " + current.getRowType()
      + " differs from newer extension rowType " + newer.getRowType());
  }
  if (r.getMappings(current.getRowType()).isEmpty()) {
    throw new IllegalStateException("Cannot migrate mappings of resource " + r.getShortname()
      + ": resource has no mappings to extension " + current.getRowType());
  }
  LOG.info("Migrating " + r.getShortname() + " mappings to extension " + current.getRowType() + " to latest extension version");

  // sets keeping track of which terms were deprecated, and how each term's vocabulary changed
  Set<ExtensionProperty> deprecated = new HashSet<>();
  Set<ExtensionProperty> vocabulariesRemoved = new HashSet<>();
  Set<ExtensionProperty> vocabulariesUnchanged = new HashSet<>();
  Set<ExtensionProperty> vocabulariesUpdated = new HashSet<>();
  for (ExtensionProperty property : current.getProperties()) {
    // newer extension no longer contains this property?
    if (!newer.hasProperty(property.qualifiedName())) {
      deprecated.add(property);
      continue;
    }
    // property kept: only properties that used a vocabulary need to be classified further
    Vocabulary v1 = property.getVocabulary();
    if (v1 == null) {
      continue;
    }
    Vocabulary v2 = newer.getProperty(property.qualifiedName()).getVocabulary();
    if (v2 == null) {
      // case 1: vocabulary removed in newer version
      vocabulariesRemoved.add(property);
    } else if (v1.getUriString().equalsIgnoreCase(v2.getUriString())) {
      // case 2: vocabulary is unchanged between versions
      vocabulariesUnchanged.add(property);
    } else {
      // case 3: vocabulary has been updated in newer version (the only remaining possibility)
      vocabulariesUpdated.add(property);
    }
  }
  LOG.debug(deprecated.size() + " properties have been deprecated in the newer version");
  LOG.debug(vocabulariesRemoved.size() + " properties in the newer version of extension no longer use a vocabulary");
  LOG.debug(vocabulariesUnchanged.size() + " properties in the newer version of extension use the same vocabulary");
  LOG.debug(vocabulariesUpdated.size() + " properties in the newer version of extension use a newer vocabulary");

  // set of new terms (terms to add), collected for reporting only
  Set<ExtensionProperty> added = new HashSet<>();
  for (ExtensionProperty property : newer.getProperties()) {
    // older extension does not contain this property?
    if (!current.hasProperty(property.qualifiedName())) {
      added.add(property);
    }
  }
  LOG.debug("Newer version of extension has " + added.size() + " new properties");

  // migrate every mapping of the resource that is based on this extension
  for (ExtensionMapping extensionMapping : r.getMappings(current.getRowType())) {
    migrateExtensionMapping(extensionMapping, newer, deprecated);
  }
}
use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.
the class ExtensionManagerImpl method migrateExtensionMapping.
/**
 * Switches an ExtensionMapping over to a newer version of its extension and deals with each deprecated term:
 * <ul>
 *   <li>If the deprecated term has a registered replacement term that exists in the newer extension and is not yet
 *   mapped, an existing property mapping for the deprecated term is re-pointed to the replacement term.</li>
 *   <li>Otherwise the property mapping for the deprecated term is removed.</li>
 * </ul>
 *
 * @param extensionMapping ExtensionMapping to migrate to use newer version of Extension
 * @param newer            newer version of Extension ExtensionMapping is based on
 * @param deprecated       set of ExtensionProperty deprecated in newer version of Extension
 *
 * @return the same ExtensionMapping instance that was passed in, after migration
 */
private ExtensionMapping migrateExtensionMapping(ExtensionMapping extensionMapping, Extension newer, Set<ExtensionProperty> deprecated) {
  LOG.debug("Migrating extension mapping...");
  // from now on the mapping is based on the newer extension
  extensionMapping.setExtension(newer);

  for (ExtensionProperty obsolete : deprecated) {
    String obsoleteName = obsolete.qualifiedName();
    Term successor = TERMS_REPLACED_BY_ANOTHER_TERM.get(obsoleteName);
    // the replacing term is only usable when the newer extension actually declares it
    ExtensionProperty target = (successor == null) ? null : newer.getProperty(successor);

    // no usable replacement, or the replacement is already mapped: drop the mapping to the deprecated term
    if (target == null || extensionMapping.isMapped(successor)) {
      LOG.debug("Mapping to deprecated term " + obsoleteName + " cannot be migrated therefore it is being removed!");
      removePropertyMapping(extensionMapping, obsoleteName);
      continue;
    }

    // re-point the existing mapping (if there is one) to the replacing term
    PropertyMapping existing = extensionMapping.getField(obsoleteName);
    if (existing != null) {
      existing.setTerm(target);
      LOG.debug("Mapping to deprecated term " + obsoleteName + " has been migrated to term " + successor.qualifiedName());
    }
  }
  return extensionMapping;
}
use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.
the class GenerateDwca method assignIndexesOrderedByExtension.
/**
 * Walks the ordered list of mapped ExtensionProperty and gives the matching ArchiveField of the ArchiveFile an
 * index equal to the property's position in that list plus one (index 0 is reserved for the ID column).
 * Properties without a matching ArchiveField, or whose ArchiveField already has an index, are skipped with a
 * warning; their position is still consumed, so no index is reused.
 *
 * @param propertyList ordered list of those ExtensionProperty that have been mapped
 * @param af           ArchiveFile
 */
private void assignIndexesOrderedByExtension(List<ExtensionProperty> propertyList, ArchiveFile af) {
  // index 0 is reserved for the ID column, so the first mapped column gets index 1
  int columnIndex = 1;
  for (ExtensionProperty mapped : propertyList) {
    // resolve the dwc-api Term for the property, then look up its ArchiveField
    Term term = TERM_FACTORY.findTerm(mapped.getQualname());
    ArchiveField field = af.getField(term);
    if (field == null || field.getIndex() != null) {
      log.warn("Skipping ExtensionProperty: " + mapped.getQualname());
    } else {
      // make the field order in meta.xml mirror the ordered property list
      field.setIndex(columnIndex);
    }
    columnIndex++;
  }
}
use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.
the class GenerateDwca method addFieldsToArchive.
/**
 * Builds the union of all terms mapped (across all given mappings) for a single Extension and makes sure each one
 * is a field of the ArchiveFile. A property mapping is only considered when its term can be resolved and the
 * mapping's Extension declares it. When the Extension has a datasetID property and the mapping is flagged to use
 * the resource DOI as datasetID, a datasetID field is added to the ArchiveFile and a corresponding property mapping
 * is appended to the ExtensionMapping.
 *
 * @param mappings list of ExtensionMapping
 * @param af       ArchiveFile
 *
 * @return set of conceptTerms that have been mapped (in all files) for a single Extension
 *
 * @throws GeneratorException if a term's existing field has a multi-value delimiter that differs from the delimiter
 *                            of the source of another mapping of the same term
 */
private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFile af) throws GeneratorException {
  Set<Term> mappedConceptTerms = new HashSet<>();
  for (ExtensionMapping mapping : mappings) {
    // multi-value field delimiter, part of each source data configuration
    String sourceDelimiter = StringUtils.trimToNull(mapping.getSource().getMultiValueFieldsDelimitedBy());

    for (PropertyMapping propertyMapping : mapping.getFields()) {
      Term term = TERM_FACTORY.findTerm(propertyMapping.getTerm().qualifiedName());
      // skip terms that cannot be resolved or that the Extension does not declare
      if (term == null || mapping.getExtension().getProperty(term) == null) {
        continue;
      }

      if (!af.hasTerm(term)) {
        // first time this term is seen: add a field unless the mapping carries a negative index
        Integer mappedIndex = propertyMapping.getIndex();
        if (mappedIndex == null || mappedIndex >= 0) {
          log.debug("Handling property mapping for term: " + term.qualifiedName() + " (index " + mappedIndex + ")");
          af.addField(buildField(term, sourceDelimiter));
          mappedConceptTerms.add(term);
        }
        continue;
      }

      // term already has a field: its multi-value delimiter must be the same across all sources
      ArchiveField existingField = af.getField(term);
      mappedConceptTerms.add(term);
      String existingDelimiter = existingField.getDelimitedBy();
      if (existingDelimiter != null && !existingDelimiter.equals(sourceDelimiter)) {
        throw new GeneratorException("More than one type of multi-value field delimiter is being used in the source files mapped to the " + mapping.getExtension().getName() + " extension. Please either ensure all source files mapped to this extension use the same delimiter, otherwise just leave the delimiter blank.");
      }
    }

    // if Extension has datasetID concept term, check if resource DOI should be used as value for mapping
    ExtensionProperty datasetIdProperty = mapping.getExtension().getProperty(DwcTerm.datasetID.qualifiedName());
    if (datasetIdProperty == null || !mapping.isDoiUsedForDatasetId()) {
      continue;
    }
    log.debug("Detected that resource DOI to be used as value for datasetID mapping..");
    // include datasetID field in ArchiveFile
    ArchiveField datasetIdField = buildField(DwcTerm.datasetID, null);
    af.addField(datasetIdField);
    // include datasetID field mapping in ExtensionMapping
    PropertyMapping datasetIdMapping = new PropertyMapping(datasetIdField);
    datasetIdMapping.setTerm(datasetIdProperty);
    mapping.getFields().add(datasetIdMapping);
    // include datasetID in set of all terms mapped for Extension
    mappedConceptTerms.add(DwcTerm.datasetID);
  }
  return mappedConceptTerms;
}
use of org.gbif.ipt.model.ExtensionProperty in project ipt by gbif.
the class GenerateDwca method addDataFile.
/**
 * Adds a single data file for a list of extension mappings that must all be mapped to the same extension.
 * <p>
 * The ID column is always the 1st column (index 0) and is always equal to the core record identifier that has been
 * mapped (e.g. occurrenceID, taxonID, etc).
 * <p>
 * Does nothing when the list of mappings is null or empty. Otherwise the per-file reporting counters are reset, an
 * ArchiveFile describing the data file is built, the header line is written once, the data of every mapping is
 * dumped into the same file, and the ArchiveFile is registered on the archive either as core or as extension
 * depending on the resource's core rowType.
 *
 * @param mappings list of ExtensionMapping
 * @param rowLimit maximum number of rows to write
 *
 * @throws IllegalArgumentException if not all mappings are mapped to the same extension
 * @throws InterruptedException     if the thread was interrupted
 * @throws IOException              if problems occurred while persisting new data files
 * @throws GeneratorException       if any problem was encountered writing data file
 */
public void addDataFile(List<ExtensionMapping> mappings, @Nullable Integer rowLimit) throws IOException, IllegalArgumentException, InterruptedException, GeneratorException {
  checkForInterruption();
  if (mappings == null || mappings.isEmpty()) {
    return;
  }
  // update reporting
  currRecords = 0;
  currRecordsSkipped = 0;
  Extension ext = mappings.get(0).getExtension();
  currExtension = ext.getTitle();
  // verify that all mappings share this extension
  for (ExtensionMapping m : mappings) {
    if (!ext.equals(m.getExtension())) {
      throw new IllegalArgumentException("All mappings for a single data file need to be mapped to the same extension: " + ext.getRowType());
    }
  }
  // create new tab file with the help of the Archive class representing the core file or an extension
  ArchiveFile af = ArchiveFile.buildTabFile();
  af.setRowType(TERM_FACTORY.findTerm(ext.getRowType()));
  af.setEncoding(CHARACTER_ENCODING);
  af.setDateFormat("YYYY-MM-DD");
  // in the generated file column 0 will be the id column
  ArchiveField idField = new ArchiveField();
  idField.setIndex(ID_COLUMN_INDEX);
  af.setId(idField);
  // find the union of all terms mapped and make them a field in the final archive
  Set<Term> mappedConceptTerms = addFieldsToArchive(mappings, af);
  // retrieve the ordered list of mapped ExtensionProperty
  List<ExtensionProperty> propertyList = getOrderedMappedExtensionProperties(ext, mappedConceptTerms);
  // reassign indexes ordered by Extension
  assignIndexesOrderedByExtension(propertyList, af);
  // total column count is equal to id column + mapped columns
  int totalColumns = 1 + propertyList.size();
  // create file name from extension name, with incremental suffix to resolve name conflicts (e.g. taxon.txt,
  // taxon2.txt, taxon3.txt). Lower-casing uses Locale.ROOT so the file name does not depend on the JVM's default
  // locale (e.g. under a Turkish locale "I" would otherwise become a dotless "ı").
  String extensionName = (ext.getName() == null)
    ? "f"
    : ext.getName().toLowerCase(java.util.Locale.ROOT).replaceAll("\\s", "_");
  String fn = createFileName(dwcaFolder, extensionName);
  // open new file writer for single data file
  File dataFile = new File(dwcaFolder, fn);
  // ready to go though each mapping and dump the data
  try (Writer writer = org.gbif.utils.file.FileUtils.startNewUtf8File(dataFile)) {
    af.addLocation(dataFile.getName());
    addMessage(Level.INFO, "Start writing data file for " + currExtension);
    boolean headerWritten = false;
    for (ExtensionMapping m : mappings) {
      // prepare index ordered list of all output columns apart from id column
      PropertyMapping[] inCols = new PropertyMapping[totalColumns];
      for (ArchiveField f : af.getFields().values()) {
        // only fields that were assigned an index after the ID column take part in the output
        if (f.getIndex() != null && f.getIndex() > ID_COLUMN_INDEX) {
          inCols[f.getIndex()] = m.getField(f.getTerm().qualifiedName());
        }
      }
      // write header line 1 time only to file
      if (!headerWritten) {
        writeHeaderLine(propertyList, totalColumns, af, writer);
        headerWritten = true;
      }
      // write data (records) to file
      dumpData(writer, inCols, m, totalColumns, rowLimit, resource.getDoi());
      // store record number by extension rowType
      recordsByExtension.put(ext.getRowType(), currRecords);
    }
  } catch (IOException e) {
    // some error writing this file, report
    log.error("Fatal DwC-A Generator Error encountered while writing header line to data file", e);
    // set last error report!
    setState(e);
    throw new GeneratorException("Error writing header line to data file", e);
  }
  // add archive file to archive
  if (resource.getCoreRowType() != null && resource.getCoreRowType().equalsIgnoreCase(ext.getRowType())) {
    archive.setCore(af);
  } else {
    archive.addExtension(af);
  }
  // final reporting
  addMessage(Level.INFO, "Data file written for " + currExtension + " with " + currRecords + " records and " + totalColumns + " columns");
  // how many records were skipped?
  if (currRecordsSkipped > 0) {
    addMessage(Level.WARN, "!!! " + currRecordsSkipped + " records were skipped for " + currExtension + " due to errors interpreting line, or because the line was empty");
  }
}
Aggregations