Use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.
In class ExtensionManagerImpl, method removePropertyMapping.
/**
 * Remove a PropertyMapping from an ExtensionMapping.
 *
 * @param extensionMapping ExtensionMapping
 * @param qualifiedName    qualified name of the PropertyMapping term to remove
 */
private void removePropertyMapping(ExtensionMapping extensionMapping, String qualifiedName) {
  PropertyMapping pm = extensionMapping.getField(qualifiedName);
  Set<PropertyMapping> propertyMappings = extensionMapping.getFields();
  if (pm != null && propertyMappings.contains(pm)) {
    propertyMappings.remove(pm);
    LOG.debug("Removed mapping to term " + pm.getTerm().qualifiedName());
  }
}
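Note that the removal only takes effect because ExtensionMapping.getFields() is assumed to return the live backing Set rather than a defensive copy. A minimal, self-contained sketch of that pattern, using plain collections rather than IPT classes:

import java.util.HashSet;
import java.util.Set;

public class LiveSetRemovalSketch {
  public static void main(String[] args) {
    Set<String> fields = new HashSet<>();
    fields.add("http://rs.tdwg.org/dwc/terms/individualID");
    // a reference obtained from a getter that returns the live set, as getFields() is assumed to do
    Set<String> view = fields;
    view.remove("http://rs.tdwg.org/dwc/terms/individualID");
    // the mutation is visible through the original set
    System.out.println(fields.isEmpty()); // prints: true
  }
}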
Use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.
In class ExtensionManagerImpl, method migrateExtensionMapping.
/**
 * Migrate an ExtensionMapping to use a newer version of that extension:
 * 1. Migrate property mappings for deprecated terms that have been replaced by another term. Careful: the
 * replacing term must be included in the newer extension version, and cannot already be mapped.
 * 2. Remove property mappings for deprecated terms that have NOT been replaced by another term.
 *
 * @param extensionMapping ExtensionMapping to migrate to use the newer version of the Extension
 * @param newer            newer version of the Extension the ExtensionMapping is based on
 * @param deprecated       set of ExtensionProperty deprecated in the newer version of the Extension
 *
 * @return the migrated ExtensionMapping
 */
private ExtensionMapping migrateExtensionMapping(ExtensionMapping extensionMapping, Extension newer,
                                                 Set<ExtensionProperty> deprecated) {
  LOG.debug("Migrating extension mapping...");
  // update Extension
  extensionMapping.setExtension(newer);
  // migrate or remove property mappings to deprecated terms
  for (ExtensionProperty deprecatedProperty : deprecated) {
    Term replacedBy = TERMS_REPLACED_BY_ANOTHER_TERM.get(deprecatedProperty.qualifiedName());
    // the replacing term must exist in the new extension, and it cannot already be mapped!
    if (replacedBy != null && newer.getProperty(replacedBy) != null && !extensionMapping.isMapped(replacedBy)) {
      PropertyMapping pm = extensionMapping.getField(deprecatedProperty.qualifiedName());
      ExtensionProperty ep = newer.getProperty(replacedBy);
      if (pm != null && ep != null) {
        pm.setTerm(ep);
        LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName()
          + " has been migrated to term " + replacedBy.qualifiedName());
      }
    } else {
      // otherwise simply remove the property mapping
      LOG.debug("Mapping to deprecated term " + deprecatedProperty.qualifiedName()
        + " cannot be migrated, therefore it is being removed!");
      removePropertyMapping(extensionMapping, deprecatedProperty.qualifiedName());
    }
  }
  return extensionMapping;
}
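The lookup TERMS_REPLACED_BY_ANOTHER_TERM is not shown in this listing. A hedged sketch of how such a map might be populated; the single entry shown (individualID superseded by organismID) is illustrative, not the project's actual or exhaustive list:

import java.util.Map;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.Term;

// hypothetical population of the deprecated-term replacement lookup
class TermReplacementsSketch {
  static final Map<String, Term> TERMS_REPLACED_BY_ANOTHER_TERM =
    Map.of("http://rs.tdwg.org/dwc/terms/individualID", DwcTerm.organismID);
}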
Use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.
In class GenerateDwca, method applyTranslations.
/**
 * Apply translations or default values to a row, for all mapped properties.
 * <br/>
 * The method iterates through all mapped properties, checking each one for a translation or a default value.
 * The original value in the row is then replaced with the translated or default value. A record array
 * representing the values to be written to the data file is also updated.
 *
 * @param inCols              values array, of columns in row that have been mapped
 * @param in                  values array, of all columns in row
 * @param record              values array, written to the data file
 * @param doiUsedForDatasetId true if the mapping should use the resource DOI as datasetID, false otherwise
 * @param doi                 DOI assigned to the resource
 */
private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] record, boolean doiUsedForDatasetId,
                               DOI doi) {
  // start at 1: column 0 is the id column
  for (int i = 1; i < inCols.length; i++) {
    PropertyMapping pm = inCols[i];
    String val = null;
    if (pm != null) {
      if (pm.getIndex() != null) {
        val = in[pm.getIndex()];
        // translate value?
        if (pm.getTranslation() != null && pm.getTranslation().containsKey(val)) {
          val = pm.getTranslation().get(val);
          // update value in original record
          in[pm.getIndex()] = val;
        }
      }
      // use default value for null values
      if (val == null) {
        val = pm.getDefaultValue();
      }
      // use DOI for datasetID property?
      if (pm.getTerm().qualifiedName().equalsIgnoreCase(Constants.DWC_DATASET_ID) && doiUsedForDatasetId
        && doi != null) {
        val = doi.getDoiString();
      }
    }
    // add value to data file record
    record[i] = val;
  }
}
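The resolution order above is: translate the raw value if a translation exists, fall back to the default value when the result is null, then override datasetID with the DOI when configured. A runnable sketch of the first two steps of that precedence using plain maps; the vocabulary values are made up for illustration:

import java.util.HashMap;
import java.util.Map;

public class TranslationPrecedenceSketch {
  public static void main(String[] args) {
    Map<String, String> translation = new HashMap<>();
    translation.put("m", "male"); // hypothetical vocabulary translation

    String raw = "m";
    // 1. apply the translation if one exists for the raw value
    String val = translation.containsKey(raw) ? translation.get(raw) : raw;
    // 2. fall back to a default value for nulls
    if (val == null) {
      val = "unknown"; // stand-in for pm.getDefaultValue()
    }
    System.out.println(val); // prints: male
  }
}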
Use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.
In class GenerateDwca, method addFieldsToArchive.
/**
 * First find the union of all terms mapped (across all files) for a single Extension, then make each mapped
 * term a field in the final archive. Static/default mappings are not stored for a field, since they are not
 * expressed in meta.xml but instead get written to the data file.
 *
 * @param mappings list of ExtensionMapping
 * @param af       ArchiveFile
 *
 * @return set of conceptTerms that have been mapped (across all files) for a single Extension
 */
private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFile af) throws GeneratorException {
  Set<Term> mappedConceptTerms = new HashSet<>();
  for (ExtensionMapping m : mappings) {
    // multi-value field delimiter, part of each source data configuration
    String delimitedBy = StringUtils.trimToNull(m.getSource().getMultiValueFieldsDelimitedBy());
    for (PropertyMapping pm : m.getFields()) {
      Term term = TERM_FACTORY.findTerm(pm.getTerm().qualifiedName());
      // ensure the Extension has the concept term
      if (term != null && m.getExtension().getProperty(term) != null) {
        if (af.hasTerm(term)) {
          ArchiveField field = af.getField(term);
          mappedConceptTerms.add(term);
          // the multi-value delimiter must be the same across all sources
          if (field.getDelimitedBy() != null && !field.getDelimitedBy().equals(delimitedBy)) {
            throw new GeneratorException(
              "More than one type of multi-value field delimiter is being used in the source files mapped to the "
                + m.getExtension().getName()
                + " extension. Please either ensure all source files mapped to this extension use the same delimiter, or leave the delimiter blank.");
          }
        } else {
          // the index may be null (e.g. constant mappings) or a non-negative column index
          if (pm.getIndex() == null || pm.getIndex() >= 0) {
            log.debug("Handling property mapping for term: " + term.qualifiedName() + " (index " + pm.getIndex() + ")");
            af.addField(buildField(term, delimitedBy));
            mappedConceptTerms.add(term);
          }
        }
      }
    }
    // if the Extension has the datasetID concept term, check if the resource DOI should be used as its value
    ExtensionProperty ep = m.getExtension().getProperty(DwcTerm.datasetID.qualifiedName());
    if (ep != null && m.isDoiUsedForDatasetId()) {
      log.debug("Detected that the resource DOI is to be used as the value for the datasetID mapping..");
      // include datasetID field in ArchiveFile
      ArchiveField f = buildField(DwcTerm.datasetID, null);
      af.addField(f);
      // include datasetID field mapping in ExtensionMapping
      PropertyMapping pm = new PropertyMapping(f);
      pm.setTerm(ep);
      m.getFields().add(pm);
      // include datasetID in the set of all terms mapped for the Extension
      mappedConceptTerms.add(DwcTerm.datasetID);
    }
  }
  return mappedConceptTerms;
}
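The private helper buildField(term, delimitedBy) is not part of this listing. A plausible minimal implementation, offered as an assumption rather than the actual IPT code, would create the field, set its term and multi-value delimiter, and leave the column index to be assigned later:

// hypothetical sketch of buildField; the real helper may differ
private ArchiveField buildField(Term term, @Nullable String delimitedBy) {
  ArchiveField field = new ArchiveField();
  field.setTerm(term);               // the concept term this column carries
  field.setDelimitedBy(delimitedBy); // multi-value delimiter, or null for single values
  return field;                      // index is assigned later, ordered by Extension
}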
Use of org.gbif.ipt.model.PropertyMapping in project ipt by gbif.
In class GenerateDwca, method addDataFile.
/**
 * Adds a single data file for a list of extension mappings that must all be mapped to the same extension.
 * <br/>
 * The ID column is always the first column (index 0) and is always equal to the core record identifier that has
 * been mapped (e.g. occurrenceID, taxonID).
 *
 * @param mappings list of ExtensionMapping
 * @param rowLimit maximum number of rows to write
 *
 * @throws IllegalArgumentException if not all mappings are mapped to the same extension
 * @throws InterruptedException     if the thread was interrupted
 * @throws IOException              if problems occurred while persisting new data files
 * @throws GeneratorException       if any problem was encountered writing the data file
 */
public void addDataFile(List<ExtensionMapping> mappings, @Nullable Integer rowLimit)
  throws IOException, IllegalArgumentException, InterruptedException, GeneratorException {
  checkForInterruption();
  if (mappings == null || mappings.isEmpty()) {
    return;
  }
  // update reporting
  currRecords = 0;
  currRecordsSkipped = 0;
  Extension ext = mappings.get(0).getExtension();
  currExtension = ext.getTitle();
  // verify that all mappings share this extension
  for (ExtensionMapping m : mappings) {
    if (!ext.equals(m.getExtension())) {
      throw new IllegalArgumentException(
        "All mappings for a single data file need to be mapped to the same extension: " + ext.getRowType());
    }
  }
  // create a new tab file with the help of the Archive class representing the core file or an extension
  ArchiveFile af = ArchiveFile.buildTabFile();
  af.setRowType(TERM_FACTORY.findTerm(ext.getRowType()));
  af.setEncoding(CHARACTER_ENCODING);
  af.setDateFormat("YYYY-MM-DD");
  // in the generated file, column 0 will be the id column
  ArchiveField idField = new ArchiveField();
  idField.setIndex(ID_COLUMN_INDEX);
  af.setId(idField);
  // find the union of all terms mapped and make them fields in the final archive
  Set<Term> mappedConceptTerms = addFieldsToArchive(mappings, af);
  // retrieve the ordered list of mapped ExtensionProperty
  List<ExtensionProperty> propertyList = getOrderedMappedExtensionProperties(ext, mappedConceptTerms);
  // reassign indexes ordered by Extension
  assignIndexesOrderedByExtension(propertyList, af);
  // total column count is equal to id column + mapped columns
  int totalColumns = 1 + propertyList.size();
  // create the file name from the extension name, with an incremental suffix to resolve name conflicts
  // (e.g. taxon.txt, taxon2.txt, taxon3.txt)
  String extensionName = (ext.getName() == null) ? "f" : ext.getName().toLowerCase().replaceAll("\\s", "_");
  String fn = createFileName(dwcaFolder, extensionName);
  // open a new file writer for the single data file
  File dataFile = new File(dwcaFolder, fn);
  // ready to go through each mapping and dump the data
  try (Writer writer = org.gbif.utils.file.FileUtils.startNewUtf8File(dataFile)) {
    af.addLocation(dataFile.getName());
    addMessage(Level.INFO, "Start writing data file for " + currExtension);
    boolean headerWritten = false;
    for (ExtensionMapping m : mappings) {
      // prepare an index-ordered list of all output columns, apart from the id column
      PropertyMapping[] inCols = new PropertyMapping[totalColumns];
      for (ArchiveField f : af.getFields().values()) {
        if (f.getIndex() != null && f.getIndex() > ID_COLUMN_INDEX) {
          inCols[f.getIndex()] = m.getField(f.getTerm().qualifiedName());
        }
      }
      // write the header line to the file once only
      if (!headerWritten) {
        writeHeaderLine(propertyList, totalColumns, af, writer);
        headerWritten = true;
      }
      // write data (records) to file
      dumpData(writer, inCols, m, totalColumns, rowLimit, resource.getDoi());
      // store the record count by extension rowType
      recordsByExtension.put(ext.getRowType(), currRecords);
    }
  } catch (IOException e) {
    // some error writing this file, report
    log.error("Fatal DwC-A Generator Error encountered while writing data file", e);
    // set last error report!
    setState(e);
    throw new GeneratorException("Error writing data file", e);
  }
  // add the archive file to the archive
  if (resource.getCoreRowType() != null && resource.getCoreRowType().equalsIgnoreCase(ext.getRowType())) {
    archive.setCore(af);
  } else {
    archive.addExtension(af);
  }
  // final reporting
  addMessage(Level.INFO,
    "Data file written for " + currExtension + " with " + currRecords + " records and " + totalColumns + " columns");
  // how many records were skipped?
  if (currRecordsSkipped > 0) {
    addMessage(Level.WARN, "!!! " + currRecordsSkipped + " records were skipped for " + currExtension
      + " due to errors interpreting a line, or because the line was empty");
  }
}
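A hedged sketch of how a caller might drive this method: group a resource's mappings by extension rowType and write one data file per group. The writeAllDataFiles helper and the allMappings parameter are illustrative assumptions; the actual generator orchestrates this elsewhere:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

// hypothetical driver method inside the generator
private void writeAllDataFiles(List<ExtensionMapping> allMappings, @Nullable Integer rowLimit)
  throws IOException, InterruptedException, GeneratorException {
  // group mappings by extension rowType, since addDataFile requires one extension per file
  Map<String, List<ExtensionMapping>> byRowType = allMappings.stream()
    .collect(Collectors.groupingBy(m -> m.getExtension().getRowType()));
  for (List<ExtensionMapping> group : byRowType.values()) {
    addDataFile(group, rowLimit); // one data file per extension rowType
  }
}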