Use of com.linkedpipes.plugin.transformer.tabularuv.column.ValueGenerator in project etl by linkedpipes.
The class TableToRdfConfigurator, method configure.
/**
 * Configures the given {@link TableToRdf} converter for one table.
 *
 * <p>Resets the converter's {@code baseUri}, {@code infoMap}, {@code keyColumn} and
 * {@code nameToIndex}, then builds one {@link ValueGenerator} per mapped column and one per
 * advanced mapping, compiles them against the column-name-to-index map and stores them in
 * {@code infoMap}. Finally the key generator and the row class are set up.
 *
 * @param tableToRdf converter to configure; its {@code config} is read and its state overwritten
 * @param header column names, or {@code null} to use generated names ({@code col1},
 *     {@code col2}, ...); a {@code null} entry also gets a generated name
 * @param data Contains first data row, or ColumnType if type is already known. Must not be
 *     {@code null}; its size defines the number of columns.
 * @param numberOfLeadingEmpty number of leading empty columns; a column with index below this
 *     value and type {@code Auto} is typed as String without guessing
 * @throws ParseFailed if {@code data} is {@code null}, if header and data differ in size, or
 *     if {@code config.rowsClass} cannot be converted to an IRI
 * @throws LpException declared by the signature; presumably propagated from the helper calls
 *     (e.g. generator compilation) - confirm against those helpers
 */
public static void configure(TableToRdf tableToRdf, List<String> header, List<Object> data, int numberOfLeadingEmpty) throws ParseFailed, LpException {
    // Initial checks.
    if (data == null) {
        throw new ParseFailed("First data row is null!");
    }
    if (header != null && header.size() != data.size()) {
        throw new ParseFailed("Diff number of cells in header (" + header.size() + ") and data (" + data.size() + ")");
    }
    final TableToRdfConfig config = tableToRdf.config;
    // Clear configuration left over from a previous call.
    tableToRdf.baseUri = config.baseURI;
    tableToRdf.infoMap = null;
    tableToRdf.keyColumn = null;
    tableToRdf.nameToIndex = new HashMap<>();
    // User-configured columns that have not been matched to a table column yet.
    final Map<String, ColumnInfo_V1> unused = new HashMap<>(config.columnsInfo);
    final List<ValueGenerator> valueGenerators = new ArrayList<>(data.size());
    // A "simple" key is a plain column name (as opposed to an advanced key template).
    final boolean useSimpleKey = config.keyColumn != null && !config.keyColumn.isEmpty() && !config.advancedKeyColumn;
    // Generate configuration - column mapping.
    String keyTemplateStr = null;
    for (int index = 0; index < data.size(); index++) {
        // Determine the column name; it is never null after this point.
        final String columnName;
        if (header != null && header.get(index) != null) {
            columnName = header.get(index);
        } else {
            if (header != null) {
                LOG.info("Generated value used for column with 'null' name.");
            }
            // Use a generated name - first is col1, then col2 ...
            columnName = "col" + (index + 1);
        }
        LOG.debug("New column found '{}'", columnName);
        // Register the column even if no generator is created for it below,
        // so that templates can still refer to it by name.
        tableToRdf.nameToIndex.put(columnName, index);
        // Test for key column.
        if (useSimpleKey && config.keyColumn.equals(columnName)) {
            // Construct a template that references the key column.
            keyTemplateStr = "<" + prepareAsUri("{", config) + columnName + "}>";
        }
        // Check for user-provided column configuration.
        final ColumnInfo_V1 columnInfo;
        if (config.columnsInfo.containsKey(columnName)) {
            // NOTE(review): this instance comes from the shared configuration and is
            // modified in place below (URI and type) - confirm this is intended when
            // configure is called more than once with the same configuration.
            columnInfo = config.columnsInfo.get(columnName);
            unused.remove(columnName);
        } else if (!config.generateNew) {
            // Generation of new columns is disabled.
            continue;
        } else {
            columnInfo = new ColumnInfo_V1();
        }
        // Fill in missing values.
        if (columnInfo.getURI() == null) {
            columnInfo.setURI(config.baseURI + Utils.convertStringToIRIPart(columnName));
        } else {
            columnInfo.setURI(prepareAsUri(columnInfo.getURI(), config));
        }
        if (columnInfo.getType() == ColumnType.Auto) {
            if (index < numberOfLeadingEmpty || config.autoAsStrings) {
                // Leading empty columns are typed as String without a warning;
                // the same type is used when auto-typed columns are forced to strings.
                columnInfo.setType(ColumnType.String);
            } else {
                columnInfo.setType(guessType(columnName, data.get(index), columnInfo.isUseTypeFromDfb()));
            }
        }
        // Generate tableToRdf configuration from 'columnInfo'.
        final String template = generateTemplate(columnInfo, columnName);
        LOG.debug("Template for column '{}' is '{}'", columnName, template);
        valueGenerators.add(ValueGeneratorReplace.create(tableToRdf.valueFactory.createIRI(columnInfo.getURI()), template));
        // Generate metadata about the column - for now only labels.
        if (config.generateLabels) {
            tableToRdf.outRdf.add(tableToRdf.valueFactory.createIRI(columnInfo.getURI()), RDFS.LABEL, tableToRdf.valueFactory.createLiteral(columnName));
        }
    }
    // Key template.
    if (config.advancedKeyColumn) {
        // The configured keyColumn is used directly as a template.
        tableToRdf.keyColumn = ValueGeneratorReplace.create(null, config.keyColumn);
        tableToRdf.keyColumn.compile(tableToRdf.nameToIndex, tableToRdf.valueFactory);
    } else if (keyTemplateStr != null) {
        // Use the template constructed from the key column name.
        LOG.info("Key column template: {}", keyTemplateStr);
        tableToRdf.keyColumn = ValueGeneratorReplace.create(null, keyTemplateStr);
        tableToRdf.keyColumn.compile(tableToRdf.nameToIndex, tableToRdf.valueFactory);
    }
    // Otherwise keyColumn stays null, and then row number is used.

    // TODO: we do not support this functionality ..
    for (Map.Entry<String, ColumnInfo_V1> entry : unused.entrySet()) {
        final String key = entry.getKey();
        if (key.isEmpty()) {
            // - bug fix
            continue;
        }
        if (config.ignoreMissingColumn) {
            LOG.info("Column '{}' (uri:{}) ignored as does not match original columns.", key, entry.getValue().getURI());
        } else {
            LOG.error("Column '{}' (uri:{}) ignored as does not match original columns.", key, entry.getValue().getURI());
        }
    }
    // Add advanced mappings.
    for (TabularConfig_V2.AdvanceMapping item : config.columnsInfoAdv) {
        final String uri = prepareAsUri(item.getUri(), config);
        valueGenerators.add(ValueGeneratorReplace.create(tableToRdf.valueFactory.createIRI(uri), item.getTemplate()));
    }
    // Compile value generators against the final name-to-index map.
    for (ValueGenerator generator : valueGenerators) {
        generator.compile(tableToRdf.nameToIndex, tableToRdf.valueFactory);
    }
    // Final checks and data sets.
    tableToRdf.infoMap = valueGenerators.toArray(new ValueGenerator[0]);
    if (config.rowsClass != null && !config.rowsClass.isEmpty()) {
        try {
            tableToRdf.rowClass = tableToRdf.valueFactory.createIRI(config.rowsClass);
        } catch (IllegalArgumentException ex) {
            throw new ParseFailed("Failed to create row's class URI from:" + config.rowsClass, ex);
        }
    }
}
Use of com.linkedpipes.plugin.transformer.tabularuv.column.ValueGenerator in project etl by linkedpipes.
The class TableToRdf, method paserRow.
/**
 * Converts a single table row into RDF statements that are added to {@code outRdf}.
 *
 * <p>A row with fewer cells than there are known columns is ignored with a warning; a row
 * with more cells is processed, also with a warning. A row for which no subject can be
 * created is skipped with an error log entry.
 *
 * @param row cell values of the row; String values are trimmed when
 *     {@code config.trimString} is set (the given list itself is not modified)
 * @param rowNumber number of the row, passed to subject creation and emitted as the row
 *     number literal when {@code config.generateRowTriple} is set
 * @throws LpException declared by the signature; presumably propagated from subject or
 *     value generation - confirm against those helpers
 */
public void paserRow(List<Object> row, int rowNumber) throws LpException {
    if (row.size() < nameToIndex.size()) {
        LOG.warn("Row is smaller ({} instead of {}) - ignore.", row.size(), nameToIndex.size());
        return;
    } else if (row.size() > nameToIndex.size()) {
        LOG.warn("Row is too big, some data may be invalid! (size: {} expected: {})", row.size(), nameToIndex.size());
    }
    // Trim string values into a fresh list so the caller's list stays untouched.
    if (config.trimString) {
        final List<Object> trimmedRow = new ArrayList<>(row.size());
        for (Object item : row) {
            if (item instanceof String) {
                trimmedRow.add(((String) item).trim());
            } else {
                trimmedRow.add(item);
            }
        }
        row = trimmedRow;
    }
    // Get subject - key.
    final IRI subj = prepareUri(row, rowNumber);
    if (subj == null) {
        LOG.error("Row ({}) has null key, row skipped.", rowNumber);
        // Actually skip the row: without a subject no valid statement can be emitted.
        return;
    }
    // Parse the line, based on configuration.
    for (ValueGenerator item : infoMap) {
        final IRI predicate = item.getUri();
        final Value value = item.generateValue(row, valueFactory);
        if (value != null) {
            outRdf.add(subj, predicate, value);
        } else if (!config.ignoreBlankCells) {
            // Mark the blank cell explicitly instead of dropping it.
            outRdf.add(subj, predicate, TabularOntology.BLANK_CELL);
        }
    }
    // Add row data - number, class, connection to table.
    if (config.generateRowTriple) {
        outRdf.add(subj, TabularOntology.ROW_NUMBER, valueFactory.createLiteral(rowNumber));
    }
    if (rowClass != null) {
        outRdf.add(subj, typeUri, rowClass);
    }
    if (tableSubject != null) {
        outRdf.add(tableSubject, TabularOntology.TABLE_HAS_ROW, subj);
    }
    // Add table statistics only for the first processed row.
    if (!tableInfoGenerated && tableSubject != null) {
        tableInfoGenerated = true;
        if (config.generateTableClass) {
            outRdf.add(tableSubject, RDF.TYPE, TabularOntology.TABLE_CLASS);
        }
    }
}
Aggregations