Search in sources :

Example 1 with ValueGenerator

use of com.linkedpipes.plugin.transformer.tabularuv.column.ValueGenerator in project etl by linkedpipes.

the class TableToRdfConfigurator method configure.

/**
 * Configure given {@link TableToRdf} convertor from its {@code config}, the table header
 * and the first data row.
 *
 * <p>The method resets {@code baseUri}, {@code infoMap}, {@code keyColumn} and
 * {@code nameToIndex} on {@code tableToRdf}, creates one {@link ValueGenerator} per mapped
 * column and one per advanced mapping, compiles them against the collected column names and
 * stores them in {@code tableToRdf.infoMap}. When {@code config.generateLabels} is set, a
 * label statement is added to {@code tableToRdf.outRdf} for every mapped column.
 *
 * <p>Note that {@code ColumnInfo_V1} instances taken from {@code config.columnsInfo} are
 * modified in place (their URI and, for {@code ColumnType.Auto}, their type are set).
 *
 * @param tableToRdf Convertor to configure; its {@code config} is read.
 * @param header Column names, or {@code null} to use generated names (col1, col2, ...).
 *               A {@code null} entry is also replaced by a generated name.
 * @param data Contains first data row, or ColumnType if type is already known.
 * @param numberOfLeadingEmpty Number of leading empty columns; such columns are typed as
 *               string instead of being guessed. The original note says this is useful for
 *               "xsl-like" sources (presumably spreadsheets - TODO confirm).
 * @throws ParseFailed If {@code data} is null, if header and data differ in size, or if
 *               {@code config.rowsClass} can not be converted to an IRI.
 * @throws LpException Declared by the signature; presumably raised by the called helpers.
 */
public static void configure(TableToRdf tableToRdf, List<String> header, List<Object> data, int numberOfLeadingEmpty) throws ParseFailed, LpException {
    // initial checks
    if (data == null) {
        throw new ParseFailed("First data row is null!");
    }
    if (header != null && header.size() != data.size()) {
        throw new ParseFailed("Diff number of cells in header (" + header.size() + ") and data (" + data.size() + ")");
    }
    final TableToRdfConfig config = tableToRdf.config;
    // clear configuration
    tableToRdf.baseUri = config.baseURI;
    tableToRdf.infoMap = null;
    tableToRdf.keyColumn = null;
    tableToRdf.nameToIndex = new HashMap<>();
    // prepare locals - user mappings that are not matched by any column stay in 'unused'
    final Map<String, ColumnInfo_V1> unused = new HashMap<>(config.columnsInfo);
    final List<ValueGenerator> valueGenerators = new ArrayList<>(data.size());
    // generate configuration - Column Mapping
    String keyTemplateStr = null;
    for (int index = 0; index < data.size(); index++) {
        // Generate column name and add it to map. The name is never null here:
        // a missing header (or a null header cell) falls back to col1, col2 ...
        final String columnName;
        if (header == null) {
            columnName = "col" + (index + 1);
        } else if (header.get(index) != null) {
            columnName = header.get(index);
        } else {
            LOG.info("Generated value used for column with 'null' name.");
            columnName = "col" + (index + 1);
        }
        LOG.debug("New column found '{}'", columnName);
        // add column name
        tableToRdf.nameToIndex.put(columnName, index);
        // test for key - only for the simple (non advanced) key column
        if (config.keyColumn != null && !config.keyColumn.isEmpty() && !config.advancedKeyColumn && config.keyColumn.equals(columnName)) {
            // we construct template and use it
            keyTemplateStr = "<" + prepareAsUri("{", config) + columnName + "}>";
        }
        // check for user template
        final ColumnInfo_V1 columnInfo;
        if (config.columnsInfo.containsKey(columnName)) {
            // use user config
            columnInfo = config.columnsInfo.get(columnName);
            unused.remove(columnName);
        } else if (!config.generateNew) {
            // no new generation - column stays in nameToIndex but gets no generator
            continue;
        } else {
            // generate new
            columnInfo = new ColumnInfo_V1();
        }
        // fill other values if needed
        if (columnInfo.getURI() == null) {
            columnInfo.setURI(config.baseURI + Utils.convertStringToIRIPart(columnName));
        } else {
            columnInfo.setURI(prepareAsUri(columnInfo.getURI(), config));
        }
        if (columnInfo.getType() == ColumnType.Auto) {
            if (index < numberOfLeadingEmpty || config.autoAsStrings) {
                // Empty leading column (string used without warning),
                // or user asked for strings instead of guessing.
                columnInfo.setType(ColumnType.String);
            } else {
                columnInfo.setType(guessType(columnName, data.get(index), columnInfo.isUseTypeFromDfb()));
            }
        }
        // generate tableToRdf configuration from 'columnInfo'
        final String template = generateTemplate(columnInfo, columnName);
        LOG.debug("Template for column '{}' is '{}'", columnName, template);
        // add to configuration
        valueGenerators.add(ValueGeneratorReplace.create(tableToRdf.valueFactory.createIRI(columnInfo.getURI()), template));
        // generate metadata about column - for now only labels
        if (config.generateLabels) {
            tableToRdf.outRdf.add(tableToRdf.valueFactory.createIRI(columnInfo.getURI()), RDFS.LABEL, tableToRdf.valueFactory.createLiteral(columnName));
        }
    }
    // key template
    if (config.advancedKeyColumn) {
        // we use keyColumn directly
        tableToRdf.keyColumn = ValueGeneratorReplace.create(null, config.keyColumn);
        tableToRdf.keyColumn.compile(tableToRdf.nameToIndex, tableToRdf.valueFactory);
    } else if (keyTemplateStr != null) {
        // we have constructed template
        LOG.info("Key column template: {}", keyTemplateStr);
        tableToRdf.keyColumn = ValueGeneratorReplace.create(null, keyTemplateStr);
        tableToRdf.keyColumn.compile(tableToRdf.nameToIndex, tableToRdf.valueFactory);
    }
    // otherwise keyColumn stays null, and then row number is used
    // TODO: we do not support this functionality ..
    for (Map.Entry<String, ColumnInfo_V1> entry : unused.entrySet()) {
        final String key = entry.getKey();
        if (key.isEmpty()) {
            // - bug fix
            continue;
        }
        // Same message in both cases, only the log level differs.
        if (config.ignoreMissingColumn) {
            LOG.info("Column '{}' (uri:{}) ignored as does not match original columns.", key, entry.getValue().getURI());
        } else {
            LOG.error("Column '{}' (uri:{}) ignored as does not match original columns.", key, entry.getValue().getURI());
        }
    }
    // add advanced
    for (TabularConfig_V2.AdvanceMapping item : config.columnsInfoAdv) {
        // prepare URI
        final String uri = prepareAsUri(item.getUri(), config);
        // add template
        valueGenerators.add(ValueGeneratorReplace.create(tableToRdf.valueFactory.createIRI(uri), item.getTemplate()));
    }
    // Compile valueGenerators - done last so every column name is already in nameToIndex
    for (ValueGenerator generator : valueGenerators) {
        generator.compile(tableToRdf.nameToIndex, tableToRdf.valueFactory);
    }
    // final checks and data sets
    tableToRdf.infoMap = valueGenerators.toArray(new ValueGenerator[0]);
    if (config.rowsClass != null && !config.rowsClass.isEmpty()) {
        try {
            tableToRdf.rowClass = tableToRdf.valueFactory.createIRI(config.rowsClass);
        } catch (IllegalArgumentException ex) {
            throw new ParseFailed("Failed to create row's class URI from:" + config.rowsClass, ex);
        }
    }
}
Also used : ValueGenerator(com.linkedpipes.plugin.transformer.tabularuv.column.ValueGenerator) TabularConfig_V2(com.linkedpipes.plugin.transformer.tabularuv.TabularConfig_V2) ParseFailed(com.linkedpipes.plugin.transformer.tabularuv.parser.ParseFailed) ColumnInfo_V1(com.linkedpipes.plugin.transformer.tabularuv.TabularConfig_V2.ColumnInfo_V1)

Example 2 with ValueGenerator

use of com.linkedpipes.plugin.transformer.tabularuv.column.ValueGenerator in project etl by linkedpipes.

the class TableToRdf method paserRow.

/**
 * Converts a single table row into RDF statements written to {@code outRdf}.
 *
 * <p>Rows with fewer cells than known columns are ignored with a warning; longer rows are
 * processed with a warning. When {@code config.trimString} is set, string cells are trimmed
 * on a copy of the row. If no subject can be prepared for the row, the row is skipped and
 * no statement with that row as subject is produced. Table level information is still
 * generated the first time a row reaches that step.
 *
 * @param row Cells of the row; the caller's list is not modified.
 * @param rowNumber Number of the row, passed to {@code prepareUri} and used for the
 *                  row number statement.
 * @throws LpException Declared by the signature; presumably raised by the called helpers.
 */
public void paserRow(List<Object> row, int rowNumber) throws LpException {
    if (row.size() < nameToIndex.size()) {
        LOG.warn("Row is smaller ({} instead of {}) - ignore.", row.size(), nameToIndex.size());
        return;
    } else if (row.size() > nameToIndex.size()) {
        LOG.warn("Row is too big, some data may be invalid!" + " (size: {} expected: {})", row.size(), nameToIndex.size());
    }
    // trim string values
    if (config.trimString) {
        final List<Object> newRow = new ArrayList<>(row.size());
        for (Object item : row) {
            if (item instanceof String) {
                newRow.add(((String) item).trim());
            } else {
                newRow.add(item);
            }
        }
        row = newRow;
    }
    // get subject - key
    final IRI subj = prepareUri(row, rowNumber);
    if (subj == null) {
        // Really skip the row: previously the code only logged and then
        // called outRdf.add with a null subject for every statement below.
        LOG.error("Row ({}) has null key, row skipped.", rowNumber);
    } else {
        // parse the line, based on configuration
        for (ValueGenerator item : infoMap) {
            final IRI predicate = item.getUri();
            final Value value = item.generateValue(row, valueFactory);
            if (value != null) {
                // insert value
                outRdf.add(subj, predicate, value);
            } else if (!config.ignoreBlankCells) {
                // insert blank cell IRI
                outRdf.add(subj, predicate, TabularOntology.BLANK_CELL);
            }
            // else: blank cell is ignored
        }
        // add row data - number, class, connection to table
        if (config.generateRowTriple) {
            outRdf.add(subj, TabularOntology.ROW_NUMBER, valueFactory.createLiteral(rowNumber));
        }
        if (rowClass != null) {
            outRdf.add(subj, typeUri, rowClass);
        }
        if (tableSubject != null) {
            outRdf.add(tableSubject, TabularOntology.TABLE_HAS_ROW, subj);
        }
    }
    // Add table statistics only for the first time. This does not depend on
    // the row subject, so it is done even if the row itself was skipped.
    if (!tableInfoGenerated && tableSubject != null) {
        tableInfoGenerated = true;
        if (config.generateTableClass) {
            outRdf.add(tableSubject, RDF.TYPE, TabularOntology.TABLE_CLASS);
        }
    }
}
Also used : IRI(org.eclipse.rdf4j.model.IRI) ValueGenerator(com.linkedpipes.plugin.transformer.tabularuv.column.ValueGenerator) ArrayList(java.util.ArrayList) Value(org.eclipse.rdf4j.model.Value)

Aggregations

- ValueGenerator (com.linkedpipes.plugin.transformer.tabularuv.column.ValueGenerator): 2
- TabularConfig_V2 (com.linkedpipes.plugin.transformer.tabularuv.TabularConfig_V2): 1
- ColumnInfo_V1 (com.linkedpipes.plugin.transformer.tabularuv.TabularConfig_V2.ColumnInfo_V1): 1
- ParseFailed (com.linkedpipes.plugin.transformer.tabularuv.parser.ParseFailed): 1
- ArrayList (java.util.ArrayList): 1
- IRI (org.eclipse.rdf4j.model.IRI): 1
- Value (org.eclipse.rdf4j.model.Value): 1