use of com.localidata.process.bean.DataBean in project local-data-aragopedia by aragonopendata.
the class GenerateConfig method generateAllConfig.
/**
 * Generates one configuration report per dataset id from the CSV files found
 * (recursively) under {@code inputDirectoryString}.
 * <p>
 * Each file name is split into a dataset id and an area suffix: one character when
 * the name ends with {@code "A.csv"}, otherwise the two characters before the
 * four-character extension. Files sharing an id are merged into a single
 * {@link ConfigBean}. The first line of each file is taken as a tab-separated header;
 * every header cell yields a {@link DataBean} that is classified as follows:
 * <ul>
 * <li>names containing "código"/"codigo" get the literal string {@code "null"} in all
 * three descriptor fields;</li>
 * <li>names for which {@code contains(dimension, name + ".txt")} holds become
 * dimensions ({@code "dim"}): geographic names map to
 * {@code sdmx-dimension:refArea}, names containing "año" are typed from the column
 * values, anything else becomes a {@code skos:Concept} column;</li>
 * <li>every other column becomes a measure ({@code "medida"}) typed from its
 * values.</li>
 * </ul>
 * Finally every merged bean is written with {@code toCSV()} and
 * {@code generateSkosMapping()} is invoked.
 * <p>
 * Files that cannot be read, are empty, or whose name is too short to be split are
 * logged and do not abort the run.
 */
public void generateAllConfig() {
    log.debug("Init generateAllConfig");
    ArrayList<String> dimension = extractDimensions(dimensionDirectoryString);
    HashMap<String, ConfigBean> configExtrated = new HashMap<String, ConfigBean>();
    File inputDirectoryFile = new File(inputDirectoryString);
    Collection<File> listCSV = FileUtils.listFiles(inputDirectoryFile, extensions, true);
    int cont = 0;
    int size = listCSV.size();
    for (File file : listCSV) {
        cont++;
        ArrayList<DataBean> skosData = new ArrayList<DataBean>();
        String fileName = file.getName();
        // "...A.csv" carries a one-letter area suffix, every other file a two-letter one.
        int lettersLength = fileName.endsWith("A.csv") ? 1 : 2;
        int idEnd = fileName.length() - 4 - lettersLength;
        if (idEnd < 0) {
            // The substring arithmetic below would throw; skip the file instead of aborting.
            log.error("File name too short to contain an id and area letters: " + fileName);
            continue;
        }
        String id = fileName.substring(0, idEnd);
        String letters = fileName.substring(idEnd, fileName.length() - 4);
        log.info("Comienza tratamiento para " + id + letters + " " + cont + "/" + size);
        ConfigBean configBean = configExtrated.get(id);
        if (configBean == null) {
            configBean = new ConfigBean();
            configBean.setId(id);
        }
        configBean.getLetters().add(letters);
        try {
            List<String> csvLines = FileUtils.readLines(file, "UTF-8");
            if (csvLines.isEmpty()) {
                // No header line to analyse; handled like an unreadable file.
                log.error("Error to read lines: file " + fileName + " is empty");
            } else {
                String headerLine = Utils.weakClean(csvLines.get(0));
                String[] cells = headerLine.split("\t");
                for (int h = 0; h < cells.length; h++) {
                    String name = cells[h];
                    // Beans are stored under the name set from the trimmed header cell,
                    // so the lookup has to use the trimmed cell as well.
                    DataBean dataBean = configBean.getMapData().get(name.trim());
                    if (dataBean == null) {
                        dataBean = new DataBean();
                        dataBean.setName(name.trim());
                    }
                    String lowerName = name.toLowerCase();
                    if (lowerName.contains("código") || lowerName.contains("codigo")) {
                        dataBean.setNormalizacion("null");
                        dataBean.setDimensionMesure("null");
                        dataBean.setType("null");
                    } else if (contains(dimension, name + ".txt")) {
                        dataBean.setDimensionMesure("dim");
                        String lowerBeanName = dataBean.getName().toLowerCase();
                        if (lowerBeanName.contains("comarca")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-comarca");
                        } else if (lowerBeanName.contains("municipio")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-Municipio");
                        } else if (lowerBeanName.contains("provincia")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-Provincia");
                        } else if (lowerBeanName.contains("comunidad") || lowerBeanName.contains("aragón") || lowerBeanName.contains("ccaa")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-Comunidad");
                        } else if (lowerName.contains("año")) {
                            String type = detectYearColumnType(csvLines, h, name);
                            if (type.equals("xsd:date")) {
                                dataBean.setNormalizacion("sdmx-dimension:refPeriod");
                                dataBean.setType("xsd:date");
                            } else if (type.equals("xsd:int")) {
                                // A purely numeric "año" column is treated as a measure.
                                dataBean.setDimensionMesure("medida");
                                dataBean.setNormalizacion(Prop.datasetName + "-measure:" + Utils.urlify(name));
                                dataBean.setType("xsd:int");
                            } else {
                                dataBean.setNormalizacion(Prop.datasetName + "-dimension:" + Utils.urlify(name));
                                dataBean.setType("xsd:string");
                            }
                        } else {
                            dataBean.setNormalizacion(Prop.datasetName + "-dimension:" + Utils.urlify(name));
                            dataBean.setType("skos:Concept");
                            skosData.add(dataBean);
                        }
                    } else {
                        dataBean.setDimensionMesure("medida");
                        dataBean.setNormalizacion(Prop.datasetName + "-measure:" + Utils.urlify(name));
                        dataBean.setType(detectMeasureColumnType(csvLines, h));
                    }
                    configBean.getMapData().put(dataBean.getName(), dataBean);
                }
                if (skosData.size() > 0) {
                    extractSkosConcept(csvLines, skosData);
                }
            }
        } catch (IOException e) {
            log.error("Error to read lines", e);
        }
        configExtrated.put(configBean.getId(), configBean);
        log.info("Finaliza tratamiento para " + id + letters);
    }
    cont = 0;
    size = configExtrated.size();
    for (ConfigBean configBean : configExtrated.values()) {
        // Builds "-A-TC-TM": every area suffix preceded by a dash.
        StringBuilder letters = new StringBuilder();
        for (String letter : configBean.getLetters()) {
            letters.append("-").append(letter);
        }
        String reportName = "Informe-" + configBean.getId() + letters + ".csv";
        configBean.setNameFile(reportName);
        log.info("Comienza a escribirse el archivo " + reportName + " " + (++cont) + "/" + size);
        configBean.toCSV();
        log.info("Finaliza de escribirse el archivo " + reportName);
    }
    generateSkosMapping();
    log.debug("End generateAllConfig");
}

/**
 * Scans the data rows (every line after the header) of one column whose header
 * contains "año" and reports what its values look like.
 *
 * @param csvLines all lines of the file, header included
 * @param column zero-based index of the column to inspect
 * @param name header text of the column, used only for logging
 * @return {@code "xsd:date"}, {@code "xsd:int"}, or the empty string when no row
 *         provided a recognised value before the scan stopped
 */
private String detectYearColumnType(List<String> csvLines, int column, String name) {
    String type = "";
    for (int j = 1; j < csvLines.size(); j++) {
        String line = Utils.weakClean(csvLines.get(j));
        if (!Utils.v(line)) {
            continue;
        }
        String[] cellsLine = line.split("\t");
        if (cellsLine.length <= column) {
            continue;
        }
        String cell = cellsLine[column];
        if (Utils.isDate(cell) && !type.equals("xsd:int")) {
            type = "xsd:date";
        } else if (Utils.isInteger(cell)) {
            // Once an integer has been seen the column can no longer become a date.
            type = "xsd:int";
        } else {
            log.info("La celda '" + cell + "' de la columna '" + name + "' no es un año");
            break;
        }
    }
    return type;
}

/**
 * Scans the data rows (every line after the header) of one measure column and picks
 * the XSD type for its values.
 *
 * @param csvLines all lines of the file, header included
 * @param column zero-based index of the column to inspect
 * @return {@code "xsd:int"}, {@code "xsd:double"} or {@code "xsd:string"}; the latter
 *         is also the result when no row decided a type
 */
private String detectMeasureColumnType(List<String> csvLines, int column) {
    String type = "";
    for (int j = 1; j < csvLines.size(); j++) {
        String line = Utils.weakClean(csvLines.get(j));
        if (!Utils.v(line)) {
            continue;
        }
        String[] cellsLine = line.split("\t");
        if (cellsLine.length > column) {
            String cell = cellsLine[column];
            if (Utils.isInteger(cell)) {
                if (type.equals("")) {
                    type = "xsd:int";
                }
            } else if (Utils.isDouble(cell)) {
                if (!type.equals("xsd:string")) {
                    type = "xsd:double";
                }
            } else if (Utils.v(cell)) {
                type = "xsd:string";
                break;
            }
        } else {
            // The row has no cell for this column: fall back to string.
            type = "xsd:string";
        }
    }
    if (type.equals("")) {
        type = "xsd:string";
    }
    return type;
}
use of com.localidata.process.bean.DataBean in project local-data-aragopedia by aragonopendata.
the class GenerateData method writeSkosTTL.
/**
 * Appends the SKOS code lists of every entry in {@code dataWithSkos} to
 * {@code DatosTTL/codelists/kos.ttl} under {@code outputDirectoryString}.
 * <p>
 * For each bean that has SKOS entries and whose normalized name has not been written
 * yet, one {@code skos:ConceptScheme} is emitted followed by one
 * {@code skos:Concept} per entry. Entries without a parent are listed as
 * {@code skos:hasTopConcept}; children are linked with {@code skos:narrower}.
 * The prefix block returned by {@code TransformToRDF.addPrefix()} is written together
 * with the first iteration.
 */
private void writeSkosTTL() {
    log.debug("Init createSkos");
    log.info("Init to create skos");
    File kosFile = new File(outputDirectoryString + File.separator + "DatosTTL" + File.separator + "codelists" + File.separator + "kos.ttl");
    StringBuilder schemeTtl = new StringBuilder();
    StringBuilder conceptsTtl = new StringBuilder();
    ArrayList<String> kosCreated = new ArrayList<String>();
    schemeTtl.append(TransformToRDF.addPrefix());
    for (DataBean dataBean : dataWithSkos) {
        if (dataBean != null && !kosCreated.contains(dataBean.getNameNormalized()) && dataBean.getMapSkos().size() > 0) {
            String suject = Prop.host + "/" + Prop.kosName + "/" + Prop.datasetName + "/" + dataBean.getKosName();
            schemeTtl.append("<" + suject + "> " + "a skos:ConceptScheme;\n");
            schemeTtl.append("\tskos:notation \"" + dataBean.getNameNormalized() + "\";\n");
            schemeTtl.append("\trdfs:label \"" + dataBean.getName() + "\";\n");
            for (SkosBean skosBean : dataBean.getMapSkos().values()) {
                if (skosBean == null) {
                    continue;
                }
                String sujectKos = suject + "/" + Utils.urlify(skosBean.getId());
                if (skosBean.getParent() == null) {
                    schemeTtl.append("\tskos:hasTopConcept <" + sujectKos + ">;\n");
                }
                conceptsTtl.append("<" + sujectKos + "> a skos:Concept;\n");
                conceptsTtl.append("\tskos:inScheme <" + suject + ">;\n");
                String label = skosBean.getId();
                if (skosBean.getLabel() != null && !skosBean.getLabel().equals("")) {
                    label = skosBean.getLabel();
                }
                conceptsTtl.append("\tskos:notation \"" + skosBean.getId() + "\";\n");
                conceptsTtl.append("\tskos:prefLabel \"" + Utils.prefLabelClean(label) + "\"");
                if (skosBean.getSons().size() > 0) {
                    conceptsTtl.append(";\n");
                    for (Iterator<SkosBean> itSons = skosBean.getSons().iterator(); itSons.hasNext(); ) {
                        SkosBean son = itSons.next();
                        // Must be built exactly like sujectKos above, otherwise the link
                        // points to a URI that no concept is declared under.
                        conceptsTtl.append("\tskos:narrower <" + suject + "/" + Utils.urlify(son.getId()) + ">");
                        conceptsTtl.append(itSons.hasNext() ? ";\n" : ".\n");
                    }
                } else {
                    conceptsTtl.append(".\n");
                }
                conceptsTtl.append("\n");
            }
            // Every predicate line above ends with ';', so the scheme statement still
            // needs its terminating '.' before the next subject starts.
            schemeTtl.append(".\n\n");
            schemeTtl.append(conceptsTtl);
            kosCreated.add(dataBean.getNameNormalized());
        }
        // Flush per bean so the buffers never hold more than one scheme.
        Utils.stringToFileAppend(schemeTtl.toString(), kosFile);
        schemeTtl.setLength(0);
        conceptsTtl.setLength(0);
    }
    log.info("end to create skos");
    log.debug("End createSkos");
}
use of com.localidata.process.bean.DataBean in project local-data-aragopedia by aragonopendata.
the class GenerateData method readConfig.
/**
 * Loads every configuration file found (recursively) under
 * {@code configDirectoryString} into {@code mapconfig}, then resolves the
 * hierarchical SKOS relations collected in {@code dataWithSkosHierarchical}.
 * <p>
 * Files whose name starts with {@code "mapping"} are ignored. For the others the
 * first eight characters of the name and the {@code .csv}/{@code .xlsx} extension are
 * dropped; the area codes TC, TM, TP and A found in the remainder are removed from it
 * and recorded as letters of the {@link ConfigBean}; trailing dashes are stripped and
 * what is left becomes the id. The file is parsed with {@code readCsv} or
 * {@code readXlsxFile} depending on {@code Prop.formatConfig}, and a line
 * {@code "<id> <areas>"} is appended to {@code areas.txt} in the output directory.
 * <p>
 * Files with a name too short to carry the eight-character prefix, and relations
 * whose target column cannot be found, are logged and skipped.
 */
private void readConfig() {
    log.debug("Init readConfig");
    log.info("Comienza a extraerse la configuración");
    File configDirectoryFile = new File(configDirectoryString);
    File areasReportFile = new File(outputDirectoryString + File.separator + "areas.txt");
    Collection<File> listCSV = FileUtils.listFiles(configDirectoryFile, extensionsConfig, true);
    // Checked in this order; "A" last so it cannot eat the letters of a longer code.
    String[] areaCodes = { "TC", "TM", "TP", "A" };
    int cont = 0;
    int size = listCSV.size();
    for (File file : listCSV) {
        String fileName = file.getName();
        log.info("Se extrae el fichero " + fileName + " " + (++cont) + " " + size);
        if (fileName.startsWith("mapping")) {
            continue;
        }
        if (fileName.length() < 8) {
            // substring(8) below would throw on such a name.
            log.error("Config file name too short to contain an id: " + fileName);
            continue;
        }
        ConfigBean configBean = new ConfigBean();
        configBean.setNameFile(fileName);
        String id = fileName.substring(8);
        id = id.replace(".csv", "");
        id = id.replace(".xlsx", "");
        StringBuilder areas = new StringBuilder();
        for (String areaCode : areaCodes) {
            if (id.contains(areaCode)) {
                id = id.replace(areaCode, "");
                configBean.getLetters().add(areaCode);
                areas.append(areaCode).append(" ");
            }
        }
        // endsWith is safe on an empty id, unlike charAt(length - 1).
        while (id.endsWith("-")) {
            id = id.substring(0, id.length() - 1);
        }
        configBean.setId(id);
        if (Prop.formatConfig.equals("csv")) {
            readCsv(file, configBean);
        } else {
            readXlsxFile(file, configBean);
        }
        mapconfig.put(id, configBean);
        Utils.stringToFileAppend(id + " " + areas + "\n", areasReportFile);
    }
    for (DataBean data1 : dataWithSkosHierarchical) {
        if (data1.getRelationKos() == null) {
            continue;
        }
        ConfigBean ownerConfig = mapconfig.get(data1.getIdConfig());
        DataBean data2 = ownerConfig == null ? null : ownerConfig.getMapData().get(data1.getRelationKos());
        if (data2 == null) {
            log.error("Kos " + data1.getName() + " is related to " + data1.getRelationKos() + " but that column was not found in config " + data1.getIdConfig());
            continue;
        }
        HashMap<String, SkosBean> mapSkos = data1.mergeSkos(data2);
        if (mapSkos != null) {
            log.info("Kos " + data1.getName() + " is parent of " + data2.getName());
            data2.setWriteSkos(false);
            data1.setMapSkos(mapSkos);
            data2.setMapSkos(mapSkos);
            ownerConfig.getMapData().get(data1.getNameNormalized()).setMapSkos(mapSkos);
            mapconfig.get(data2.getIdConfig()).getMapData().get(data2.getNameNormalized()).setMapSkos(mapSkos);
        }
    }
    log.info("Finaliza de extraerse la configuración");
    log.debug("End readConfig");
}
use of com.localidata.process.bean.DataBean in project local-data-aragopedia by aragonopendata.
the class GenerateData method readCsv.
/**
 * Parses a comma-separated configuration file into {@code configBean}.
 * <p>
 * The file is read row-wise, one column per described data column:
 * row 0 name, row 1 normalized name, row 2 normalization, row 3 dimension/measure,
 * row 4 type (defaults to {@code xsd:string}), row 5 SKOS mapping file. Rows 6 to 9
 * are optional and hold, in order, the constant flag, the constant value, the related
 * SKOS column and the SKOS scheme name. Every column produces a {@link DataBean}
 * registered in {@code configBean.getMapData()} under its normalized name; beans with
 * a mapping file are also added to {@code dataWithSkos}, beans with a relation to
 * {@code dataWithSkosHierarchical}, and constant beans to
 * {@code configBean.getListDataConstant()}.
 * <p>
 * Read errors and files with fewer than the six mandatory rows are logged and leave
 * {@code configBean} untouched.
 *
 * @param file configuration file to read (UTF-8)
 * @param configBean bean that receives the parsed columns
 */
private void readCsv(File file, ConfigBean configBean) {
    log.debug("Init readCsv");
    try {
        List<String> csvLines = FileUtils.readLines(file, "UTF-8");
        if (csvLines.size() < 6) {
            log.error("Error read csv " + file.getName() + ": expected at least 6 rows but found " + csvLines.size());
            log.debug("End readCsv");
            return;
        }
        String[] cellsName = csvLines.get(0).split(",");
        String[] cellsNameNormalized = csvLines.get(1).split(",");
        String[] cellsNormalization = csvLines.get(2).split(",");
        String[] cellsDimMesure = csvLines.get(3).split(",");
        String[] cellsType = csvLines.get(4).split(",");
        String[] cellsSkosfile = csvLines.get(5).split(",");
        // Optional rows: each one is present as soon as the file is long enough to hold it.
        int rows = csvLines.size();
        String[] cellsConstant = rows > 6 ? csvLines.get(6).split(",") : null;
        String[] cellsConstantValue = rows > 7 ? csvLines.get(7).split(",") : null;
        String[] cellsRelationKos = rows > 8 ? csvLines.get(8).split(",") : null;
        String[] cellsKosName = rows > 9 ? csvLines.get(9).split(",") : null;
        for (int column = 0; column < cellsName.length; column++) {
            DataBean dataBean = new DataBean();
            dataBean.setName(removeStartEndCaracter(cellsName[column]));
            dataBean.setNameNormalized(removeStartEndCaracter(cellsNameNormalized[column]));
            dataBean.setNormalizacion(removeStartEndCaracter(cellsNormalization[column]));
            dataBean.setDimensionMesure(removeStartEndCaracter(cellsDimMesure[column]));
            dataBean.setIdConfig(configBean.getId());
            String type = removeStartEndCaracter(cellsType[column]);
            dataBean.setType(Utils.v(type) ? type : "xsd:string");
            String skosFile = removeStartEndCaracter(cellsSkosfile[column]);
            if (Utils.v(skosFile)) {
                HashMap<String, SkosBean> mapSkos = readMappingFileCSV(skosFile);
                dataBean.setMapSkos(mapSkos);
                dataWithSkos.add(dataBean);
            }
            configBean.getMapData().put(dataBean.getNameNormalized(), dataBean);
            String constantFlag = optionalCell(cellsConstant, column);
            if (Prop.addDataConstant && constantFlag != null && Utils.v(constantFlag) && constantFlag.equals(Constants.constante)) {
                String constantValue = optionalCell(cellsConstantValue, column);
                if (constantValue != null && Utils.v(constantValue)) {
                    dataBean.setConstant(constantValue);
                    configBean.getListDataConstant().add(dataBean);
                }
            }
            String relationKos = optionalCell(cellsRelationKos, column);
            if (relationKos != null && Utils.v(relationKos)) {
                dataBean.setRelationKos(relationKos);
                dataWithSkosHierarchical.add(dataBean);
            }
            String kosName = optionalCell(cellsKosName, column);
            if (kosName != null && Utils.v(kosName)) {
                dataBean.setKosName(kosName);
            } else {
                // Without an explicit scheme name the normalized column name is used.
                dataBean.setKosName(dataBean.getNameNormalized());
            }
        }
    } catch (IOException e) {
        log.error("Error read csv ", e);
    }
    log.debug("End readCsv");
}

/**
 * Returns the unwrapped cell of an optional row, or {@code null} when the row is
 * absent or shorter than {@code column + 1} cells ({@code String.split} drops
 * trailing empty cells, so optional rows may be shorter than the name row).
 */
private String optionalCell(String[] row, int column) {
    if (row == null || column >= row.length) {
        return null;
    }
    return removeStartEndCaracter(row[column]);
}
use of com.localidata.process.bean.DataBean in project local-data-aragopedia by aragonopendata.
the class TransformToRDF method addHeader.
/**
 * Emits the {@code qb:DataStructureDefinition} for one report and the property
 * definitions of its columns.
 * <p>
 * The DSD header is registered through {@code insertDsd}. The tab-separated header
 * line is then walked cell by cell: each cell is recorded in {@code cleanHeader} and
 * {@code normalizedHeader} and looked up in {@code configBean.getMapData()} by its
 * urlified name. Configured columns with a normalization get a {@code qb:component}
 * blank node; columns not seen before in {@code propertiesList} and not of URI type
 * additionally get a property definition appended to {@code propertiesFile}. Columns
 * without configuration are reported through {@code TransformToRDF.insertError}.
 * When no column is normalized as {@code sdmx-dimension:refPeriod} and the header has
 * more than one cell, a {@code refPeriod} dimension component is added; finally one
 * component per constant in {@code configBean.getListDataConstant()} is added.
 * Every component uses its own blank-node label.
 *
 * @param headerLine tab-separated header line of the report
 * @param nextLine first data line; only written to the debug log here
 * @param fileName report name, used in the DSD URI, label and notation
 * @param numfile number of the file, part of every blank-node label
 * @param lettersList area codes listed in the DSD comment
 * @return the URI of the data structure definition
 */
private String addHeader(String headerLine, String nextLine, String fileName, int numfile, ArrayList<String> lettersList) {
    log.debug("Init addHeader " + headerLine + " " + nextLine + " " + fileName + " " + numfile);
    boolean year = false;
    String resultado = Prop.host + "/" + Prop.eldaName + "/" + Prop.datasetName + "/dsd/" + fileName;
    String notation = "\"DSD-" + fileName + "\"";
    StringBuilder letters = new StringBuilder();
    for (String letter : lettersList) {
        letters.append(letter).append(" ");
    }
    String aux = "<" + resultado + "> a qb:DataStructureDefinition ;" + "\n";
    aux = aux + "\trdfs:label \"Estructura de los cubos de datos que se corresponden con los informes " + fileName + "\"@es ;" + "\n";
    aux = aux + "\tskos:notation " + notation + " ;" + "\n";
    aux = aux + "\trdfs:comment \"Esta estructura afecta a las areas: " + letters + "\"^^xsd:string ." + "\n";
    aux = aux + "\n";
    insertDsd(aux, resultado + " " + notation);
    headerLine = Utils.weakClean(headerLine);
    String[] cells = headerLine.split("\t");
    int col = 1;
    for (String cell : cells) {
        String cleanCell = Utils.weakClean(cell);
        String normalizedCell = Utils.urlify(cell);
        DataBean dataBean = configBean.getMapData().get(normalizedCell);
        cleanHeader.add(cleanCell);
        normalizedHeader.add(normalizedCell);
        if (dataBean == null) {
            if (normalizedCell.equals("")) {
                TransformToRDF.insertError(fileName + ". ERROR. CELL EMPTY " + ". CONFIGURATION FOR THIS COLUMN NOT FOUND ");
                log.error(fileName + ". ERROR. CELL EMPTY " + ". CONFIGURATION FOR THIS COLUMN NOT FOUND ");
            }
            TransformToRDF.insertError(fileName + ". ERROR. Column " + normalizedCell + ". CONFIGURATION FOR THIS COLUMN NOT FOUND ");
            log.error(fileName + ". ERROR. Column " + normalizedCell + ". CONFIGURATION FOR THIS COLUMN NOT FOUND ");
            continue;
        }
        if (dataBean.getNormalizacion() == null) {
            continue;
        }
        boolean noRepetido = !propertiesList.contains(dataBean.getNormalizacion());
        if (noRepetido) {
            propertiesList.add(dataBean.getNormalizacion());
        }
        String node = "_:node" + numfile + "egmfx" + col;
        String componentLine = "<" + resultado + "> qb:component " + node + " ." + "\n";
        if (!dataBean.getNormalizacion().contains("sdmx-dimension:refPeriod")) {
            String componentKey = resultado + " " + dataBean.getDimensionMesure() + " " + dataBean.getNormalizacion();
            if (!dsdList.contains(componentKey)) {
                Utils.stringToFileAppend(componentLine, dsdFile);
            }
            if (!dataBean.getType().contains(Constants.URIType)) {
                aux = node + " " + dataBean.getDimensionMesure() + " " + dataBean.getNormalizacion() + " ." + "\n";
                aux = aux + "\n";
                insertDsd(aux, componentKey);
                if (noRepetido) {
                    String coded = dataBean.getDimensionMesure().equals(Constants.mesure) ? "" : ", qb:CodedProperty ";
                    propertiesContent.append(dataBean.getNormalizacion() + " a " + dataBean.getDimensionMesureProperty() + " , rdf:Property" + coded + ";" + "\n");
                    propertiesContent.append("\trdfs:label \"" + cleanCell + "\"@es ;" + "\n");
                    propertiesContent.append("\trdfs:comment \"" + cleanCell + "\"@es ;" + "\n");
                    propertiesContent.append("\trdfs:range " + dataBean.getType());
                    if (dataBean.getType().equals(Constants.skosType)) {
                        if (dataBean.getMapSkos().keySet().size() > 0) {
                            propertiesContent.append(" ;" + "\n");
                            // The code list URI is the URI of any concept minus its last segment.
                            String key = dataBean.getMapSkos().keySet().iterator().next();
                            String codeList = dataBean.getMapSkos().get(key).getURI();
                            codeList = codeList.substring(0, codeList.lastIndexOf("/"));
                            propertiesContent.append("\tqb:codeList <" + codeList + "> ." + "\n");
                        } else {
                            propertiesContent.append(" ." + "\n");
                            if (cleanCell.equals("")) {
                                TransformToRDF.insertError(fileName + ". ERROR. CELL EMPTY " + ". SKOS FOR THIS COLUMN NOT FOUND ");
                                log.error(fileName + ". ERROR. CELL EMPTY " + ". SKOS FOR THIS COLUMN NOT FOUND ");
                            }
                            TransformToRDF.insertError(fileName + ". ERROR. Column " + cleanCell + ". SKOS FOR THIS COLUMN NOT FOUND ");
                            log.error(fileName + ". ERROR. Column " + cleanCell + ". SKOS FOR THIS COLUMN NOT FOUND ");
                        }
                    } else {
                        propertiesContent.append(" ." + "\n");
                    }
                    propertiesContent.append("" + "\n");
                    Utils.stringToFileAppend(propertiesContent.toString(), propertiesFile);
                    propertiesContent.setLength(0);
                }
            } else {
                aux = node + " " + dataBean.getDimensionMesure() + " " + dataBean.getDimensionMesureSDMX() + ":refArea ." + "\n";
                aux = aux + "\n";
                insertDsd(aux, resultado + " " + dataBean.getDimensionMesure() + " " + dataBean.getDimensionMesureSDMX() + ":refArea");
            }
        } else {
            year = true;
            aux = componentLine;
            aux = aux + node + " " + dataBean.getDimensionMesure() + " " + dataBean.getDimensionMesureSDMX() + ":refPeriod ." + "\n";
            aux = aux + "\n";
            insertDsd(aux, resultado + " " + dataBean.getDimensionMesure() + " " + dataBean.getDimensionMesureSDMX() + ":refPeriod");
        }
        col++;
    }
    if (!year && cells.length > 1) {
        aux = "<" + resultado + "> qb:component _:node" + numfile + "egmfx" + col + " ." + "\n";
        aux = aux + "_:node" + numfile + "egmfx" + col + " qb:dimension sdmx-dimension:refPeriod ." + "\n";
        aux = aux + "\n";
        insertDsd(aux, resultado + " qb:dimension sdmx-dimension:refPeriod");
        // Reserve this label: the constants below must not land on the same blank node.
        col++;
    }
    for (DataBean data : configBean.getListDataConstant()) {
        aux = "<" + resultado + "> qb:component _:node" + numfile + "egmfx" + col + " ." + "\n";
        aux = aux + "_:node" + numfile + "egmfx" + col + " " + data.getDimensionMesure() + " " + data.getNormalizacion() + " ." + "\n";
        aux = aux + "\n";
        insertDsd(aux, resultado + " " + data.getDimensionMesure() + " " + data.getNormalizacion());
        // One blank node per constant; a shared label would merge them into one component.
        col++;
    }
    log.debug("End addHeader");
    return resultado;
}
Aggregations