use of com.localidata.process.bean.ConfigBean in project local-data-aragopedia by aragonopendata.
the class GenerateConfig method generateAllConfig.
/**
 * Builds one configuration report per dataset id from the CSV files found
 * under {@code inputDirectoryString}, then writes every report and the SKOS mapping.
 *
 * <p>Each CSV file name is split into an id and a trailing area suffix
 * (one character when the name ends in {@code "A.csv"}, two otherwise).
 * Files sharing an id contribute to the same {@link ConfigBean}. Every header
 * column is classified as a code column, a dimension or a measure, and the
 * resulting {@link DataBean} is stored in the bean's data map under its name.
 *
 * <p>Files that cannot be read, are empty, or whose name is too short to hold
 * the suffix are logged and do not abort the run.
 */
public void generateAllConfig() {
    log.debug("Init generateAllConfig");
    ArrayList<String> dimension = extractDimensions(dimensionDirectoryString);
    HashMap<String, ConfigBean> configExtrated = new HashMap<String, ConfigBean>();
    File inputDirectoryFile = new File(inputDirectoryString);
    Collection<File> listCSV = FileUtils.listFiles(inputDirectoryFile, extensions, true);
    int cont = 0;
    int size = listCSV.size();
    for (File file : listCSV) {
        ArrayList<DataBean> skosData = new ArrayList<DataBean>();
        String fileName = file.getName();
        // The area suffix is "A" (1 char) or a two-letter code, placed right before ".csv".
        int extensionStart = fileName.length() - 4;
        int lettersStart = extensionStart - (fileName.endsWith("A.csv") ? 1 : 2);
        if (lettersStart < 0) {
            // Too short to contain the suffix: substring() would throw and abort the whole run.
            cont++;
            log.error("Skipping file with unexpected name " + fileName);
            continue;
        }
        String id = fileName.substring(0, lettersStart);
        String letters = fileName.substring(lettersStart, extensionStart);
        log.info("Comienza tratamiento para " + id + letters + " " + (++cont) + "/" + size);
        ConfigBean configBean = configExtrated.get(id);
        if (configBean == null) {
            configBean = new ConfigBean();
            configBean.setId(id);
        }
        configBean.getLetters().add(letters);
        try {
            List<String> csvLines = FileUtils.readLines(file, "UTF-8");
            if (csvLines.isEmpty()) {
                // No header line: csvLines.get(0) would throw an unchecked exception
                // that the IOException handler below does not catch.
                log.error("Error to read lines: " + fileName + " is empty");
            } else {
                String headerLine = Utils.weakClean(csvLines.get(0));
                String[] cells = headerLine.split("\t");
                for (int h = 0; h < cells.length; h++) {
                    String name = cells[h];
                    // Beans are stored under their trimmed name (see the put below),
                    // so the lookup has to use the same key to find an earlier bean.
                    String key = name.trim();
                    DataBean dataBean = configBean.getMapData().get(key);
                    if (dataBean == null) {
                        dataBean = new DataBean();
                        dataBean.setName(key);
                    }
                    String lowerName = name.toLowerCase();
                    if (lowerName.contains("código") || lowerName.contains("codigo")) {
                        // Code columns carry the literal string "null" in all three fields.
                        dataBean.setNormalizacion("null");
                        dataBean.setDimensionMesure("null");
                        dataBean.setType("null");
                    } else if (contains(dimension, name + ".txt")) {
                        dataBean.setDimensionMesure("dim");
                        String beanName = dataBean.getName().toLowerCase();
                        if (beanName.contains("comarca")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-comarca");
                        } else if (beanName.contains("municipio")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-Municipio");
                        } else if (beanName.contains("provincia")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-Provincia");
                        } else if (beanName.contains("comunidad") || beanName.contains("aragón") || beanName.contains("ccaa")) {
                            dataBean.setNormalizacion("sdmx-dimension:refArea");
                            dataBean.setType("URI-Comunidad");
                        } else if (lowerName.contains("año")) {
                            String type = detectYearColumnType(csvLines, h, name);
                            if (type.equals("xsd:date")) {
                                dataBean.setNormalizacion("sdmx-dimension:refPeriod");
                                dataBean.setType("xsd:date");
                            } else if (type.equals("xsd:int")) {
                                // A "year" column holding plain integers is re-classified as a measure.
                                dataBean.setDimensionMesure("medida");
                                dataBean.setNormalizacion(Prop.datasetName + "-measure:" + Utils.urlify(name));
                                dataBean.setType("xsd:int");
                            } else {
                                dataBean.setNormalizacion(Prop.datasetName + "-dimension:" + Utils.urlify(name));
                                dataBean.setType("xsd:string");
                            }
                        } else {
                            dataBean.setNormalizacion(Prop.datasetName + "-dimension:" + Utils.urlify(name));
                            dataBean.setType("skos:Concept");
                            skosData.add(dataBean);
                        }
                    } else {
                        dataBean.setDimensionMesure("medida");
                        dataBean.setNormalizacion(Prop.datasetName + "-measure:" + Utils.urlify(name));
                        dataBean.setType(detectMeasureColumnType(csvLines, h));
                    }
                    configBean.getMapData().put(dataBean.getName(), dataBean);
                }
                if (!skosData.isEmpty()) {
                    extractSkosConcept(csvLines, skosData);
                }
            }
        } catch (IOException e) {
            log.error("Error to read lines", e);
        }
        configExtrated.put(configBean.getId(), configBean);
        log.info("Finaliza tratamiento para " + id + letters);
    }
    cont = 0;
    size = configExtrated.size();
    for (ConfigBean configBean : configExtrated.values()) {
        // Report name: "Informe-<id>" followed by "-<letters>" for each registered suffix.
        StringBuilder letters = new StringBuilder();
        for (String letter : configBean.getLetters()) {
            letters.append("-").append(letter);
        }
        configBean.setNameFile("Informe-" + configBean.getId() + letters + ".csv");
        log.info("Comienza a escribirse el archivo " + "Informe-" + configBean.getId() + letters + ".csv " + (++cont) + "/" + size);
        configBean.toCSV();
        log.info("Finaliza de escribirse el archivo " + "Informe-" + configBean.getId() + letters + ".csv");
    }
    generateSkosMapping();
    log.debug("End generateAllConfig");
}

/**
 * Scans the data rows of one column whose header contains "año" and reports
 * whether its cells are dates or integers.
 *
 * <p>Rows that are blank or too short to reach the column are ignored. The scan
 * stops at the first cell that is neither a date nor an integer; whatever type
 * was decided up to that point is kept.
 *
 * @param csvLines all lines of the CSV file, header at index 0
 * @param column   zero-based index of the column to inspect
 * @param name     header text of the column, used only in the log message
 * @return {@code "xsd:date"}, {@code "xsd:int"}, or {@code ""} when nothing was decided
 */
private String detectYearColumnType(List<String> csvLines, int column, String name) {
    String type = "";
    for (int j = 1; j < csvLines.size(); j++) {
        String line = Utils.weakClean(csvLines.get(j));
        if (!Utils.v(line)) {
            continue;
        }
        String[] cellsLine = line.split("\t");
        if (cellsLine.length <= column) {
            continue;
        }
        String cell = cellsLine[column];
        if (Utils.isDate(cell) && !type.equals("xsd:int")) {
            type = "xsd:date";
        } else if (Utils.isInteger(cell)) {
            type = "xsd:int";
        } else {
            log.info("La celda '" + cell + "' de la columna '" + name + "' no es un año");
            break;
        }
    }
    return type;
}

/**
 * Scans the data rows of a measure column and picks its XSD type.
 *
 * <p>Integer cells yield {@code "xsd:int"} only while no type has been chosen;
 * double cells upgrade the type to {@code "xsd:double"} unless it is already a
 * string; any other cell accepted by {@code Utils.v} makes the column a string
 * and ends the scan. A non-blank row too short to reach the column also makes
 * the column a string. A column with no decisive cell defaults to string.
 *
 * @param csvLines all lines of the CSV file, header at index 0
 * @param column   zero-based index of the column to inspect
 * @return {@code "xsd:int"}, {@code "xsd:double"} or {@code "xsd:string"}
 */
private String detectMeasureColumnType(List<String> csvLines, int column) {
    String type = "";
    for (int j = 1; j < csvLines.size(); j++) {
        String line = Utils.weakClean(csvLines.get(j));
        if (!Utils.v(line)) {
            continue;
        }
        String[] cellsLine = line.split("\t");
        if (cellsLine.length > column) {
            String cell = cellsLine[column];
            if (Utils.isInteger(cell)) {
                if (type.equals("")) {
                    type = "xsd:int";
                }
            } else if (Utils.isDouble(cell)) {
                if (!type.equals("xsd:string")) {
                    type = "xsd:double";
                }
            } else if (Utils.v(cell)) {
                type = "xsd:string";
                break;
            }
        } else {
            type = "xsd:string";
        }
    }
    if (type.equals("")) {
        type = "xsd:string";
    }
    return type;
}
use of com.localidata.process.bean.ConfigBean in project local-data-aragopedia by aragonopendata.
the class GenerateData method readConfig.
/**
 * Loads every configuration file found under {@code configDirectoryString}
 * into {@code mapconfig} and then merges hierarchical SKOS relations.
 *
 * <p>Files whose name starts with {@code "mapping"} are skipped. For the rest,
 * the first eight characters of the name and the {@code .csv}/{@code .xlsx}
 * extension are dropped, the area codes {@code TC}, {@code TM}, {@code TP} and
 * {@code A} are removed from the remainder and recorded as the bean's letters,
 * and trailing dashes are stripped to obtain the id. The id and its area codes
 * are appended to {@code areas.txt} in the output directory.
 *
 * <p>Names too short to carry the eight-character prefix, and SKOS relations
 * that point to an unknown configuration or column, are logged and skipped
 * instead of aborting the load.
 */
private void readConfig() {
    log.debug("Init readConfig");
    log.info("Comienza a extraerse la configuración");
    File configDirectoryFile = new File(configDirectoryString);
    File areasReportFile = new File(outputDirectoryString + File.separator + "areas.txt");
    Collection<File> listCSV = FileUtils.listFiles(configDirectoryFile, extensionsConfig, true);
    // Order matters: the two-letter codes are removed before the single "A".
    String[] areaCodes = { "TC", "TM", "TP", "A" };
    int cont = 0;
    int size = listCSV.size();
    for (File file : listCSV) {
        String fileName = file.getName();
        log.info("Se extrae el fichero " + fileName + " " + (++cont) + " " + size);
        if (fileName.startsWith("mapping")) {
            continue;
        }
        if (fileName.length() < 8) {
            // substring(8) below would throw on such a name.
            log.error("Error al extraer la configuración de " + fileName);
            continue;
        }
        ConfigBean configBean = new ConfigBean();
        configBean.setNameFile(fileName);
        // NOTE(review): the 8 skipped characters presumably are the "Informe-" prefix — confirm.
        String id = fileName.substring(8);
        id = id.replace(".csv", "");
        id = id.replace(".xlsx", "");
        StringBuilder areas = new StringBuilder();
        // NOTE(review): replace() removes every occurrence, so an id that itself
        // contains one of these codes (e.g. a letter 'A') is altered too — verify
        // that ids never contain them.
        for (String code : areaCodes) {
            if (id.contains(code)) {
                id = id.replace(code, "");
                configBean.getLetters().add(code);
                areas.append(code).append(" ");
            }
        }
        // endsWith() is safe on an empty id, unlike charAt(length - 1).
        while (id.endsWith("-")) {
            id = id.substring(0, id.length() - 1);
        }
        configBean.setId(id);
        if (Prop.formatConfig.equals("csv")) {
            readCsv(file, configBean);
        } else {
            readXlsxFile(file, configBean);
        }
        mapconfig.put(id, configBean);
        Utils.stringToFileAppend(id + " " + areas + "\n", areasReportFile);
    }
    for (DataBean data1 : dataWithSkosHierarchical) {
        if (data1.getRelationKos() == null) {
            continue;
        }
        ConfigBean ownerConfig = mapconfig.get(data1.getIdConfig());
        if (ownerConfig == null) {
            log.error("Error al extraer la configuración de " + data1.getIdConfig());
            continue;
        }
        DataBean data2 = ownerConfig.getMapData().get(data1.getRelationKos());
        if (data2 == null) {
            log.error("Kos " + data1.getName() + " has unknown relation " + data1.getRelationKos());
            continue;
        }
        HashMap<String, SkosBean> mapSkos = data1.mergeSkos(data2);
        if (mapSkos != null) {
            log.info("Kos " + data1.getName() + " is parent of " + data2.getName());
            data2.setWriteSkos(false);
            data1.setMapSkos(mapSkos);
            data2.setMapSkos(mapSkos);
            // The beans registered in the configuration maps receive the merged map as well.
            mapconfig.get(data1.getIdConfig()).getMapData().get(data1.getNameNormalized()).setMapSkos(mapSkos);
            mapconfig.get(data2.getIdConfig()).getMapData().get(data2.getNameNormalized()).setMapSkos(mapSkos);
        }
    }
    log.info("Finaliza de extraerse la configuración");
    log.debug("End readConfig");
}
use of com.localidata.process.bean.ConfigBean in project local-data-aragopedia by aragonopendata.
the class GenerateData method writeInformationTTL.
/**
 * Transforms every input CSV that has a loaded configuration into a TTL report.
 *
 * <p>The prefix block from {@code TransformToRDF.addPrefix()} is first appended
 * to the shared properties buffer and to the DSD file. Each CSV name is then
 * split into a configuration id and an area suffix (one character when the name
 * ends in {@code "A.csv"}, two otherwise). Files whose id is present in
 * {@code mapconfig} are handed to a {@code TransformToRDF}; the rest are logged
 * as errors. Any exception raised while handling a file is logged and the loop
 * moves on to the next file.
 */
private void writeInformationTTL() {
    log.debug("Init extractInformation");
    final String ttlRoot = outputDirectoryString + File.separator + "DatosTTL" + File.separator;
    File inputDirectoryFile = new File(inputDirectoryString);
    File propertiesFile = new File(ttlRoot + "codelists" + File.separator + "properties.ttl");
    File dsdFile = new File(ttlRoot + "dataStructures" + File.separator + "dsd.ttl");
    File errorReportFile = new File(outputDirectoryString + File.separator + "errorReport.txt");
    TransformToRDF.propertiesContent.append(TransformToRDF.addPrefix());
    Utils.stringToFileAppend(TransformToRDF.addPrefix().toString(), dsdFile);
    Collection<File> listCSV = FileUtils.listFiles(inputDirectoryFile, extensions, true);
    int numfile = 1;
    for (File file : listCSV) {
        try {
            String csvName = file.getName();
            // The suffix sits right before the 4-character ".csv" extension.
            int extensionStart = csvName.length() - 4;
            int suffixStart = csvName.endsWith("A.csv") ? extensionStart - 1 : extensionStart - 2;
            String fileName = csvName.substring(0, suffixStart);
            String fileLetter = csvName.substring(suffixStart, extensionStart);
            ConfigBean configBean = mapconfig.get(fileName);
            if (configBean == null) {
                log.error("Error al extraer la configuración de " + fileName);
            } else {
                String reportName = fileName + fileLetter;
                String progress = numfile + "/" + listCSV.size();
                File outputDirectoryFile = new File(ttlRoot + "informes" + File.separator + reportName + ".ttl");
                log.info("Init file " + reportName + ". Size " + FileUtils.sizeOf(file) + " " + progress);
                List<String> csvLines = FileUtils.readLines(file, "UTF-8");
                TransformToRDF transformToRDF = new TransformToRDF(csvLines, outputDirectoryFile, propertiesFile, dsdFile, errorReportFile, configBean);
                transformToRDF.initTransformation(reportName, numfile, fileName, dsdList, propertiesList);
                log.info("End file " + outputDirectoryFile.getName() + " " + progress);
            }
            // Only reached when the file was handled without an exception.
            numfile++;
        } catch (Exception e) {
            log.error("Error al extraer la información ", e);
        }
    }
    log.debug("End extractInformation");
}
Aggregations