Usage of com.baidu.hugegraph.entity.load.FileSetting in the apache/incubator-hugegraph-toolchain project.
Example: class FileMappingService, method extractColumns.
/**
 * Reads the header line and one sample data line from the mapped file and
 * fills the mapping's {@code FileSetting} with column names and values.
 *
 * Leading lines matching the configured skipped-line regex are ignored.
 * If {@code hasHeader} is true, the first real line supplies the column
 * names and the following line supplies the sample values; otherwise
 * synthetic names ("col-1", "col-2", ...) are generated and the first
 * real line itself supplies the values.
 *
 * @param mapping the file mapping whose FileSetting will be filled in
 * @throws InternalException if the file is missing or cannot be read
 */
public void extractColumns(FileMapping mapping) {
    File file = FileUtils.getFile(mapping.getPath());
    FileSetting setting = mapping.getFileSetting();
    String delimiter = setting.getDelimiter();
    Pattern pattern = Pattern.compile(setting.getSkippedLine());
    String[] columnNames;
    String[] columnValues;
    // NOTE(review): FileReader decodes with the platform default charset,
    // ignoring setting.getCharset() -- confirm whether the configured
    // charset should be applied here.
    // try-with-resources replaces the manual IOUtils.closeQuietly() and
    // also closes the reader if an exception escapes mid-read.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String line;
        // Skip leading lines that match the skipped-line pattern
        while ((line = reader.readLine()) != null) {
            if (!pattern.matcher(line).matches()) {
                break;
            }
        }
        Ex.check(line != null, "The file has no data line can treat as header");
        String[] firstLine = StringUtils.split(line, delimiter);
        if (setting.isHasHeader()) {
            // The first line as column names
            columnNames = firstLine;
            // The second line as column values; guard against a file that
            // ends right after the header (the original passed null to split)
            line = reader.readLine();
            Ex.check(line != null, "The file has a header line but no data line");
            // NOTE(review): StringUtil (hugegraph) here vs StringUtils
            // (commons-lang) above -- their split semantics may differ;
            // confirm which one is intended for both lines.
            columnValues = StringUtil.split(line, delimiter);
        } else {
            // Let columns names as: col-1, col-2 ...
            columnNames = new String[firstLine.length];
            for (int i = 1; i <= firstLine.length; i++) {
                columnNames[i - 1] = "col-" + i;
            }
            // The first line as column values
            columnValues = firstLine;
        }
    } catch (FileNotFoundException e) {
        throw new InternalException("The file '%s' is not found", file);
    } catch (IOException e) {
        // NOTE(review): the cause 'e' is dropped; attach it if
        // InternalException offers a Throwable-accepting constructor.
        throw new InternalException("Failed to read header and sample " +
                                    "data from file '%s'", file);
    }
    setting.setColumnNames(Arrays.asList(columnNames));
    setting.setColumnValues(Arrays.asList(columnValues));
}
Usage of com.baidu.hugegraph.entity.load.FileSetting in the apache/incubator-hugegraph-toolchain project.
Example: class LoadTaskService, method buildFileSource.
/**
 * Builds a loader {@code FileSource} from the given file mapping, copying
 * the path, header columns and all file settings (format, delimiter,
 * charset, date format, time zone, skipped-line regex, list format).
 *
 * @param fileMapping the mapping whose settings to copy; its column names
 *                    must already have been extracted
 * @return a fully configured input source for the loader
 */
private FileSource buildFileSource(FileMapping fileMapping) {
    FileSetting setting = fileMapping.getFileSetting();
    Ex.check(setting.getColumnNames() != null, "Must do file setting firstly");

    FileSource source = new FileSource();
    source.path(fileMapping.getPath());
    source.header(setting.getColumnNames().toArray(new String[] {}));
    // NOTE: format and delimiter must be CSV and "," temporarily
    source.format(FileFormat.valueOf(setting.getFormat()));
    source.delimiter(setting.getDelimiter());
    source.charset(setting.getCharset());
    source.dateFormat(setting.getDateFormat());
    source.timeZone(setting.getTimeZone());
    source.skippedLine().regex(setting.getSkippedLine());
    // Copy the list format; the loader class is fully qualified because its
    // name clashes with the entity's ListFormat
    ListFormat entityFormat = setting.getListFormat();
    com.baidu.hugegraph.loader.source.file.ListFormat loaderFormat =
            new com.baidu.hugegraph.loader.source.file.ListFormat();
    loaderFormat.startSymbol(entityFormat.getStartSymbol());
    loaderFormat.endSymbol(entityFormat.getEndSymbol());
    loaderFormat.elemDelimiter(entityFormat.getElemDelimiter());
    source.listFormat(loaderFormat);
    return source;
}
Usage of com.baidu.hugegraph.entity.load.FileSetting in the apache/incubator-hugegraph-toolchain project.
Example: class FileMappingController, method checkEdgeMappingValid.
/**
 * Validates an edge mapping against the schema and the file's columns:
 * neither endpoint vertex label may use the automatic id strategy, the
 * source/target fields must be non-empty subsets of the extracted column
 * names, and the mapped field names must contain no duplicates.
 *
 * @param connId      id of the graph connection to resolve labels against
 * @param edgeMapping the mapping being validated
 * @param fileMapping the file mapping providing the column names
 */
private void checkEdgeMappingValid(int connId, EdgeMapping edgeMapping, FileMapping fileMapping) {
    EdgeLabelEntity edgeLabel = this.elService.get(edgeMapping.getLabel(), connId);
    VertexLabelEntity sourceLabel = this.vlService.get(edgeLabel.getSourceLabel(), connId);
    VertexLabelEntity targetLabel = this.vlService.get(edgeLabel.getTargetLabel(), connId);
    // Automatic ids cannot be supplied from file columns
    Ex.check(!sourceLabel.getIdStrategy().isAutomatic(), "load.file-mapping.vertex.automatic-id-unsupported");
    Ex.check(!targetLabel.getIdStrategy().isAutomatic(), "load.file-mapping.vertex.automatic-id-unsupported");
    Ex.check(!CollectionUtils.isEmpty(edgeMapping.getSourceFields()), "load.file-mapping.edge.source-fields-cannot-be-empty");
    Ex.check(!CollectionUtils.isEmpty(edgeMapping.getTargetFields()), "load.file-mapping.edge.target-fields-cannot-be-empty");
    // Every referenced field must be one of the file's columns
    List<String> columnNames = fileMapping.getFileSetting().getColumnNames();
    Ex.check(columnNames.containsAll(edgeMapping.getSourceFields()), "load.file-mapping.edge.source-fields-should-in-column-names", edgeMapping.getSourceFields(), columnNames);
    Ex.check(columnNames.containsAll(edgeMapping.getTargetFields()), "load.file-mapping.edge.target-fields-should-in-column-names", edgeMapping.getTargetFields(), columnNames);
    Ex.check(CollectionUtil.allUnique(edgeMapping.fieldMappingToMap().values()), "load.file-mapping.mapping-fields-should-no-duplicate");
    this.checkMappingValid(edgeMapping, fileMapping);
}
Usage of com.baidu.hugegraph.entity.load.FileSetting in the apache/incubator-hugegraph-toolchain project.
Example: class FileMappingController, method fileSetting.
/**
 * Updates the file settings of an existing file mapping, then re-extracts
 * the column names and sample values from the file with the new settings.
 *
 * @param id        id of the file mapping to update
 * @param newEntity the submitted settings; merged onto the stored ones
 * @return the updated file mapping
 * @throws ExternalException if no file mapping exists with the given id
 */
@PostMapping("{id}/file-setting")
public FileMapping fileSetting(@PathVariable("id") int id, @RequestBody FileSetting newEntity) {
    this.checkFileSettingNotEmpty(newEntity);
    FileMapping mapping = this.service.get(id);
    if (mapping == null) {
        throw new ExternalException("load.file-mapping.not-exist.id", id);
    }
    // Change format to TEXT if needed
    newEntity.changeFormatIfNeeded();
    // Merge the submitted settings onto the stored ones
    FileSetting merged = this.mergeEntity(mapping.getFileSetting(), newEntity);
    mapping.setFileSetting(merged);
    // Read column names and values then fill it
    this.service.extractColumns(mapping);
    this.service.update(mapping);
    return mapping;
}

/**
 * Checks that every required file-setting field was submitted non-empty.
 */
private void checkFileSettingNotEmpty(FileSetting entity) {
    Ex.check(!StringUtils.isEmpty(entity.getDelimiter()), "load.file-mapping.file-setting.delimiter-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(entity.getCharset()), "load.file-mapping.file-setting.charset-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(entity.getDateFormat()), "load.file-mapping.file-setting.dateformat-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(entity.getTimeZone()), "load.file-mapping.file-setting.timezone-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(entity.getSkippedLine()), "load.file-mapping.file-setting.skippedline-cannot-be-empty");
}
Usage of com.baidu.hugegraph.entity.load.FileSetting in the apache/incubator-hugegraph-toolchain project.
Example: class FileMappingController, method checkVertexMappingValid.
/**
 * Validates a vertex mapping against the schema and the file's columns:
 * the label may not use the automatic id strategy, the id fields must be a
 * non-empty subset of the extracted column names and must match the id
 * strategy (same size as the primary keys, or exactly one otherwise), and
 * the mapped field names must contain no duplicates.
 *
 * @param connId        id of the graph connection to resolve labels against
 * @param vertexMapping the mapping being validated
 * @param fileMapping   the file mapping providing the column names
 */
private void checkVertexMappingValid(int connId, VertexMapping vertexMapping, FileMapping fileMapping) {
    VertexLabelEntity vertexLabel = this.vlService.get(vertexMapping.getLabel(), connId);
    // Automatic ids cannot be supplied from file columns
    Ex.check(!vertexLabel.getIdStrategy().isAutomatic(), "load.file-mapping.vertex.automatic-id-unsupported");
    Ex.check(!CollectionUtils.isEmpty(vertexMapping.getIdFields()), "load.file-mapping.vertex.id-fields-cannot-be-empty");
    // Every id field must be one of the file's columns
    List<String> columnNames = fileMapping.getFileSetting().getColumnNames();
    Ex.check(columnNames.containsAll(vertexMapping.getIdFields()), "load.file-mapping.vertex.id-fields-should-in-column-names", vertexMapping.getIdFields(), columnNames);
    if (vertexLabel.getIdStrategy().isPrimaryKey()) {
        // Primary-key ids: one id field per primary key, and the mapped
        // names must not collide with the primary-key properties
        Ex.check(vertexMapping.getIdFields().size() == vertexLabel.getPrimaryKeys().size(), "load.file-mapping.vertex.id-fields-should-same-size-pks");
        Ex.check(!CollectionUtils.containsAny(vertexMapping.fieldMappingToMap().values(), vertexLabel.getPrimaryKeys()), "load.file-mapping.vertex.mapping-fields-cannot-contains-pk");
    } else {
        // Customized ids: exactly one column supplies the id
        Ex.check(vertexMapping.getIdFields().size() == 1, "load.file-mapping.vertex.id-fields-should-only-one");
    }
    Ex.check(CollectionUtil.allUnique(vertexMapping.fieldMappingToMap().values()), "load.file-mapping.mapping-fields-should-no-duplicate");
    this.checkMappingValid(vertexMapping, fileMapping);
}
End of aggregated usage examples.