Search in sources :

Example 1 with FileSetting

use of com.baidu.hugegraph.entity.load.FileSetting in project incubator-hugegraph-toolchain by apache.

the class FileMappingService method extractColumns.

/**
 * Reads the column names and one line of sample values from the file
 * referenced by the mapping, and stores both on the mapping's FileSetting.
 *
 * Leading lines that fully match the setting's "skipped line" regex are
 * ignored. If the setting declares a header, the first remaining line gives
 * the column names and the line after it gives the sample values; otherwise
 * the first remaining line gives the sample values and the names are
 * generated as "col-1", "col-2", ...
 *
 * @param mapping the file mapping whose path is read and whose file setting
 *                is updated in place
 * @throws InternalException if the file does not exist or cannot be read
 */
public void extractColumns(FileMapping mapping) {
    FileSetting setting = mapping.getFileSetting();
    File file = FileUtils.getFile(mapping.getPath());
    BufferedReader reader;
    try {
        // NOTE(review): FileReader decodes with the platform default charset;
        // the setting also carries a charset - confirm whether it should be
        // used here.
        reader = new BufferedReader(new FileReader(file));
    } catch (FileNotFoundException e) {
        throw new InternalException("The file '%s' is not found", file);
    }
    String[] columnNames;
    String[] columnValues;
    try {
        // Everything that can fail after the file is opened lives inside this
        // try block, so the reader is always closed, even when the setting is
        // null or the skipped-line regex is invalid.
        String delimiter = setting.getDelimiter();
        Pattern pattern = Pattern.compile(setting.getSkippedLine());
        String line;
        while ((line = reader.readLine()) != null) {
            if (!pattern.matcher(line).matches()) {
                break;
            }
        }
        Ex.check(line != null, "The file has no data line can treat as header");
        String[] firstLine = StringUtils.split(line, delimiter);
        if (setting.isHasHeader()) {
            // The first line as column names
            columnNames = firstLine;
            // The second line as column values
            line = reader.readLine();
            // A header-only file has no sample line; fail with a clear
            // message instead of a NullPointerException further down.
            Ex.check(line != null, "The file has a header line but no data line " + "to use as sample values");
            // Split with the same utility as the header so both lines are
            // tokenized identically
            columnValues = StringUtils.split(line, delimiter);
        } else {
            // Generate column names as: col-1, col-2 ...
            columnNames = new String[firstLine.length];
            for (int i = 1; i <= firstLine.length; i++) {
                columnNames[i - 1] = "col-" + i;
            }
            // The first line as column values
            columnValues = firstLine;
        }
    } catch (IOException e) {
        throw new InternalException("Failed to read header and sample " + "data from file '%s'", file);
    } finally {
        IOUtils.closeQuietly(reader);
    }
    setting.setColumnNames(Arrays.asList(columnNames));
    setting.setColumnValues(Arrays.asList(columnValues));
}
Also used : FileSetting(com.baidu.hugegraph.entity.load.FileSetting) Pattern(java.util.regex.Pattern) BufferedReader(java.io.BufferedReader) FileNotFoundException(java.io.FileNotFoundException) FileReader(java.io.FileReader) IOException(java.io.IOException) File(java.io.File) MultipartFile(org.springframework.web.multipart.MultipartFile) InternalException(com.baidu.hugegraph.exception.InternalException)

Example 2 with FileSetting

use of com.baidu.hugegraph.entity.load.FileSetting in project incubator-hugegraph-toolchain by apache.

the class LoadTaskService method buildFileSource.

/**
 * Builds the loader-side FileSource from a stored file mapping by copying the
 * path, header, format, delimiter, charset, date format, time zone,
 * skipped-line regex and list format of its file setting.
 *
 * @param fileMapping the mapping to convert; its file setting must already
 *                    have column names
 * @return the populated FileSource
 */
private FileSource buildFileSource(FileMapping fileMapping) {
    FileSetting fileSetting = fileMapping.getFileSetting();

    // Input source location
    FileSource fileSource = new FileSource();
    fileSource.path(fileMapping.getPath());

    // Column names only exist once the file setting step has been done
    Ex.check(fileSetting.getColumnNames() != null, "Must do file setting firstly");
    fileSource.header(fileSetting.getColumnNames().toArray(new String[0]));

    // NOTE: format and delimiter must be CSV and "," temporarily
    fileSource.format(FileFormat.valueOf(fileSetting.getFormat()));
    fileSource.delimiter(fileSetting.getDelimiter());
    fileSource.charset(fileSetting.getCharset());
    fileSource.dateFormat(fileSetting.getDateFormat());
    fileSource.timeZone(fileSetting.getTimeZone());
    fileSource.skippedLine().regex(fileSetting.getSkippedLine());

    // List format: install a fresh loader ListFormat, then copy the symbols
    // over from the entity ListFormat
    fileSource.listFormat(new com.baidu.hugegraph.loader.source.file.ListFormat());
    ListFormat configured = fileSetting.getListFormat();
    fileSource.listFormat().startSymbol(configured.getStartSymbol());
    fileSource.listFormat().endSymbol(configured.getEndSymbol());
    fileSource.listFormat().elemDelimiter(configured.getElemDelimiter());
    return fileSource;
}
Also used : FileSetting(com.baidu.hugegraph.entity.load.FileSetting) FileSource(com.baidu.hugegraph.loader.source.file.FileSource) ListFormat(com.baidu.hugegraph.entity.load.ListFormat)

Example 3 with FileSetting

use of com.baidu.hugegraph.entity.load.FileSetting in project incubator-hugegraph-toolchain by apache.

the class FileMappingController method checkEdgeMappingValid.

/**
 * Validates an edge mapping against the schema of the given connection and
 * the columns of the mapped file. Each failed condition is reported through
 * Ex.check with its message key.
 *
 * @param connId      id of the connection used to look up labels
 * @param edgeMapping the edge mapping to validate
 * @param fileMapping the file mapping supplying the column names
 */
private void checkEdgeMappingValid(int connId, EdgeMapping edgeMapping, FileMapping fileMapping) {
    EdgeLabelEntity edgeLabel = this.elService.get(edgeMapping.getLabel(), connId);
    VertexLabelEntity sourceLabel = this.vlService.get(edgeLabel.getSourceLabel(), connId);
    VertexLabelEntity targetLabel = this.vlService.get(edgeLabel.getTargetLabel(), connId);

    // Neither endpoint may use the automatic id strategy
    Ex.check(!sourceLabel.getIdStrategy().isAutomatic(), "load.file-mapping.vertex.automatic-id-unsupported");
    Ex.check(!targetLabel.getIdStrategy().isAutomatic(), "load.file-mapping.vertex.automatic-id-unsupported");

    // Both endpoints need at least one mapped field
    Ex.check(!CollectionUtils.isEmpty(edgeMapping.getSourceFields()), "load.file-mapping.edge.source-fields-cannot-be-empty");
    Ex.check(!CollectionUtils.isEmpty(edgeMapping.getTargetFields()), "load.file-mapping.edge.target-fields-cannot-be-empty");

    // Every endpoint field must be one of the file's columns
    List<String> fileColumns = fileMapping.getFileSetting().getColumnNames();
    Ex.check(fileColumns.containsAll(edgeMapping.getSourceFields()), "load.file-mapping.edge.source-fields-should-in-column-names", edgeMapping.getSourceFields(), fileColumns);
    Ex.check(fileColumns.containsAll(edgeMapping.getTargetFields()), "load.file-mapping.edge.target-fields-should-in-column-names", edgeMapping.getTargetFields(), fileColumns);

    // The values of the field mapping must not repeat
    Ex.check(CollectionUtil.allUnique(edgeMapping.fieldMappingToMap().values()), "load.file-mapping.mapping-fields-should-no-duplicate");

    this.checkMappingValid(edgeMapping, fileMapping);
}
Also used : EdgeLabelEntity(com.baidu.hugegraph.entity.schema.EdgeLabelEntity) FileSetting(com.baidu.hugegraph.entity.load.FileSetting) VertexLabelEntity(com.baidu.hugegraph.entity.schema.VertexLabelEntity)

Example 4 with FileSetting

use of com.baidu.hugegraph.entity.load.FileSetting in project incubator-hugegraph-toolchain by apache.

the class FileMappingController method fileSetting.

/**
 * Updates the file setting of the file mapping with the given id.
 *
 * The submitted setting is validated, merged into the stored setting, and
 * the file's columns are re-extracted before the mapping is saved.
 *
 * @param id        id of the file mapping to update
 * @param newEntity the submitted file setting
 * @return the updated file mapping
 * @throws ExternalException if no file mapping exists for the id
 */
@PostMapping("{id}/file-setting")
public FileMapping fileSetting(@PathVariable("id") int id, @RequestBody FileSetting newEntity) {
    // Required text fields of the submitted setting
    Ex.check(!StringUtils.isEmpty(newEntity.getDelimiter()), "load.file-mapping.file-setting.delimiter-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(newEntity.getCharset()), "load.file-mapping.file-setting.charset-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(newEntity.getDateFormat()), "load.file-mapping.file-setting.dateformat-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(newEntity.getTimeZone()), "load.file-mapping.file-setting.timezone-cannot-be-empty");
    Ex.check(!StringUtils.isEmpty(newEntity.getSkippedLine()), "load.file-mapping.file-setting.skippedline-cannot-be-empty");

    FileMapping fileMapping = this.service.get(id);
    if (fileMapping == null) {
        throw new ExternalException("load.file-mapping.not-exist.id", id);
    }

    // Change format to TEXT if needed
    newEntity.changeFormatIfNeeded();
    // Merge the submitted setting into the stored one and put it back
    fileMapping.setFileSetting(this.mergeEntity(fileMapping.getFileSetting(), newEntity));

    // Read column names and values then fill it
    this.service.extractColumns(fileMapping);
    this.service.update(fileMapping);
    return fileMapping;
}
Also used : FileMapping(com.baidu.hugegraph.entity.load.FileMapping) FileSetting(com.baidu.hugegraph.entity.load.FileSetting) ExternalException(com.baidu.hugegraph.exception.ExternalException) PostMapping(org.springframework.web.bind.annotation.PostMapping)

Example 5 with FileSetting

use of com.baidu.hugegraph.entity.load.FileSetting in project incubator-hugegraph-toolchain by apache.

the class FileMappingController method checkVertexMappingValid.

/**
 * Validates a vertex mapping against the schema of the given connection and
 * the columns of the mapped file. Each failed condition is reported through
 * Ex.check with its message key.
 *
 * @param connId        id of the connection used to look up the label
 * @param vertexMapping the vertex mapping to validate
 * @param fileMapping   the file mapping supplying the column names
 */
private void checkVertexMappingValid(int connId, VertexMapping vertexMapping, FileMapping fileMapping) {
    VertexLabelEntity vertexLabel = this.vlService.get(vertexMapping.getLabel(), connId);
    Ex.check(!vertexLabel.getIdStrategy().isAutomatic(), "load.file-mapping.vertex.automatic-id-unsupported");
    Ex.check(!CollectionUtils.isEmpty(vertexMapping.getIdFields()), "load.file-mapping.vertex.id-fields-cannot-be-empty");

    // Every id field must be one of the file's columns
    List<String> fileColumns = fileMapping.getFileSetting().getColumnNames();
    Ex.check(fileColumns.containsAll(vertexMapping.getIdFields()), "load.file-mapping.vertex.id-fields-should-in-column-names", vertexMapping.getIdFields(), fileColumns);

    if (!vertexLabel.getIdStrategy().isPrimaryKey()) {
        // Non primary-key strategies take exactly one id field
        Ex.check(vertexMapping.getIdFields().size() == 1, "load.file-mapping.vertex.id-fields-should-only-one");
    } else {
        // One id field per primary key, and no primary key among the mapped fields
        Ex.check(vertexMapping.getIdFields().size() == vertexLabel.getPrimaryKeys().size(), "load.file-mapping.vertex.id-fields-should-same-size-pks");
        Ex.check(!CollectionUtils.containsAny(vertexMapping.fieldMappingToMap().values(), vertexLabel.getPrimaryKeys()), "load.file-mapping.vertex.mapping-fields-cannot-contains-pk");
    }

    // The values of the field mapping must not repeat
    Ex.check(CollectionUtil.allUnique(vertexMapping.fieldMappingToMap().values()), "load.file-mapping.mapping-fields-should-no-duplicate");

    this.checkMappingValid(vertexMapping, fileMapping);
}
Also used : FileSetting(com.baidu.hugegraph.entity.load.FileSetting) VertexLabelEntity(com.baidu.hugegraph.entity.schema.VertexLabelEntity)

Aggregations

FileSetting (com.baidu.hugegraph.entity.load.FileSetting)6 VertexLabelEntity (com.baidu.hugegraph.entity.schema.VertexLabelEntity)2 FileMapping (com.baidu.hugegraph.entity.load.FileMapping)1 ListFormat (com.baidu.hugegraph.entity.load.ListFormat)1 EdgeLabelEntity (com.baidu.hugegraph.entity.schema.EdgeLabelEntity)1 ExternalException (com.baidu.hugegraph.exception.ExternalException)1 InternalException (com.baidu.hugegraph.exception.InternalException)1 FileSource (com.baidu.hugegraph.loader.source.file.FileSource)1 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 FileReader (java.io.FileReader)1 IOException (java.io.IOException)1 Pattern (java.util.regex.Pattern)1 PostMapping (org.springframework.web.bind.annotation.PostMapping)1 MultipartFile (org.springframework.web.multipart.MultipartFile)1