Search in sources :

Example 31 with FileState

use of org.smartdata.model.FileState in project SSM by Intel-bigdata.

the class TestMetaStore method testInsertUpdateFileState.

/**
 * Verifies that a file state written through {@code insertUpdateFileState} can be read back
 * with {@code getFileState}, first as a normal file and then, for the same path, as a
 * compression state carrying compression info.
 */
@Test
public void testInsertUpdateFileState() throws MetaStoreException {
    final String path = "/test1";

    // Scenario 1: a plain (normal) file state round-trips unchanged.
    FileState normalState = new NormalFileState(path);
    metaStore.insertUpdateFileState(normalState);
    FileState storedNormal = metaStore.getFileState(path);
    Assert.assertEquals(normalState, storedNormal);

    // Scenario 2 (disabled): Compression & Processing (without compression info)
    // fileState = new FileState("/test1", FileState.FileType.COMPRESSION,
    // FileState.FileStage.PROCESSING);
    // metaStore.insertUpdateFileState(fileState);
    // Assert.assertEquals(fileState, metaStore.getFileState("/test1"));

    // Scenario 3: Compression & Done (with compression info), overwriting the same path.
    int bufSize = 1024;
    long rawLength = 100;
    long packedLength = 50;
    Long[] rawOffsets = { 0L, 30L, 60L, 90L };
    Long[] packedOffsets = { 0L, 13L, 30L, 41L };
    FileState compressionState = new CompressionFileState(
        path, bufSize, rawLength, packedLength, rawOffsets, packedOffsets);
    metaStore.insertUpdateFileState(compressionState);
    FileState storedCompression = metaStore.getFileState(path);
    compareCompressionInfo(compressionState, storedCompression);
}
Also used : NormalFileState(org.smartdata.model.NormalFileState) FileState(org.smartdata.model.FileState) CompressionFileState(org.smartdata.model.CompressionFileState) NormalFileState(org.smartdata.model.NormalFileState) CompressionFileState(org.smartdata.model.CompressionFileState) Test(org.junit.Test)

Example 32 with FileState

use of org.smartdata.model.FileState in project SSM by Intel-bigdata.

the class SmallFilePlugin method preSubmitCmdlet.

/**
 * Prepares the objects selected by a rule before cmdlets are submitted for them.
 *
 * <p>When the rule text does not contain {@code COMPACT_ACTION_NAME}, or when
 * {@code objects} is null or empty, {@code objects} is returned untouched. Otherwise the
 * paths are filtered down to valid small files, grouped by their {@code SmallFileStatus},
 * and each group is cut into batches of at most {@code batchSize} paths. Every batch is
 * returned as one JSON array string.
 *
 * @param ruleInfo the rule whose text decides whether compact handling applies
 * @param objects the candidate file paths; may be null or empty
 * @return {@code objects} itself when compact handling does not apply, otherwise a list of
 *     JSON-encoded path batches
 */
@Override
public List<String> preSubmitCmdlet(final RuleInfo ruleInfo, List<String> objects) {
    if (!ruleInfo.getRuleText().contains(COMPACT_ACTION_NAME)) {
        return objects;
    }
    if (objects == null || objects.isEmpty()) {
        LOG.debug("Objects is null or empty.");
        return objects;
    }

    // Split valid small files according to the file permission
    Map<String, FileInfo> containerFileInfoMap = getContainerFileInfos();
    Map<SmallFileStatus, List<String>> smallFileStateMap = new HashMap<>();
    for (String object : objects) {
        LOG.debug("Start handling the file: {}.", object);

        // Container files are never compacted again: skip them.
        if (!object.endsWith("/")) {
            String fileName = object.substring(object.lastIndexOf("/") + 1);
            if (fileName.startsWith(CONTAINER_FILE_PREFIX)
                || containerFileInfoMap.containsKey(object)) {
                LOG.debug("{} is container file.", object);
                continue;
            }
        }

        // Check file info and state
        try {
            FileInfo fileInfo = metaStore.getFile(object);
            FileState fileState = metaStore.getFileState(object);
            // Constant-first equals plus the explicit null check keep a missing state (or a
            // state with a missing type/stage) from raising an NPE, which the catch below
            // would not handle and which would abort the whole batch.
            boolean validSmallFile = fileInfo != null
                && fileState != null
                && fileInfo.getLength() > 0
                && fileInfo.getLength() < containerFileSizeThreshold
                && FileState.FileType.NORMAL.equals(fileState.getFileType())
                && FileState.FileStage.DONE.equals(fileState.getFileStage());
            if (!validSmallFile) {
                LOG.debug("Invalid file {} for small file compact.", object);
                continue;
            }

            SmallFileStatus smallFileStatus = new SmallFileStatus(fileInfo);
            List<String> group = smallFileStateMap.get(smallFileStatus);
            if (group == null) {
                // First file seen for this status: remember its info and open a new group.
                firstFileInfoCache.put(object, fileInfo);
                group = new ArrayList<>();
                smallFileStateMap.put(smallFileStatus, group);
            }
            group.add(object);
        } catch (MetaStoreException e) {
            LOG.error(String.format("Failed to get file info of %s.", object), e);
        }
    }

    // Split small files according to the batch size
    Gson gson = new Gson();
    List<String> smallFileList = new ArrayList<>();
    for (List<String> listElement : smallFileStateMap.values()) {
        int size = listElement.size();
        for (int i = 0; i < size; i += batchSize) {
            int toIndex = Math.min(i + batchSize, size);
            smallFileList.add(gson.toJson(listElement.subList(i, toIndex)));
        }
    }

    // Update container file info cache for preSubmitCmdletDescriptor
    updateContainerFileInfoCache(ruleInfo, containerFileInfoMap);
    return smallFileList;
}
Also used : FileState(org.smartdata.model.FileState) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) MetaStoreException(org.smartdata.metastore.MetaStoreException) FileInfo(org.smartdata.model.FileInfo) ArrayList(java.util.ArrayList) List(java.util.List)

Example 33 with FileState

use of org.smartdata.model.FileState in project SSM by Intel-bigdata.

the class MetaStore method getFileState.

/**
 * Get FileState of the given path.
 *
 * <p>The record returned by {@code fileStateDao} only determines the file type; the
 * type-specific state is then rebuilt: a fresh {@code NormalFileState} or
 * {@code S3FileState}, the state from {@code smallFileDao} for compact files, or the
 * compression info (with the stored stage copied onto it) for compressed files. When no
 * compression info is available, or the type is none of the above, the stored record
 * itself is returned.
 *
 * @param path the file path to look up
 * @return the file state for {@code path}; a {@code NormalFileState} when the lookup
 *     raises {@code EmptyResultDataAccessException}
 * @throws MetaStoreException wrapping any other exception raised during the lookup
 */
public FileState getFileState(String path) throws MetaStoreException {
    try {
        FileState stored = fileStateDao.getByPath(path);
        // Fetch info from corresponding table to regenerate a specific file state
        switch (stored.getFileType()) {
            case NORMAL:
                return new NormalFileState(path);
            case COMPACT:
                return smallFileDao.getFileStateByPath(path);
            case COMPRESSION:
                CompressionFileState compressed = getCompressionInfo(path);
                if (compressed == null) {
                    // No compression info available: fall back to the generic record.
                    return stored;
                }
                compressed.setFileStage(stored.getFileStage());
                return compressed;
            case S3:
                return new S3FileState(path);
            default:
                return stored;
        }
    } catch (EmptyResultDataAccessException e1) {
        return new NormalFileState(path);
    } catch (Exception e2) {
        throw new MetaStoreException(e2);
    }
}
Also used : S3FileState(org.smartdata.model.S3FileState) NormalFileState(org.smartdata.model.NormalFileState) CompactFileState(org.smartdata.model.CompactFileState) FileState(org.smartdata.model.FileState) CompressionFileState(org.smartdata.model.CompressionFileState) NormalFileState(org.smartdata.model.NormalFileState) CompressionFileState(org.smartdata.model.CompressionFileState) EmptyResultDataAccessException(org.springframework.dao.EmptyResultDataAccessException) S3FileState(org.smartdata.model.S3FileState) EmptyResultDataAccessException(org.springframework.dao.EmptyResultDataAccessException) SQLException(java.sql.SQLException)

Example 34 with FileState

use of org.smartdata.model.FileState in project SSM by Intel-bigdata.

the class FileStateDao method getByPaths.

/**
 * Looks up the stored file states for a batch of paths in a single query.
 *
 * @param paths the paths to look up; may be null or empty
 * @return a new mutable map from path to its {@code FileState}, containing only the paths
 *     for which a row was returned; empty when {@code paths} is null or empty
 */
public Map<String, FileState> getByPaths(List<String> paths) {
    Map<String, FileState> fileStateMap = new HashMap<>();
    // An empty collection makes the named parameter expand to an empty "IN ()" list,
    // which most databases reject as a syntax error, so skip the query entirely.
    if (paths == null || paths.isEmpty()) {
        return fileStateMap;
    }

    NamedParameterJdbcTemplate namedParameterJdbcTemplate =
        new NamedParameterJdbcTemplate(dataSource);
    MapSqlParameterSource parameterSource = new MapSqlParameterSource();
    parameterSource.addValue("paths", paths);
    List<FileState> fileStates = namedParameterJdbcTemplate.query(
        "SELECT * FROM " + TABLE_NAME + " WHERE path IN (:paths)",
        parameterSource,
        new FileStateRowMapper());
    for (FileState fileState : fileStates) {
        fileStateMap.put(fileState.getPath(), fileState);
    }
    return fileStateMap;
}
Also used : FileState(org.smartdata.model.FileState) MapSqlParameterSource(org.springframework.jdbc.core.namedparam.MapSqlParameterSource) NamedParameterJdbcTemplate(org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate) HashMap(java.util.HashMap)

Aggregations

FileState (org.smartdata.model.FileState): 34, CompressionFileState (org.smartdata.model.CompressionFileState): 26, CompactFileState (org.smartdata.model.CompactFileState): 19, NormalFileState (org.smartdata.model.NormalFileState): 15, IOException (java.io.IOException): 9, Test (org.junit.Test): 7, ArrayList (java.util.ArrayList): 4, HdfsFileStatus (org.apache.hadoop.hdfs.protocol.HdfsFileStatus): 4, Gson (com.google.gson.Gson): 3, HashMap (java.util.HashMap): 3, Path (org.apache.hadoop.fs.Path): 3, DFSInputStream (org.apache.hadoop.hdfs.DFSInputStream): 3, EmptyResultDataAccessException (org.springframework.dao.EmptyResultDataAccessException): 3, SQLException (java.sql.SQLException): 2, BlockLocation (org.apache.hadoop.fs.BlockLocation): 2, FileStatus (org.apache.hadoop.fs.FileStatus): 2, LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 2, HdfsLocatedFileStatus (org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus): 2, HdfsNamedFileStatus (org.apache.hadoop.hdfs.protocol.HdfsNamedFileStatus): 2, LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 2