Search in sources :

Example 1 with LoadMetadataDetails

use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.

the class CarbonUpdateUtil method cleanUpDeltaFiles.

/**
 * Handles the clean up of old carbondata files, index files, delete delta files
 * and update status files.
 *
 * @param table clean up will be handled on this table.
 * @param forceDelete if true then max query execution timeout will not be considered.
 */
public static void cleanUpDeltaFiles(CarbonTable table, boolean forceDelete) {
    SegmentStatusManager ssm = new SegmentStatusManager(table.getAbsoluteTableIdentifier());
    CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(table.getAbsoluteTableIdentifier().getStorePath(), table.getAbsoluteTableIdentifier().getCarbonTableIdentifier());
    LoadMetadataDetails[] details = ssm.readLoadMetadata(table.getMetaDataFilepath());
    // The update status file name is resolved from the whole details array, so it is
    // looked up once instead of once per segment. With no segments it stays empty,
    // which skips the status-file clean up below.
    String validUpdateStatusFile = "";
    if (details.length > 0) {
        validUpdateStatusFile = ssm.getUpdateStatusFileName(details);
    }
    for (LoadMetadataDetails segment : details) {
        String loadStatus = segment.getLoadStatus();
        boolean loaded = loadStatus.equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS)
            || loadStatus.equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_PARTIAL_SUCCESS);
        // only successfully (or partially successfully) loaded segments are cleaned.
        if (loaded) {
            cleanUpSegmentDeltaFiles(table, carbonTablePath, segment, forceDelete);
        }
    }
    // delete the update table status files which are old.
    if (null != validUpdateStatusFile && !validUpdateStatusFile.isEmpty()) {
        final String updateStatusTimestamp = validUpdateStatusFile.substring(validUpdateStatusFile.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1);
        String metadataDir = carbonTablePath.getMetadataDirectoryPath();
        CarbonFile metaFolder = FileFactory.getCarbonFile(metadataDir, FileFactory.getFileType(metadataDir));
        CarbonFile[] invalidUpdateStatusFiles = metaFolder.listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile file) {
                // we only send invalid ones to delete: update status files whose
                // name does not end with the currently valid timestamp.
                String name = file.getName();
                return name.startsWith(CarbonCommonConstants.TABLEUPDATESTATUS_FILENAME)
                    && !name.endsWith(updateStatusTimestamp);
            }
        });
        deleteExpiredFiles(invalidUpdateStatusFiles, forceDelete, true);
    }
}

/**
 * Cleans the invalid update delta, index and delete delta files of one segment.
 */
private static void cleanUpSegmentDeltaFiles(CarbonTable table, CarbonTablePath carbonTablePath, LoadMetadataDetails segment, boolean forceDelete) {
    String segmentName = segment.getLoadName();
    // take the list of files from this segment.
    String segmentPath = carbonTablePath.getCarbonDataDirectoryPath("0", segmentName);
    CarbonFile segDir = FileFactory.getCarbonFile(segmentPath, FileFactory.getFileType(segmentPath));
    CarbonFile[] allSegmentFiles = segDir.listFiles();
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table.getAbsoluteTableIdentifier());
    // invalid update delta files first, then the same for the index files.
    deleteExpiredFiles(updateStatusManager.getUpdateDeltaFilesList(segmentName, false, CarbonCommonConstants.UPDATE_DELTA_FILE_EXT, true, allSegmentFiles), forceDelete, false);
    deleteExpiredFiles(updateStatusManager.getUpdateDeltaFilesList(segmentName, false, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT, true, allSegmentFiles), forceDelete, false);
    // now handle all the delete delta files which need to be deleted.
    // there are 2 cases here.
    // 1. if the block is marked as compacted then the corresponding delta files
    //    can be deleted if query exec timeout is done.
    // 2. if the block is in success state then also there can be delete
    //    delta compaction happened and old files can be deleted.
    SegmentUpdateDetails[] updateDetails = updateStatusManager.readLoadMetadata();
    for (SegmentUpdateDetails block : updateDetails) {
        if (!block.getSegmentName().equalsIgnoreCase(segmentName)) {
            continue;
        }
        // the flag selects case 1 (true) or case 2 (false) of the delete delta lookup.
        boolean blockInvalid = CarbonUpdateUtil.isBlockInvalid(block.getStatus());
        deleteExpiredFiles(updateStatusManager.getDeleteDeltaInvalidFilesList(segmentName, block, blockInvalid, allSegmentFiles), forceDelete, false);
        if (blockInvalid) {
            // case 1 additionally removes every file related to the invalid block.
            deleteExpiredFiles(updateStatusManager.getAllBlockRelatedFiles(block.getBlockName(), allSegmentFiles, block.getActualBlockName()), forceDelete, false);
        }
    }
}

/**
 * Passes every file of the array to compareTimestampsAndDelete with the given flags.
 */
private static void deleteExpiredFiles(CarbonFile[] files, boolean forceDelete, boolean isUpdateStatusFile) {
    for (CarbonFile invalidFile : files) {
        compareTimestampsAndDelete(invalidFile, forceDelete, isUpdateStatusFile);
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager)

Example 2 with LoadMetadataDetails

use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.

the class StoreCreator method writeLoadMetadata.

/**
 * Adds one load entry (load name "0", status "SUCCESS") to the given list and writes
 * the whole list as JSON to the load metadata file of the schema's table.
 *
 * @param schema schema whose table supplies the metadata file path
 * @param databaseName not read by this method
 * @param tableName not read by this method
 * @param listOfLoadFolderDetails list the new entry is appended to before it is serialized
 * @throws IOException if opening, writing or flushing the metadata file fails
 */
public static void writeLoadMetadata(CarbonDataLoadSchema schema, String databaseName, String tableName, List<LoadMetadataDetails> listOfLoadFolderDetails) throws IOException {
    LoadMetadataDetails loadMetadataDetails = new LoadMetadataDetails();
    loadMetadataDetails.setLoadEndTime(System.currentTimeMillis());
    loadMetadataDetails.setLoadStatus("SUCCESS");
    loadMetadataDetails.setLoadName(String.valueOf(0));
    loadMetadataDetails.setLoadStartTime(loadMetadataDetails.getTimeStamp(readCurrentTime()));
    listOfLoadFolderDetails.add(loadMetadataDetails);
    String dataLoadLocation = schema.getCarbonTable().getMetaDataFilepath() + File.separator + CarbonCommonConstants.LOADMETADATA_FILENAME;
    Gson gsonObjectToWrite = new Gson();
    AtomicFileOperations writeOperation = new AtomicFileOperationsImpl(dataLoadLocation, FileFactory.getFileType(dataLoadLocation));
    BufferedWriter brWriter = null;
    try {
        DataOutputStream dataOutputStream = writeOperation.openForWrite(FileWriteOperation.OVERWRITE);
        brWriter = new BufferedWriter(new OutputStreamWriter(dataOutputStream, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET)));
        String metadataInstance = gsonObjectToWrite.toJson(listOfLoadFolderDetails.toArray());
        brWriter.write(metadataInstance);
        // Flush inside the try so that a failing flush still reaches the close below;
        // flushing from the finally block skipped the close whenever flush threw.
        brWriter.flush();
    } finally {
        CarbonUtil.closeStreams(brWriter);
    }
    // only reached when the write succeeded.
    writeOperation.close();
}
Also used : LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) DataOutputStream(java.io.DataOutputStream) Gson(com.google.gson.Gson) OutputStreamWriter(java.io.OutputStreamWriter) AtomicFileOperations(org.apache.carbondata.core.fileoperations.AtomicFileOperations) AtomicFileOperationsImpl(org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter)

Example 3 with LoadMetadataDetails

use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.

the class StoreCreator method createCarbonStore.

/**
 * Create store without any restructure.
 *
 * Deletes the store directory, creates the table, writes its dictionary and then
 * loads {@code ../hadoop/src/test/resources/data.csv} through {@code executeGraph}.
 * Any exception is printed to standard error and not propagated to the caller.
 */
public static void createCarbonStore() {
    try {
        String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
        File storeDir = new File(absoluteTableIdentifier.getStorePath());
        CarbonUtil.deleteFoldersAndFiles(storeDir);
        CarbonProperties properties = CarbonProperties.getInstance();
        properties.addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, absoluteTableIdentifier.getStorePath());
        CarbonTable table = createTable();
        writeDictionary(factFilePath, table);
        CarbonDataLoadSchema schema = new CarbonDataLoadSchema(table);
        CarbonLoadModel loadModel = new CarbonLoadModel();
        loadModel.setCarbonDataLoadSchema(schema);
        loadModel.setDatabaseName(absoluteTableIdentifier.getCarbonTableIdentifier().getDatabaseName());
        // the table name is set once; the second identical call was redundant.
        loadModel.setTableName(absoluteTableIdentifier.getCarbonTableIdentifier().getTableName());
        loadModel.setFactFilePath(factFilePath);
        loadModel.setLoadMetadataDetails(new ArrayList<LoadMetadataDetails>());
        loadModel.setStorePath(absoluteTableIdentifier.getStorePath());
        loadModel.setDateFormat(null);
        loadModel.setDefaultTimestampFormat(properties.getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
        loadModel.setDefaultDateFormat(properties.getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
        // these options are passed as "<option name>,<value>" strings.
        loadModel.setSerializationNullFormat(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," + "\\N");
        loadModel.setBadRecordsLoggerEnable(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + "false");
        loadModel.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + "FORCE");
        loadModel.setIsEmptyDataBadRecord(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," + "false");
        loadModel.setCsvHeader("ID,date,country,name,phonetype,serialname,salary");
        loadModel.setCsvHeaderColumns(loadModel.getCsvHeader().split(","));
        loadModel.setTaskNo("0");
        loadModel.setSegmentId("0");
        loadModel.setPartitionId("0");
        loadModel.setFactTimeStamp(System.currentTimeMillis());
        loadModel.setMaxColumns("10");
        executeGraph(loadModel, absoluteTableIdentifier.getStorePath());
    } catch (Exception e) {
        // Best effort: the signature declares no exception, so the failure is only reported.
        e.printStackTrace();
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CarbonLoadModel(org.apache.carbondata.processing.model.CarbonLoadModel) CarbonDataLoadSchema(org.apache.carbondata.processing.model.CarbonDataLoadSchema) File(java.io.File) IOException(java.io.IOException)

Example 4 with LoadMetadataDetails

use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.

the class StoreCreator method executeGraph.

/**
 * Execute graph which will further load data.
 *
 * Registers the load properties, reads the fact CSV of the load model through a
 * CSV record reader, runs the data load, writes the load metadata and finally moves
 * the first "part" file found in a sub folder of Segment_0 up into the segment folder.
 *
 * @param loadModel load model describing the database, table and fact file
 * @param storeLocation root directory of the store
 * @throws Exception if the load fails or the segment directory cannot be listed
 */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    new File(storeLocation).mkdirs();
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties properties = CarbonProperties.getInstance();
    properties.addProperty(tempLocationKey, storeLocation);
    properties.addProperty("store_output_location", outPutLoc);
    properties.addProperty("send.signal.load", "false");
    properties.addProperty("carbon.is.columnar.storage", "true");
    properties.addProperty("carbon.dimension.split.value.in.columnar", "1");
    properties.addProperty("carbon.is.fullyfilled.bits", "true");
    properties.addProperty("is.int.based.indexer", "true");
    properties.addProperty("aggregate.columnar.keyblock", "true");
    properties.addProperty("high.cardinality.value", "100000");
    properties.addProperty("is.compressed.keyblock", "false");
    properties.addProperty("carbon.leaf.node.size", "120000");
    String graphPath = outPutLoc + File.separator + databaseName + File.separator + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        path.delete();
    }
    SchemaInfo info = new SchemaInfo();
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0, new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, properties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setMaxColumns(configuration, "10");
    CSVInputFormat.setNumberOfColumns(configuration, "7");
    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader = format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator = new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });
    info.setDatabaseName(databaseName);
    info.setTableName(tableName);
    // The second argument is the database name; the table name used to be passed twice.
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), databaseName, tableName, new ArrayList<LoadMetadataDetails>());
    String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
    File[] folderList = new File(segLocation).listFiles();
    if (folderList == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IllegalStateException("Segment directory is missing or unreadable: " + segLocation);
    }
    // the last directory in the listing wins, as before.
    File folder = null;
    for (File candidate : folderList) {
        if (candidate.isDirectory()) {
            folder = candidate;
        }
    }
    if (folder == null) {
        // no sub folder, so there is nothing to move up.
        return;
    }
    File factFile = null;
    File[] files = folder.listFiles();
    if (files != null) {
        for (File candidate : files) {
            if (!candidate.isDirectory() && candidate.getName().startsWith("part")) {
                factFile = candidate;
                break;
            }
        }
    }
    // Remove the sub folder only after the fact file was really moved out of it,
    // otherwise a failed rename would delete the loaded data.
    if (factFile != null && factFile.renameTo(new File(segLocation + "/" + factFile.getName()))) {
        CarbonUtil.deleteFoldersAndFiles(folder);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CSVRecordReaderIterator(org.apache.carbondata.processing.csvload.CSVRecordReaderIterator) StringArrayWritable(org.apache.carbondata.processing.csvload.StringArrayWritable) NullWritable(org.apache.hadoop.io.NullWritable) CSVInputFormat(org.apache.carbondata.processing.csvload.CSVInputFormat) BlockDetails(org.apache.carbondata.processing.csvload.BlockDetails) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) File(java.io.File) DataLoadExecutor(org.apache.carbondata.processing.newflow.DataLoadExecutor) SchemaInfo(org.apache.carbondata.processing.api.dataloader.SchemaInfo)

Example 5 with LoadMetadataDetails

use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.

the class LoadMetadataDetailsUnitTest method testEqualsObjectIsLoadMetadataDetailsLoadNameNull.

@Test
public void testEqualsObjectIsLoadMetadataDetailsLoadNameNull() throws Exception {
    // Compare the shared loadMetadataDetails instance against another instance whose
    // load name is "test"; the two must not be equal.
    LoadMetadataDetails other = new LoadMetadataDetails();
    other.setLoadName("test");
    assertEquals(false, loadMetadataDetails.equals(other));
}
Also used : LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) Test(org.junit.Test)

Aggregations

LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)27 ArrayList (java.util.ArrayList)11 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)10 IOException (java.io.IOException)9 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)7 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)7 ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock)6 File (java.io.File)4 Test (org.junit.Test)4 ParseException (java.text.ParseException)3 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)3 Gson (com.google.gson.Gson)2 BufferedWriter (java.io.BufferedWriter)2 DataOutputStream (java.io.DataOutputStream)2 OutputStreamWriter (java.io.OutputStreamWriter)2 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)2 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)2 AtomicFileOperations (org.apache.carbondata.core.fileoperations.AtomicFileOperations)2 AtomicFileOperationsImpl (org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl)2 SegmentUpdateDetails (org.apache.carbondata.core.mutate.SegmentUpdateDetails)2