use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.
the class CarbonUpdateUtil method cleanUpDeltaFiles.
/**
 * Handles clean up of old carbondata files, index files, delete delta and
 * update status files.
 * @param table clean up will be handled on this table.
 * @param forceDelete if true then the max query execution timeout will not be considered.
 */
public static void cleanUpDeltaFiles(CarbonTable table, boolean forceDelete) {
  SegmentStatusManager ssm = new SegmentStatusManager(table.getAbsoluteTableIdentifier());
  CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(
      table.getAbsoluteTableIdentifier().getStorePath(),
      table.getAbsoluteTableIdentifier().getCarbonTableIdentifier());
  LoadMetadataDetails[] details = ssm.readLoadMetadata(table.getMetaDataFilepath());
  String validUpdateStatusFile = "";
  for (LoadMetadataDetails segment : details) {
    // the update status file name is derived from the full details array and is
    // the same for every segment.
    validUpdateStatusFile = ssm.getUpdateStatusFileName(details);
    if (segment.getLoadStatus().equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS)
        || segment.getLoadStatus()
            .equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_PARTIAL_SUCCESS)) {
      // take the list of files from this segment.
      String segmentPath = carbonTablePath.getCarbonDataDirectoryPath("0", segment.getLoadName());
      CarbonFile segDir =
          FileFactory.getCarbonFile(segmentPath, FileFactory.getFileType(segmentPath));
      CarbonFile[] allSegmentFiles = segDir.listFiles();
      // scan through the segment and find the carbondata files and index files.
      SegmentUpdateStatusManager updateStatusManager =
          new SegmentUpdateStatusManager(table.getAbsoluteTableIdentifier());
      // get invalid update delta files.
      CarbonFile[] invalidUpdateDeltaFiles = updateStatusManager.getUpdateDeltaFilesList(
          segment.getLoadName(), false, CarbonCommonConstants.UPDATE_DELTA_FILE_EXT, true,
          allSegmentFiles);
      for (CarbonFile invalidFile : invalidUpdateDeltaFiles) {
        compareTimestampsAndDelete(invalidFile, forceDelete, false);
      }
      // do the same for the index files.
      CarbonFile[] invalidIndexFiles = updateStatusManager.getUpdateDeltaFilesList(
          segment.getLoadName(), false, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT, true,
          allSegmentFiles);
      for (CarbonFile invalidFile : invalidIndexFiles) {
        compareTimestampsAndDelete(invalidFile, forceDelete, false);
      }
      // now handle all the delete delta files which need to be deleted.
      // there are 2 cases here:
      // 1. if the block is marked as compacted then the corresponding delta files
      //    can be deleted once the query execution timeout has elapsed.
      // 2. if the block is in success state, a delete delta compaction may still
      //    have happened and the old files can be deleted.
      SegmentUpdateDetails[] updateDetails = updateStatusManager.readLoadMetadata();
      for (SegmentUpdateDetails block : updateDetails) {
        CarbonFile[] completeListOfDeleteDeltaFiles;
        CarbonFile[] invalidDeleteDeltaFiles;
        if (!block.getSegmentName().equalsIgnoreCase(segment.getLoadName())) {
          continue;
        }
        // case 1
        if (CarbonUpdateUtil.isBlockInvalid(block.getStatus())) {
          completeListOfDeleteDeltaFiles = updateStatusManager
              .getDeleteDeltaInvalidFilesList(segment.getLoadName(), block, true, allSegmentFiles);
          for (CarbonFile invalidFile : completeListOfDeleteDeltaFiles) {
            compareTimestampsAndDelete(invalidFile, forceDelete, false);
          }
          CarbonFile[] blockRelatedFiles = updateStatusManager.getAllBlockRelatedFiles(
              block.getBlockName(), allSegmentFiles, block.getActualBlockName());
          for (CarbonFile invalidFile : blockRelatedFiles) {
            compareTimestampsAndDelete(invalidFile, forceDelete, false);
          }
        } else {
          // case 2
          invalidDeleteDeltaFiles = updateStatusManager
              .getDeleteDeltaInvalidFilesList(segment.getLoadName(), block, false, allSegmentFiles);
          for (CarbonFile invalidFile : invalidDeleteDeltaFiles) {
            compareTimestampsAndDelete(invalidFile, forceDelete, false);
          }
        }
      }
    }
  }
  // delete the update table status files which are old.
  if (null != validUpdateStatusFile && !validUpdateStatusFile.isEmpty()) {
    final String updateStatusTimestamp = validUpdateStatusFile
        .substring(validUpdateStatusFile.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1);
    CarbonFile metaFolder = FileFactory.getCarbonFile(carbonTablePath.getMetadataDirectoryPath(),
        FileFactory.getFileType(carbonTablePath.getMetadataDirectoryPath()));
    CarbonFile[] invalidUpdateStatusFiles = metaFolder.listFiles(new CarbonFileFilter() {
      @Override
      public boolean accept(CarbonFile file) {
        if (file.getName().startsWith(CarbonCommonConstants.TABLEUPDATESTATUS_FILENAME)) {
          // we only send invalid ones to delete.
          if (!file.getName().endsWith(updateStatusTimestamp)) {
            return true;
          }
        }
        return false;
      }
    });
    for (CarbonFile invalidFile : invalidUpdateStatusFiles) {
      compareTimestampsAndDelete(invalidFile, forceDelete, true);
    }
  }
}
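For orientation, here is a minimal call-site sketch. How the CarbonTable handle is obtained is an assumption; the lookup by "databaseName_tableName" unique name via CarbonMetadata and the name "default_t1" are hypothetical for this context:

// Hypothetical caller: housekeeping after an update/delete cycle.
CarbonTable table = CarbonMetadata.getInstance().getCarbonTable("default_t1");
// forceDelete = false keeps files younger than the max query execution
// timeout, so queries still running against the old files are not broken.
CarbonUpdateUtil.cleanUpDeltaFiles(table, false);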
use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.
the class StoreCreator method writeLoadMetadata.
public static void writeLoadMetadata(CarbonDataLoadSchema schema, String databaseName,
    String tableName, List<LoadMetadataDetails> listOfLoadFolderDetails) throws IOException {
  LoadMetadataDetails loadMetadataDetails = new LoadMetadataDetails();
  loadMetadataDetails.setLoadEndTime(System.currentTimeMillis());
  loadMetadataDetails.setLoadStatus("SUCCESS");
  loadMetadataDetails.setLoadName(String.valueOf(0));
  loadMetadataDetails.setLoadStartTime(loadMetadataDetails.getTimeStamp(readCurrentTime()));
  listOfLoadFolderDetails.add(loadMetadataDetails);
  String dataLoadLocation = schema.getCarbonTable().getMetaDataFilepath() + File.separator
      + CarbonCommonConstants.LOADMETADATA_FILENAME;
  DataOutputStream dataOutputStream;
  Gson gsonObjectToWrite = new Gson();
  BufferedWriter brWriter = null;
  AtomicFileOperations writeOperation =
      new AtomicFileOperationsImpl(dataLoadLocation, FileFactory.getFileType(dataLoadLocation));
  try {
    dataOutputStream = writeOperation.openForWrite(FileWriteOperation.OVERWRITE);
    brWriter = new BufferedWriter(new OutputStreamWriter(dataOutputStream,
        Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET)));
    String metadataInstance = gsonObjectToWrite.toJson(listOfLoadFolderDetails.toArray());
    brWriter.write(metadataInstance);
  } finally {
    if (null != brWriter) {
      brWriter.flush();
    }
    CarbonUtil.closeStreams(brWriter);
  }
  writeOperation.close();
}
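The status file written above is a plain Gson array behind an atomic rename, so it can be read back symmetrically. A minimal sketch, assuming the same path construction and reusing the types already imported by StoreCreator; this mirrors what SegmentStatusManager.readLoadMetadata does internally:

String statusPath = schema.getCarbonTable().getMetaDataFilepath() + File.separator
    + CarbonCommonConstants.LOADMETADATA_FILENAME;
AtomicFileOperations readOperation =
    new AtomicFileOperationsImpl(statusPath, FileFactory.getFileType(statusPath));
try (BufferedReader reader = new BufferedReader(new InputStreamReader(
    readOperation.openForRead(), Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET)))) {
  // deserialize the same array that writeLoadMetadata serialized.
  LoadMetadataDetails[] loads = new Gson().fromJson(reader, LoadMetadataDetails[].class);
  System.out.println("segments recorded: " + loads.length);
}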
use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.
the class StoreCreator method createCarbonStore.
/**
* Create store without any restructure
*/
public static void createCarbonStore() {
  try {
    String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
    File storeDir = new File(absoluteTableIdentifier.getStorePath());
    CarbonUtil.deleteFoldersAndFiles(storeDir);
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS,
        absoluteTableIdentifier.getStorePath());
    CarbonTable table = createTable();
    writeDictionary(factFilePath, table);
    CarbonDataLoadSchema schema = new CarbonDataLoadSchema(table);
    CarbonLoadModel loadModel = new CarbonLoadModel();
    loadModel.setCarbonDataLoadSchema(schema);
    loadModel.setDatabaseName(absoluteTableIdentifier.getCarbonTableIdentifier().getDatabaseName());
    loadModel.setTableName(absoluteTableIdentifier.getCarbonTableIdentifier().getTableName());
    loadModel.setFactFilePath(factFilePath);
    loadModel.setLoadMetadataDetails(new ArrayList<LoadMetadataDetails>());
    loadModel.setStorePath(absoluteTableIdentifier.getStorePath());
    loadModel.setDateFormat(null);
    loadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(
        CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
        CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
    loadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(
        CarbonCommonConstants.CARBON_DATE_FORMAT,
        CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
    loadModel.setSerializationNullFormat(
        TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + ",\\N");
    loadModel.setBadRecordsLoggerEnable(
        TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + ",false");
    loadModel.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + ",FORCE");
    loadModel.setIsEmptyDataBadRecord(
        DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + ",false");
    loadModel.setCsvHeader("ID,date,country,name,phonetype,serialname,salary");
    loadModel.setCsvHeaderColumns(loadModel.getCsvHeader().split(","));
    loadModel.setTaskNo("0");
    loadModel.setSegmentId("0");
    loadModel.setPartitionId("0");
    loadModel.setFactTimeStamp(System.currentTimeMillis());
    loadModel.setMaxColumns("10");
    executeGraph(loadModel, absoluteTableIdentifier.getStorePath());
  } catch (Exception e) {
    e.printStackTrace();
  }
}
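A hedged sketch of how a test might consume the store this method builds; the path mirrors the segment layout used by executeGraph below, and reading the static absoluteTableIdentifier field directly is an assumption:

// Build the local store once, then point readers at segment 0.
StoreCreator.createCarbonStore();
String segmentPath = absoluteTableIdentifier.getStorePath() + "/"
    + absoluteTableIdentifier.getCarbonTableIdentifier().getDatabaseName() + "/"
    + absoluteTableIdentifier.getCarbonTableIdentifier().getTableName()
    + "/Fact/Part0/Segment_0";
// the loaded carbondata and index files now live directly under segmentPath.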
use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.
the class StoreCreator method executeGraph.
/**
 * Execute graph which will further load data
 *
 * @param loadModel load model describing the CSV input and the target table
 * @param storeLocation path of the local store to write into
 * @throws Exception if the data load fails
 */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  new File(storeLocation).mkdirs();
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
  String graphPath = outPutLoc + File.separator + databaseName + File.separator + tableName
      + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    path.delete();
  }
  SchemaInfo info = new SchemaInfo();
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE,
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setMaxColumns(configuration, "10");
  CSVInputFormat.setNumberOfColumns(configuration, "7");
  TaskAttemptContextImpl hadoopAttemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });
  info.setDatabaseName(databaseName);
  info.setTableName(tableName);
  // record the load in tablestatus for this database and table.
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getDatabaseName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
  String segLocation =
      storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
  File file = new File(segLocation);
  File factFile = null;
  File[] folderList = file.listFiles();
  File folder = null;
  if (folderList != null) {
    for (File candidate : folderList) {
      if (candidate.isDirectory()) {
        folder = candidate;
      }
    }
  }
  if (folder != null && folder.isDirectory()) {
    File[] files = folder.listFiles();
    for (File candidate : files) {
      if (!candidate.isDirectory() && candidate.getName().startsWith("part")) {
        factFile = candidate;
        break;
      }
    }
    if (factFile != null) {
      // move the fact file up to the segment directory, then drop the temp folder.
      factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
    }
    CarbonUtil.deleteFoldersAndFiles(folder);
  }
}
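Because executeGraph ends by writing the tablestatus entry, a caller can verify that the load registered. A small sketch using the same SegmentStatusManager API shown in cleanUpDeltaFiles above:

// after executeGraph(loadModel, storeLocation) returns:
SegmentStatusManager ssm = new SegmentStatusManager(
    loadModel.getCarbonDataLoadSchema().getCarbonTable().getAbsoluteTableIdentifier());
LoadMetadataDetails[] loads = ssm.readLoadMetadata(
    loadModel.getCarbonDataLoadSchema().getCarbonTable().getMetaDataFilepath());
for (LoadMetadataDetails load : loads) {
  // expect a single segment "0" with status SUCCESS.
  System.out.println("segment " + load.getLoadName() + ": " + load.getLoadStatus());
}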
use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.
the class LoadMetadataDetailsUnitTest method testEqualsObjectIsLoadMetadataDetailsLoadNameNull.
@Test
public void testEqualsObjectIsLoadMetadataDetailsLoadNameNull() throws Exception {
  LoadMetadataDetails obj = new LoadMetadataDetails();
  obj.setLoadName("test");
  boolean result = loadMetadataDetails.equals(obj);
  assertEquals(false, result);
}
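For contrast, a hedged companion case; it assumes equals is keyed on the load name, which the null-name test above implies, and the test name is hypothetical:

@Test
public void testEqualsObjectWithSameLoadName() throws Exception {
  LoadMetadataDetails first = new LoadMetadataDetails();
  LoadMetadataDetails second = new LoadMetadataDetails();
  first.setLoadName("0");
  second.setLoadName("0");
  // two details carrying the same load name should compare equal.
  assertEquals(true, first.equals(second));
}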