This article collects code examples of the Java method org.apache.hadoop.hive.ql.exec.Utilities.removeTempOrDuplicateFiles() and shows how Utilities.removeTempOrDuplicateFiles() is used in practice. The examples were extracted from selected projects on platforms such as GitHub, Stack Overflow, and Maven, so they should make useful references. Details of the method:
Package: org.apache.hadoop.hive.ql.exec
Class: Utilities
Method: removeTempOrDuplicateFiles
Description: Remove all temporary files and duplicate (double-committed) files from a given directory.
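As a quick orientation before the individual excerpts, here is a minimal usage sketch. The output directory is illustrative, and the three-argument overload follows the apache/hive excerpt shown directly below; treat this as a hedged sketch, not a definitive recipe.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class RemoveTempFilesDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Illustrative output directory left behind by a finished Hive job.
    Path outputDir = new Path("/tmp/hive-job-output");
    FileSystem fs = outputDir.getFileSystem(conf);
    // Drop leftover temporary files and duplicate (double-committed)
    // task outputs before the directory is published.
    Utilities.removeTempOrDuplicateFiles(fs, outputDir, false);
  }
}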
Code example source: origin: apache/hive

/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path, boolean isBaseDir) throws IOException {
  removeTempOrDuplicateFiles(fs, path, null, null, null, isBaseDir);
}
Code example source: origin: apache/hive

public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
    DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, boolean isBaseDir) throws IOException {
  return removeTempOrDuplicateFiles(fs, fileStats, dpCtx, conf, hconf, null, isBaseDir);
}
Code example source: origin: apache/drill

/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws IOException {
  removeTempOrDuplicateFiles(fs, path, null, null, null);
}
Code example source: origin: apache/hive

public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, Path path,
    DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, boolean isBaseDir) throws IOException {
  if (path == null) {
    return null;
  }
  List<FileStatus> statusList = HiveStatsUtils.getFileStatusRecurse(path,
      ((dpCtx == null) ? 1 : dpCtx.getNumDPCols()), fs);
  FileStatus[] stats = statusList.toArray(new FileStatus[statusList.size()]);
  return removeTempOrDuplicateFiles(fs, stats, dpCtx, conf, hconf, isBaseDir);
}
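The second argument to HiveStatsUtils.getFileStatusRecurse is the listing depth: one directory level per dynamic-partition column, or 1 when there are none. A hedged sketch of that behavior on the local filesystem follows; the paths are made up, and the List-returning signature follows the apache/hive excerpt above.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.HiveStatsUtils;

public class RecurseDepthDemo {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path root = new Path("/tmp/recurse-demo");
    // Two dynamic-partition columns -> task files sit two levels deep.
    fs.create(new Path(root, "ds=2024-01-01/hr=00/000000_0")).close();
    // Depth 2 therefore lists exactly the task output files.
    List<FileStatus> stats = HiveStatsUtils.getFileStatusRecurse(root, 2, fs);
    for (FileStatus s : stats) {
      System.out.println(s.getPath());
    }
  }
}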
Code example source: origin: apache/drill

public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, Path path,
    DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf) throws IOException {
  if (path == null) {
    return null;
  }
  FileStatus[] stats = HiveStatsUtils.getFileStatusRecurse(path,
      ((dpCtx == null) ? 1 : dpCtx.getNumDPCols()), fs);
  return removeTempOrDuplicateFiles(fs, stats, dpCtx, conf, hconf);
}
Code example source: origin: apache/hive

// Excerpt: rename the tmp output dir to an intermediate path, then clean it up.
Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath, false);
Code example source: origin: apache/drill

// Excerpt: rename the tmp output dir to an intermediate path, then clean it up.
Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath);
Code example source: origin: apache/drill

// Excerpt: map task IDs to their output files, then check whether the table
// declares more buckets than files were produced; if so (and the engine is
// not Tez), empty buckets must be created for the missing slots.
taskIDToFile = removeTempOrDuplicateFiles(items, fs);
if (taskIDToFile != null && taskIDToFile.size() > 0 && conf != null && conf.getTable() != null
    && (conf.getTable().getNumBuckets() > taskIDToFile.size())
    && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) {
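What the excerpt's condition guards is easier to see with plain numbers. Below is a hedged, self-contained sketch; the variable names mirror the excerpt but the values and the class are made up for illustration.

import java.util.HashMap;
import java.util.Map;

public class EmptyBucketCountDemo {
  public static void main(String[] args) {
    int numBuckets = 4; // buckets declared on the table (illustrative)
    Map<String, String> taskIDToFile = new HashMap<>();
    taskIDToFile.put("000000", "000000_0"); // only one task wrote output
    // More declared buckets than produced files (and not running on Tez):
    // the remaining bucket slots must be filled with empty files.
    if (numBuckets > taskIDToFile.size()) {
      System.out.println("empty buckets to create: "
          + (numBuckets - taskIDToFile.size()));
    }
  }
}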
Code example source: origin: apache/hive

/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 *
 * @return a list of path names corresponding to should-be-created empty buckets.
 */
public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
    DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, Set<Path> filesKept, boolean isBaseDir)
    throws IOException {
  int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(),
      numBuckets = (conf != null && conf.getTable() != null) ? conf.getTable().getNumBuckets() : 0;
  return removeTempOrDuplicateFiles(
      fs, fileStats, null, dpLevels, numBuckets, hconf, null, 0, false, filesKept, isBaseDir);
}
Code example source: origin: apache/hive

// Excerpt: wrap MM (insert-only ACID) table directories as FileStatus
// objects, then remove temporary/duplicate files among them.
finalResults[i] = new PathOnlyFileStatus(mmDirectories.get(i));
List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(fs, finalResults,
    unionSuffix, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, writeId, stmtId,
    isMmTable, null, isInsertOverwrite);
Code example source: origin: apache/hive

private List<Path> runRemoveTempOrDuplicateFilesTestCase(String executionEngine, boolean dPEnabled)
    throws Exception {
  Configuration hconf = new HiveConf(this.getClass());
  // do this to verify that Utilities.removeTempOrDuplicateFiles does not revert to default scheme information
  hconf.set("fs.defaultFS", "hdfs://should-not-be-used/");
  hconf.set(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname, executionEngine);
  FileSystem localFs = FileSystem.getLocal(hconf);
  DynamicPartitionCtx dpCtx = getDynamicPartitionCtx(dPEnabled);
  Path tempDirPath = setupTempDirWithSingleOutputFile(hconf);
  FileSinkDesc conf = getFileSinkDesc(tempDirPath);
  List<Path> paths = Utilities.removeTempOrDuplicateFiles(localFs, tempDirPath, dpCtx, conf, hconf, false);
  String expectedScheme = tempDirPath.toUri().getScheme();
  String expectedAuthority = tempDirPath.toUri().getAuthority();
  assertPathsMatchSchemeAndAuthority(expectedScheme, expectedAuthority, paths);
  return paths;
}
Code example source: origin: apache/hive

// Excerpt: the cleanup call wrapped in performance logging.
perfLogger.PerfLogBegin("FileSinkOperator", "RemoveTempOrDuplicateFiles");
List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(
    fs, statuses, dpCtx, conf, hconf, filesKept, false);
perfLogger.PerfLogEnd("FileSinkOperator", "RemoveTempOrDuplicateFiles");
Code example source: origin: apache/drill

// Excerpt: the cleanup call wrapped in performance logging.
perfLogger.PerfLogBegin("FileSinkOperator", "RemoveTempOrDuplicateFiles");
List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(fs, statuses, dpCtx, conf, hconf);
perfLogger.PerfLogEnd("FileSinkOperator", "RemoveTempOrDuplicateFiles");
Code example source: origin: com.facebook.presto.hive/hive-apache

/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws IOException {
  removeTempOrDuplicateFiles(fs, path, null);
}
Code example source: origin: org.apache.hadoop.hive/hive-exec

/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws IOException {
  removeTempOrDuplicateFiles(fs, path, null);
}
Code example source: origin: org.apache.hadoop.hive/hive-exec

// Excerpt: rename the tmp output dir to an intermediate path, then clean it up.
Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath);
Code example source: origin: org.apache.hadoop.hive/hive-exec

public void mvFileToFinalPath(String specPath, Configuration hconf,
    boolean success, Log log, DynamicPartitionCtx dpCtx) throws IOException, HiveException {
  FileSystem fs = (new Path(specPath)).getFileSystem(hconf);
  Path tmpPath = Utilities.toTempPath(specPath);
  Path intermediatePath = new Path(tmpPath.getParent(), tmpPath.getName()
      + ".intermediate");
  Path finalPath = new Path(specPath);
  if (success) {
    if (fs.exists(tmpPath)) {
      // Step1: rename tmp output folder to intermediate path. After this
      // point, updates from speculative tasks still writing to tmpPath
      // will not appear in finalPath.
      log.info("Moving tmp dir: " + tmpPath + " to: " + intermediatePath);
      Utilities.rename(fs, tmpPath, intermediatePath);
      // Step2: remove any tmp file or double-committed output files
      ArrayList<String> emptyBuckets =
          Utilities.removeTempOrDuplicateFiles(fs, intermediatePath, dpCtx);
      // create empty buckets if necessary
      if (emptyBuckets.size() > 0) {
        createEmptyBuckets(hconf, emptyBuckets);
      }
      // Step3: move to the file destination
      log.info("Moving tmp dir: " + intermediatePath + " to: " + finalPath);
      Utilities.renameOrMoveFiles(fs, intermediatePath, finalPath);
    }
  } else {
    fs.delete(tmpPath, true);
  }
}
Code example source: origin: com.facebook.presto.hive/hive-apache

// Excerpt: two call sites within the same method; the first keeps the
// task-ID-to-file mapping, the second only performs the cleanup.
taskIDToFile = removeTempOrDuplicateFiles(items, fs);
removeTempOrDuplicateFiles(items, fs);
Code example source: origin: com.facebook.presto.hive/hive-apache

// Excerpt: rename the tmp output dir to an intermediate path, then clean it up.
Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath);
Code example source: origin: com.facebook.presto.hive/hive-apache

public static void mvFileToFinalPath(Path specPath, Configuration hconf,
    boolean success, Log log, DynamicPartitionCtx dpCtx, FileSinkDesc conf,
    Reporter reporter) throws IOException, HiveException {
  FileSystem fs = specPath.getFileSystem(hconf);
  Path tmpPath = Utilities.toTempPath(specPath);
  Path taskTmpPath = Utilities.toTaskTempPath(specPath);
  if (success) {
    if (fs.exists(tmpPath)) {
      // remove any tmp file or double-committed output files
      ArrayList<String> emptyBuckets =
          Utilities.removeTempOrDuplicateFiles(fs, tmpPath, dpCtx);
      // create empty buckets if necessary
      if (emptyBuckets.size() > 0) {
        createEmptyBuckets(hconf, emptyBuckets, conf, reporter);
      }
      // move to the file destination
      log.info("Moving tmp dir: " + tmpPath + " to: " + specPath);
      Utilities.renameOrMoveFiles(fs, tmpPath, specPath);
    }
  } else {
    fs.delete(tmpPath, true);
  }
  fs.delete(taskTmpPath, true);
}
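A note on the design difference between the two mvFileToFinalPath variants above: the hive-exec version first renames the tmp directory to an ".intermediate" path so that speculative tasks still writing to tmpPath cannot pollute the final output, while this hive-apache version cleans temporary and duplicate files in tmpPath in place before moving it to specPath, and additionally deletes the task-level temp directory (taskTmpPath) at the end regardless of success.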