org.apache.hadoop.hive.ql.exec.Utilities.removeTempOrDuplicateFiles()方法的使用及代码示例

x33g5p2x  于2022-02-01 转载在 其他  
字(8.4k)|赞(0)|评价(0)|浏览(75)

本文整理了Java中org.apache.hadoop.hive.ql.exec.Utilities.removeTempOrDuplicateFiles()方法的一些代码示例,展示了Utilities.removeTempOrDuplicateFiles()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Utilities.removeTempOrDuplicateFiles()方法的具体详情如下:
包路径:org.apache.hadoop.hive.ql.exec.Utilities
类名称:Utilities
方法名:removeTempOrDuplicateFiles

Utilities.removeTempOrDuplicateFiles介绍

[英]Remove all temporary files and duplicate (double-committed) files from a given directory.
[中]从给定目录中删除所有临时文件和重复(双重提交)文件。

代码示例

代码示例来源:origin: apache/hive

/**
 * Deletes all temporary files and duplicate (double-committed) output files
 * found under {@code path}.
 *
 * <p>Convenience overload: forwards to the full variant with no dynamic-partition
 * context, no sink descriptor, and no configuration.
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path, boolean isBaseDir) throws IOException {
 removeTempOrDuplicateFiles(fs, path, null, null, null, isBaseDir);
}

代码示例来源:origin: apache/hive

/**
 * Deletes temporary and duplicate (double-committed) files described by
 * {@code fileStats}, without tracking which files were kept.
 *
 * @return paths of should-be-created empty buckets, as produced by the
 *         delegated full variant.
 */
public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
  DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, boolean isBaseDir) throws IOException {
 Set<Path> filesKept = null; // this overload does not collect the kept-file set
 return removeTempOrDuplicateFiles(fs, fileStats, dpCtx, conf, hconf, filesKept, isBaseDir);
}

代码示例来源:origin: apache/drill

/**
 * Deletes all temporary files and duplicate (double-committed) output files
 * found under {@code path}.
 *
 * <p>Convenience overload: forwards to the full variant with no dynamic-partition
 * context, no sink descriptor, and no configuration.
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws IOException {
 removeTempOrDuplicateFiles(fs, path, null, null, null);
}

代码示例来源:origin: apache/hive

/**
 * Removes temporary and double-committed duplicate files below {@code path}.
 *
 * @return paths of should-be-created empty buckets, or {@code null} when
 *         {@code path} is {@code null}.
 */
public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, Path path,
  DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, boolean isBaseDir) throws IOException {
 if (path == null) {
  return null;
 }
 // Descend one directory level per dynamic-partition column; a single level
 // when there is no dynamic-partition context.
 int depth = (dpCtx == null) ? 1 : dpCtx.getNumDPCols();
 List<FileStatus> statusList = HiveStatsUtils.getFileStatusRecurse(path, depth, fs);
 FileStatus[] stats = statusList.toArray(new FileStatus[0]);
 return removeTempOrDuplicateFiles(fs, stats, dpCtx, conf, hconf, isBaseDir);
}

代码示例来源:origin: apache/drill

/**
 * Removes temporary and double-committed duplicate files below {@code path}.
 *
 * @return paths of should-be-created empty buckets, or {@code null} when
 *         {@code path} is {@code null}.
 */
public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, Path path,
  DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf) throws IOException {
 if (path == null) {
  return null;
 }
 // Descend one directory level per dynamic-partition column; a single level
 // when there is no dynamic-partition context.
 int depth = (dpCtx == null) ? 1 : dpCtx.getNumDPCols();
 FileStatus[] stats = HiveStatsUtils.getFileStatusRecurse(path, depth, fs);
 return removeTempOrDuplicateFiles(fs, stats, dpCtx, conf, hconf);
}

代码示例来源:origin: apache/hive

Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath, false);

代码示例来源:origin: apache/drill

Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath);

代码示例来源:origin: apache/drill

taskIDToFile = removeTempOrDuplicateFiles(items, fs);
 return result;
taskIDToFile = removeTempOrDuplicateFiles(items, fs);
if(taskIDToFile != null && taskIDToFile.size() > 0 && conf != null && conf.getTable() != null
  && (conf.getTable().getNumBuckets() > taskIDToFile.size()) && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) {

代码示例来源:origin: apache/hive

/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 *
 * @return a list of path names corresponding to should-be-created empty buckets.
 */
public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
  DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, Set<Path> filesKept, boolean isBaseDir)
    throws IOException {
 // Zero dynamic-partition levels unless a DP context is present.
 int dpLevels = (dpCtx == null) ? 0 : dpCtx.getNumDPCols();
 // Bucket count is only known when the sink has an attached target table.
 int numBuckets = 0;
 if (conf != null && conf.getTable() != null) {
  numBuckets = conf.getTable().getNumBuckets();
 }
 return removeTempOrDuplicateFiles(
   fs, fileStats, null, dpLevels, numBuckets, hconf, null, 0, false, filesKept, isBaseDir);
}

代码示例来源:origin: apache/hive

finalResults[i] = new PathOnlyFileStatus(mmDirectories.get(i));
List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(fs, finalResults,
  unionSuffix, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, writeId, stmtId,
    isMmTable, null, isInsertOverwrite);

代码示例来源:origin: apache/hive

/**
 * Drives one removeTempOrDuplicateFiles scenario and checks that the
 * returned paths keep the temp directory's scheme/authority.
 */
private List<Path> runRemoveTempOrDuplicateFilesTestCase(String executionEngine, boolean dPEnabled)
  throws Exception {
 Configuration hconf = new HiveConf(this.getClass());
 // A bogus default FS proves removeTempOrDuplicateFiles keeps the
 // scheme/authority of the paths it was handed rather than falling back
 // to the default scheme information.
 hconf.set("fs.defaultFS", "hdfs://should-not-be-used/");
 hconf.set(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname, executionEngine);

 FileSystem localFs = FileSystem.getLocal(hconf);
 DynamicPartitionCtx dpCtx = getDynamicPartitionCtx(dPEnabled);
 Path tempDirPath = setupTempDirWithSingleOutputFile(hconf);
 FileSinkDesc conf = getFileSinkDesc(tempDirPath);

 List<Path> paths = Utilities.removeTempOrDuplicateFiles(localFs, tempDirPath, dpCtx, conf, hconf, false);

 // Every returned path must carry the temp dir's scheme and authority.
 java.net.URI expected = tempDirPath.toUri();
 assertPathsMatchSchemeAndAuthority(expected.getScheme(), expected.getAuthority(), paths);
 return paths;
}

代码示例来源:origin: apache/hive

perfLogger.PerfLogBegin("FileSinkOperator", "RemoveTempOrDuplicateFiles");
List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(
  fs, statuses, dpCtx, conf, hconf, filesKept, false);
perfLogger.PerfLogEnd("FileSinkOperator", "RemoveTempOrDuplicateFiles");

代码示例来源:origin: apache/drill

perfLogger.PerfLogBegin("FileSinkOperator", "RemoveTempOrDuplicateFiles");
List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(fs, statuses, dpCtx, conf, hconf);
perfLogger.PerfLogEnd("FileSinkOperator", "RemoveTempOrDuplicateFiles");

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Deletes all temporary files and duplicate (double-committed) output files
 * found under {@code path}.
 *
 * <p>Convenience overload: forwards to the three-argument variant with no
 * dynamic-partition context.
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws IOException {
 removeTempOrDuplicateFiles(fs, path, null);
}

代码示例来源:origin: org.apache.hadoop.hive/hive-exec

/**
 * Remove all temporary files and duplicate (double-committed) files from a given directory.
 *
 * <p>Delegates to the three-argument variant with no dynamic-partition context.
 * (Note: unlike some overloads in this class, this one returns nothing.)
 */
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws IOException {
 removeTempOrDuplicateFiles(fs, path, null);
}

代码示例来源:origin: org.apache.hadoop.hive/hive-exec

Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath);

代码示例来源:origin: org.apache.hadoop.hive/hive-exec

/**
 * Promotes the temporary output directory for {@code specPath} to its final
 * location, or deletes it on failure.
 *
 * <p>On success the move is staged through an ".intermediate" directory so
 * that late writes from speculative tasks to the tmp path never reach the
 * final path. Empty buckets reported by the cleanup step are materialized
 * before the final move.
 *
 * @param specPath final destination path (as a string)
 * @param hconf    configuration used to resolve the file system
 * @param success  whether the producing job succeeded; if false the tmp
 *                 directory is simply deleted
 * @param log      logger for progress messages
 * @param dpCtx    dynamic-partition context passed to the cleanup step
 */
public void mvFileToFinalPath(String specPath, Configuration hconf,
  boolean success, Log log, DynamicPartitionCtx dpCtx) throws IOException, HiveException {
 FileSystem fs = (new Path(specPath)).getFileSystem(hconf);
 Path tmpPath = Utilities.toTempPath(specPath);
 Path intermediatePath = new Path(tmpPath.getParent(), tmpPath.getName()
   + ".intermediate");
 Path finalPath = new Path(specPath);
 if (success) {
  if (fs.exists(tmpPath)) {
   // Step1: rename tmp output folder to intermediate path. After this
   // point, updates from speculative tasks still writing to tmpPath
   // will not appear in finalPath.
   log.info("Moving tmp dir: " + tmpPath + " to: " + intermediatePath);
   Utilities.rename(fs, tmpPath, intermediatePath);
   // Step2: remove any tmp file or double-committed output files
   ArrayList<String> emptyBuckets =
    Utilities.removeTempOrDuplicateFiles(fs, intermediatePath, dpCtx);
   // create empty buckets if necessary
   if (emptyBuckets.size() > 0) {
    createEmptyBuckets(hconf, emptyBuckets);
   }
   // Step3: move to the file destination
   log.info("Moving tmp dir: " + intermediatePath + " to: " + finalPath);
   Utilities.renameOrMoveFiles(fs, intermediatePath, finalPath);
  }
 } else {
  // Job failed: discard all partial output under the tmp path.
  fs.delete(tmpPath, true);
 }
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

taskIDToFile = removeTempOrDuplicateFiles(items, fs);
removeTempOrDuplicateFiles(items, fs);

代码示例来源:origin: com.facebook.presto.hive/hive-apache

Utilities.rename(fs, tmpPath, intermediatePath);
Utilities.removeTempOrDuplicateFiles(fs, intermediatePath);

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Promotes the temporary output directory for {@code specPath} to its final
 * location, or deletes it on failure.
 *
 * <p>On success, temporary and double-committed duplicate files are removed
 * from the tmp path first, any reported empty buckets are created, and the
 * tmp directory contents are then moved to {@code specPath}. The per-task
 * temp path is deleted unconditionally at the end.
 *
 * @param specPath final destination path
 * @param hconf    configuration used to resolve the file system
 * @param success  whether the producing job succeeded; if false the tmp
 *                 directory is simply deleted
 * @param log      logger for progress messages
 * @param dpCtx    dynamic-partition context passed to the cleanup step
 * @param conf     file sink descriptor forwarded to bucket creation
 * @param reporter progress reporter forwarded to bucket creation
 */
public static void mvFileToFinalPath(Path specPath, Configuration hconf,
  boolean success, Log log, DynamicPartitionCtx dpCtx, FileSinkDesc conf,
  Reporter reporter) throws IOException,
  HiveException {
 FileSystem fs = specPath.getFileSystem(hconf);
 Path tmpPath = Utilities.toTempPath(specPath);
 Path taskTmpPath = Utilities.toTaskTempPath(specPath);
 if (success) {
  if (fs.exists(tmpPath)) {
   // remove any tmp file or double-committed output files
   ArrayList<String> emptyBuckets =
     Utilities.removeTempOrDuplicateFiles(fs, tmpPath, dpCtx);
   // create empty buckets if necessary
   if (emptyBuckets.size() > 0) {
    createEmptyBuckets(hconf, emptyBuckets, conf, reporter);
   }
   // move to the file destination
   log.info("Moving tmp dir: " + tmpPath + " to: " + specPath);
   Utilities.renameOrMoveFiles(fs, tmpPath, specPath);
  }
 } else {
  // Job failed: discard all partial output under the tmp path.
  fs.delete(tmpPath, true);
 }
 // The task-level temp dir is always cleaned up, success or not.
 fs.delete(taskTmpPath, true);
}

相关文章

微信公众号

最新文章

更多

Utilities类方法