org.apache.hadoop.mapreduce.InputSplit类的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(6.5k)|赞(0)|评价(0)|浏览(196)

本文整理了Java中org.apache.hadoop.mapreduce.InputSplit类的一些代码示例,展示了InputSplit类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。InputSplit类的具体详情如下:
包路径:org.apache.hadoop.mapreduce.InputSplit
类名称:InputSplit

InputSplit介绍

[英]InputSplit represents the data to be processed by an individual Mapper.

Typically, it presents a byte-oriented view on the input and is the responsibility of RecordReader of the job to process this and present a record-oriented view.
[中]InputSplit表示要由单个映射程序处理的数据。
通常,它为输入提供一个面向字节的视图;作业的RecordReader负责处理该视图,并提供一个面向记录的视图。

代码示例

代码示例来源:origin: apache/hive

/**
 * Computes the Parquet splits for the given job and wraps the first one
 * as a {@link FileSplit} over the externally supplied {@code file} path.
 */
protected static FileSplit getFileSplit(Job vectorJob) throws IOException, InterruptedException {
  ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
  InputSplit firstSplit = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
  return new FileSplit(file, 0L, firstSplit.getLength(), firstSplit.getLocations());
}

代码示例来源:origin: apache/ignite

@Override public boolean nextKeyValue() throws IOException, InterruptedException {
    // Record the counter's previous value, advance it, and report whether
    // that previous position was still within the split's length.
    long previous = cnt++;
    return previous < split.getLength();
}

代码示例来源:origin: apache/flink

@Override
public String[] getHostnames() {
    // Host locality is advisory only: any failure degrades to "no known hosts".
    try {
        return mapreduceInputSplit.getLocations();
    } catch (Exception ignored) {
        return new String[0];
    }
}

代码示例来源:origin: org.apache.hadoop/hadoop-mapreduce-client-core

@Test
public void testSplitLocationInfo() throws Exception {
 // Point the job's input at a test filesystem path; the test FS is expected
 // to report two block locations for it.
 Configuration conf = getConfiguration();
 conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
   "test:///a1/a2");
 Job job = Job.getInstance(conf);
 TextInputFormat fileInputFormat = new TextInputFormat();
 List<InputSplit> splits = fileInputFormat.getSplits(job);
 // The first split must expose both hosts and a matching SplitLocationInfo entry per host.
 String[] locations = splits.get(0).getLocations();
 Assert.assertEquals(2, locations.length);
 SplitLocationInfo[] locationInfo = splits.get(0).getLocationInfo();
 Assert.assertEquals(2, locationInfo.length);
 // Pair each SplitLocationInfo with its host name; array order is not guaranteed,
 // so select by comparing against locations[0].
 SplitLocationInfo localhostInfo = locations[0].equals("localhost") ?
   locationInfo[0] : locationInfo[1];
 SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ?
   locationInfo[0] : locationInfo[1];
 // "localhost" holds the replica both on disk and in memory; "otherhost" only on disk.
 Assert.assertTrue(localhostInfo.isOnDisk());
 Assert.assertTrue(localhostInfo.isInMemory());
 Assert.assertTrue(otherhostInfo.isOnDisk());
 Assert.assertFalse(otherhostInfo.isInMemory());
}

代码示例来源:origin: apache/ignite

@Override public float getProgress() throws IOException, InterruptedException {
    // Progress = records consumed so far divided by the split's total length.
    float consumed = cnt;
    return consumed / split.getLength();
}

代码示例来源:origin: apache/ignite

res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

代码示例来源:origin: org.apache.hadoop/hadoop-mapred

/**
 * Snapshots the split's metadata (host locations and byte length) at the given offset.
 *
 * @throws IOException if reading the split metadata is interrupted
 */
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  // Restore the interrupt flag so callers up the stack can still observe it,
  // then surface the failure as an IOException as before.
  Thread.currentThread().interrupt();
  throw new IOException(ie);
 }
}

代码示例来源:origin: apache/ignite

@Override public boolean nextKeyValue() throws IOException, InterruptedException {
    // Advance the counter first, then report whether the new count is
    // still within the split's length.
    long advanced = ++cnt;
    return advanced <= split.getLength();
}

代码示例来源:origin: apache/hbase

TableSnapshotRegionSplit snapshotRegionSplit = (TableSnapshotRegionSplit) split;
if (localityEnabled) {
 Assert.assertTrue(split.getLocations() != null && split.getLocations().length != 0);
} else {
 Assert.assertTrue(split.getLocations() != null && split.getLocations().length == 0);

代码示例来源:origin: io.hops/hadoop-mapreduce-client-core

/**
 * Snapshots the split's metadata (host locations and byte length) at the given offset.
 *
 * @throws IOException if reading the split metadata is interrupted
 */
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  // Restore the interrupt flag so callers up the stack can still observe it,
  // then surface the failure as an IOException as before.
  Thread.currentThread().interrupt();
  throw new IOException(ie);
 }
}

代码示例来源:origin: apache/kylin

/**
 * Computes the total map-phase input size of the job in megabytes by summing
 * the byte length of every InputSplit its InputFormat produces.
 *
 * @throws JobException if {@code job} is null
 */
public static double getTotalMapInputMB(Job job)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    if (job == null) {
        throw new JobException("Job is null");
    }
    // Instantiate the job's configured InputFormat reflectively.
    InputFormat<?, ?> inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    long totalInputBytes = 0;
    for (InputSplit s : inputFormat.getSplits(job)) {
        totalInputBytes += s.getLength();
    }

    // 0 input bytes is possible when the segment range hits no partition on a partitioned hive table (KYLIN-2470)
    if (totalInputBytes == 0) {
        logger.warn("Map input splits are 0 bytes, something is wrong?");
    }

    return (double) totalInputBytes / 1024 / 1024;
}

代码示例来源:origin: com.github.jiayuhan-it/hadoop-mapreduce-client-core

/**
 * Returns the host locations reported by the i-th child InputSplit.
 *
 * @param i index of the split within {@code splits}
 */
public String[] getLocation(int i) throws IOException, InterruptedException {
 return splits[i].getLocations();
}

代码示例来源:origin: com.github.jiayuhan-it/hadoop-mapreduce-client-core

/**
 * Snapshots the split's metadata (host locations and byte length) at the given offset.
 *
 * @throws IOException if reading the split metadata is interrupted
 */
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  // Restore the interrupt flag so callers up the stack can still observe it,
  // then surface the failure as an IOException as before.
  Thread.currentThread().interrupt();
  throw new IOException(ie);
 }
}

代码示例来源:origin: KylinOLAP/Kylin

/**
 * Computes the job's total map input in megabytes by summing the byte
 * lengths of all splits produced by the configured InputFormat.
 *
 * @throws JobException if the {@code job} field is null
 * @throws IllegalArgumentException if the splits add up to zero bytes
 */
protected double getTotalMapInputMB() throws ClassNotFoundException, IOException, InterruptedException, JobException {
    if (job == null) {
        throw new JobException("Job is null");
    }
    // Instantiate the job's InputFormat reflectively and total up the split sizes.
    InputFormat<?, ?> inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    long totalInputBytes = 0;
    for (InputSplit s : inputFormat.getSplits(job)) {
        totalInputBytes += s.getLength();
    }
    if (totalInputBytes == 0) {
        throw new IllegalArgumentException("Map input splits are 0 bytes, something is wrong!");
    }
    return (double) totalInputBytes / 1024 / 1024;
}

代码示例来源:origin: com.alibaba.blink/flink-hadoop-compatibility

@Override
public String[] getHostnames() {
    // Best-effort lookup of the wrapped split's preferred hosts;
    // on any failure fall back to an empty host list.
    String[] hosts;
    try {
        hosts = mapreduceInputSplit.getLocations();
    } catch (Exception ignored) {
        hosts = new String[0];
    }
    return hosts;
}

代码示例来源:origin: io.prestosql.hadoop/hadoop-apache

/**
 * Snapshots the split's metadata (host locations and byte length) at the given offset.
 *
 * @throws IOException if reading the split metadata is interrupted
 */
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  // Restore the interrupt flag so callers up the stack can still observe it,
  // then surface the failure as an IOException as before.
  Thread.currentThread().interrupt();
  throw new IOException(ie);
 }
}

代码示例来源:origin: io.hops/hadoop-mapreduce-client-core

/**
 * Returns the byte length of the i-th child InputSplit.
 *
 * @param i index of the split within {@code splits}
 */
public long getLength(int i) throws IOException, InterruptedException {
 return splits[i].getLength();
}

代码示例来源:origin: ch.cern.hadoop/hadoop-mapreduce-client-core

/**
 * Fetches the location hints of the i-th child InputSplit.
 *
 * @param i index into the {@code splits} array
 * @return host names where the i-th split's data is local
 */
public String[] getLocation(int i) throws IOException, InterruptedException {
 return splits[i].getLocations();
}

代码示例来源:origin: ch.cern.hadoop/hadoop-mapreduce-client-core

/**
 * Snapshots the split's metadata (host locations and byte length) at the given offset.
 *
 * @throws IOException if reading the split metadata is interrupted
 */
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  // Restore the interrupt flag so callers up the stack can still observe it,
  // then surface the failure as an IOException as before.
  Thread.currentThread().interrupt();
  throw new IOException(ie);
 }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Return the length of a wrapped split
 * @param idx the index into the wrapped splits
 * @return the byte length of the idx-th wrapped split (NOT the number of splits)
 */
public long getLength(int idx) throws IOException, InterruptedException {
    return wrappedSplits[idx].getLength();
}

相关文章

微信公众号

最新文章

更多