[英]InputSplit represents the data to be processed by an individual Mapper.

Typically, it presents a byte-oriented view on the input, and it is the responsibility of the job's RecordReader to process this and present a record-oriented view.


代码示例来源:origin: apache/hive

protected static FileSplit getFileSplit(Job vectorJob) throws IOException, InterruptedException {
 ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
 InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
 FileSplit fsplit = new FileSplit(file, 0L, split.getLength(), split.getLocations());
 return fsplit;

代码示例来源:origin: apache/ignite

@Override public boolean nextKeyValue() throws IOException, InterruptedException {
  return cnt++ < split.getLength();

代码示例来源:origin: apache/flink

public String[] getHostnames() {
  try {
    return mapreduceInputSplit.getLocations();
  catch (Exception e) {
    return new String[0];

代码示例来源:origin: org.apache.hadoop/hadoop-mapreduce-client-core

public void testSplitLocationInfo() throws Exception {
 Configuration conf = getConfiguration();
 Job job = Job.getInstance(conf);
 TextInputFormat fileInputFormat = new TextInputFormat();
 List<InputSplit> splits = fileInputFormat.getSplits(job);
 String[] locations = splits.get(0).getLocations();
 Assert.assertEquals(2, locations.length);
 SplitLocationInfo[] locationInfo = splits.get(0).getLocationInfo();
 Assert.assertEquals(2, locationInfo.length);
 SplitLocationInfo localhostInfo = locations[0].equals("localhost") ?
   locationInfo[0] : locationInfo[1];
 SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ?
   locationInfo[0] : locationInfo[1];

代码示例来源:origin: apache/ignite

@Override public float getProgress() throws IOException, InterruptedException {
  return (float)cnt / split.getLength();

代码示例来源:origin: apache/ignite

res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

代码示例来源:origin: org.apache.hadoop/hadoop-mapred

public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  throw new IOException(ie);

代码示例来源:origin: apache/ignite

@Override public boolean nextKeyValue() throws IOException, InterruptedException {
  return ++cnt <= split.getLength();

代码示例来源:origin: apache/hbase

TableSnapshotRegionSplit snapshotRegionSplit = (TableSnapshotRegionSplit) split;
if (localityEnabled) {
 Assert.assertTrue(split.getLocations() != null && split.getLocations().length != 0);
} else {
 Assert.assertTrue(split.getLocations() != null && split.getLocations().length == 0);

代码示例来源:origin: io.hops/hadoop-mapreduce-client-core

public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  throw new IOException(ie);

代码示例来源:origin: apache/kylin

public static double getTotalMapInputMB(Job job)
    throws ClassNotFoundException, IOException, InterruptedException, JobException {
  if (job == null) {
    throw new JobException("Job is null");
  long mapInputBytes = 0;
  InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
  for (InputSplit split : input.getSplits(job)) {
    mapInputBytes += split.getLength();
  // 0 input bytes is possible when the segment range hits no partition on a partitioned hive table (KYLIN-2470) 
  if (mapInputBytes == 0) {
    logger.warn("Map input splits are 0 bytes, something is wrong?");
  double totalMapInputMB = (double) mapInputBytes / 1024 / 1024;
  return totalMapInputMB;

代码示例来源:origin: com.github.jiayuhan-it/hadoop-mapreduce-client-core

 * Get the locations of the ith InputSplit.
public String[] getLocation(int i) throws IOException, InterruptedException {
 return splits[i].getLocations();

代码示例来源:origin: com.github.jiayuhan-it/hadoop-mapreduce-client-core

public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  throw new IOException(ie);

代码示例来源:origin: KylinOLAP/Kylin

protected double getTotalMapInputMB() throws ClassNotFoundException, IOException, InterruptedException, JobException {
  if (job == null) {
    throw new JobException("Job is null");
  long mapInputBytes = 0;
  InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
  for (InputSplit split : input.getSplits(job)) {
    mapInputBytes += split.getLength();
  if (mapInputBytes == 0) {
    throw new IllegalArgumentException("Map input splits are 0 bytes, something is wrong!");
  double totalMapInputMB = (double) mapInputBytes / 1024 / 1024;
  return totalMapInputMB;


代码示例来源:origin: io.prestosql.hadoop/hadoop-apache

public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  throw new IOException(ie);

代码示例来源:origin: io.hops/hadoop-mapreduce-client-core

 * Get the length of the ith child InputSplit.
public long getLength(int i) throws IOException, InterruptedException {
 return splits[i].getLength();


 * Get the locations of the ith InputSplit.
public String[] getLocation(int i) throws IOException, InterruptedException {
 return splits[i].getLocations();


public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
 try {
  this.locations = split.getLocations();
  this.inputDataLength = split.getLength();
  this.startOffset = startOffset;
 } catch (InterruptedException ie) {
  throw new IOException(ie);

代码示例来源:origin: org.apache.pig/pig

 * Return the length of a wrapped split
 * @param idx the index into the wrapped splits
 * @return the length of the wrapped split at index idx
public long getLength(int idx) throws IOException, InterruptedException {
  return wrappedSplits[idx].getLength();



