org.apache.pig.data.DataBag.iterator()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(6.9k)|赞(0)|评价(0)|浏览(112)

本文整理了Java中org.apache.pig.data.DataBag.iterator()方法的一些代码示例,展示了DataBag.iterator()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。DataBag.iterator()方法的具体详情如下:
包路径:org.apache.pig.data.DataBag
类名称:DataBag
方法名:iterator

DataBag.iterator介绍

[英]Get an iterator to the bag. For default and distinct bags, no particular order is guaranteed. For sorted bags the order is guaranteed to be sorted according to the provided comparator.
[中]获取包(bag)的迭代器。对于默认包和去重包,不保证任何特定顺序;对于排序包,保证按所提供的比较器排序。

代码示例

代码示例来源:origin: apache/hive

// Excerpt: copies the contents of pigBag into a Java list via getJavaObj.
// needTuple is true when the field schema's type is STRUCT.
boolean needTuple = tupFS.getType() == Type.STRUCT;
// Pre-size the list using the bag's reported size (narrowed to int).
List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
Iterator<Tuple> bagItr = pigBag.iterator();
while (bagItr.hasNext()) {
 // Convert the whole tuple when needTuple is set, otherwise only its first field.
 bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));

代码示例来源:origin: aseldawy/pigeon

@Override
 public Tuple exec(Tuple input) throws IOException {
  // The bag is read from the first field of the input tuple
  DataBag bag = (DataBag) input.get(0);
  // Hand back the first tuple the bag's iterator yields
  return bag.iterator().next();
 }
}

代码示例来源:origin: org.apache.pig/pig

@Override
  public Tuple exec(Tuple input) throws IOException {
    // Initial only ever runs map-side, where the incoming bag
    // holds a single tuple; so the count is 1 for a non-empty
    // bag and 0 for an empty one.
    DataBag bag = (DataBag) input.get(0);
    final Long count;
    if (bag.iterator().hasNext()) {
      count = Long.valueOf(1L);
    } else {
      count = Long.valueOf(0L);
    }
    return mTupleFactory.newTuple(count);
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Copies every tuple of the given bag into a new {@link HashSet}.
 *
 * @param bag the bag to read tuples from
 * @return a set holding the bag's tuples
 */
private static Set<Tuple> toSet(DataBag bag) {
  Set<Tuple> result = new HashSet<Tuple>();
  for (Iterator<Tuple> cursor = bag.iterator(); cursor.hasNext(); ) {
    result.add(cursor.next());
  }
  return result;
}

代码示例来源:origin: aseldawy/pigeon

@Override
 public Tuple exec(Tuple input) throws IOException {
  // The bag is read from the first field of the input tuple
  DataBag bag = (DataBag) input.get(0);
  // Hand back the first tuple the bag's iterator yields
  return bag.iterator().next();
 }
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Builds a priority queue with one {@code pair} per non-empty bag field of the input.
 *
 * Note: the queue is created with an initial capacity of {@code input.size()};
 * {@link PriorityQueue} rejects a capacity below 1 with IllegalArgumentException.
 *
 * @param input tuple whose fields must all be DataBag instances
 * @return the queue of pairs wrapping each non-empty bag's iterator
 * @throws RuntimeException if any field is not a DataBag
 */
private PriorityQueue<pair> load_bags(Tuple input) throws IOException
{
 PriorityQueue<pair> queue = new PriorityQueue<pair>(input.size());
 int index = 0;
 while (index < input.size()) {
  Object field = input.get(index);
  if (field instanceof DataBag) {
   Iterator<Tuple> tuples = ((DataBag) field).iterator();
   // Bags without any tuple contribute nothing to the queue
   if (tuples.hasNext()) {
    queue.add(new pair(tuples));
   }
  } else {
   throw new RuntimeException("parameters must be databags");
  }
  index++;
 }
 return queue;
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Feeds every tuple of the bag into the queue, polling the queue's head
 * each time its size exceeds the limit.
 *
 * @param store    queue that accumulates the retained tuples
 * @param limit    maximum number of tuples kept in the queue
 * @param inputBag bag supplying the tuples
 */
protected static void updateTop(PriorityQueue<Tuple> store, int limit, DataBag inputBag) {
  for (Iterator<Tuple> cursor = inputBag.iterator(); cursor.hasNext(); ) {
    store.add(cursor.next());
    // Drop the head as soon as the queue outgrows the limit
    if (store.size() > limit) {
      store.poll();
    }
  }
}

代码示例来源:origin: aseldawy/pigeon

@Override
 public Tuple exec(Tuple input) throws IOException {
  // The bag is read from the first field of the input tuple
  DataBag bag = (DataBag) input.get(0);
  // Hand back the first tuple the bag's iterator yields
  return bag.iterator().next();
 }
}

代码示例来源:origin: org.apache.pig/pig

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (input == null) {
    // Without an input bag, report a record on the first call only
    boolean firstCall = !init;
    init = true;
    return firstCall;
  }
  // Create the iterator lazily on first use
  if (it == null) {
    it = input.iterator();
  }
  if (it.hasNext()) {
    value = it.next();
    return true;
  }
  return false;
}

代码示例来源:origin: org.apache.pig/pig

@Override
public Object getListElement(Object list, int i) {
  // Restart iteration when reading from position 0 or when a different list arrives
  boolean restart = (i == 0) || (list != cachedObject);
  if (restart) {
    cachedObject = list;
    index = -1;
    DataBag db = (DataBag) list;
    iter = db.iterator();
  }
  // Only the element right after the last one returned may be requested
  if (i != index + 1) {
    throw new RuntimeException("Only sequential read is supported");
  }
  index++;
  try {
    Tuple t = iter.next();
    // A single-field tuple is unwrapped to its only item
    return (t.size() == 1) ? t.get(0) : t;
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Appends every tuple of the given bag to this bag.
 * @param b bag whose tuples are added.
 */
@Override
public void addAll(DataBag b) {
  for (Iterator<Tuple> source = b.iterator(); source.hasNext(); ) {
    add(source.next());
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Renders the bag as a table of strings: one row per tuple, one column per
 * field of the operator's schema, each cell produced by ShortenField.
 *
 * @param op  operator whose schema determines the column count
 * @param bag bag supplying the rows
 * @return the rows-by-columns string table
 */
static String[][] MakeArray(Operator op, DataBag bag)
    throws Exception {
  final int rowCount = (int) bag.size();
  final int colCount = ((LogicalRelationalOperator) op).getSchema().getFields().size();
  String[][] cells = new String[rowCount][colCount];
  Iterator<Tuple> tuples = bag.iterator();
  int r = 0;
  while (r < rowCount) {
    Tuple current = tuples.next();
    String[] row = cells[r];
    for (int c = 0; c < colCount; c++) {
      row[c] = ShortenField(current.get(c));
    }
    r++;
  }
  return cells;
}

代码示例来源:origin: mozilla-metrics/akela

/**
 * Returns the first non-null tuple of the bag held in the input's first field,
 * or null when the bag yields none.
 */
static private Tuple realexec(Tuple input) throws IOException
{
  DataBag bag = (DataBag) input.get(0);
  for (Iterator<Tuple> cursor = bag.iterator(); cursor.hasNext(); ) {
    Tuple candidate = cursor.next();
    if (candidate == null) {
      continue;
    }
    return candidate;
  }
  return null;
}

代码示例来源:origin: org.apache.pig/pig

@Override
  public Tuple exec(Tuple input) throws IOException {
    if (!isInitialized()) {
      initialize();
    }
    try {
      // Take the first field of the first tuple in the incoming bag
      DataBag bag = (DataBag) input.get(0);
      Object firstField = bag.iterator().next().get(0);
      IRubyObject rubyArg = PigJrubyLibrary.pigToRuby(ruby, firstField);
      // Invoke the stage method on the receiver and convert the result back for Pig
      IRubyObject rubyOut = rubyEngine.callMethod(getReceiver(), getStage(), rubyArg, IRubyObject.class);
      return mTupleFactory.newTuple(PigJrubyLibrary.rubyToPig(rubyOut));
    } catch (Exception e) {
      throw new IOException("Error executing initial function",  e);
    }
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Formats a bag as {@code {t1,t2,...}}, where each element is the
 * TupleFormat rendering of one tuple, in the bag's iteration order.
 *
 * @param bag the bag to render
 * @return the textual form of the bag
 */
public static String format(DataBag bag) {
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is used instead of StringBuffer.
    StringBuilder sb = new StringBuilder();
    sb.append('{');

    Iterator<Tuple> it = bag.iterator();
    while (it.hasNext()) {
      Tuple t = it.next();
      sb.append(TupleFormat.format(t));
      // Separate tuples with a comma, but not after the last one
      if (it.hasNext()) {
        sb.append(",");
      }
    }
    sb.append('}');
    return sb.toString();
  }
}

代码示例来源:origin: org.apache.pig/pig

@Override
  public Tuple exec(Tuple input) throws IOException {
    boolean nothingToDo = (input == null) || (input.size() == 0);
    if (nothingToDo) {
      return null;
    }
    // Unwrap the outer bag and take its first tuple
    DataBag values = (DataBag) input.get(0);
    Tuple first = values.iterator().next();
    // A one-field tuple is keyed by that field alone; a wider tuple
    // is serialized as a whole.
    final byte[] keyBytes = (first.size() == 1)
        ? DataType.toBytes(first.get(0))
        : DataType.toBytes(first, DataType.TUPLE);
    Key k = new Key(keyBytes);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(k);
    return TupleFactory.getInstance().newTuple(bloomOut());
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Emits the tuples found in exactly one of the two bags.
 *
 * Both bags are loaded into hash sets first, so the distinct tuples
 * of each bag are assumed to fit in memory.
 *
 * @param bag1   first bag
 * @param bag2   second bag
 * @param emitTo bag receiving the tuples present in only one input
 */
private void computeDiff(
    DataBag bag1,
    DataBag bag2,
    DataBag emitTo) {
  Set<Tuple> first = new HashSet<Tuple>();
  for (Iterator<Tuple> it = bag1.iterator(); it.hasNext(); ) {
    first.add(it.next());
  }
  Set<Tuple> second = new HashSet<Tuple>();
  for (Iterator<Tuple> it = bag2.iterator(); it.hasNext(); ) {
    second.add(it.next());
  }
  // Probe each set against the other, first-only tuples before second-only ones
  for (Tuple t : first) {
    if (second.contains(t)) {
      continue;
    }
    emitTo.add(t);
  }
  for (Tuple t : second) {
    if (first.contains(t)) {
      continue;
    }
    emitTo.add(t);
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Adds up the first field of every tuple in the bag held in the input's first field.
 * Each value is cast to Long without any prior type check.
 *
 * @param input tuple whose first field is the bag of values
 * @return the total
 */
static protected Long sum(Tuple input) throws ExecException, NumberFormatException {
  DataBag values = (DataBag) input.get(0);
  long total = 0;
  Iterator<Tuple> cursor = values.iterator();
  while (cursor.hasNext()) {
    // Deliberately unchecked cast: validating every value is considered too costly
    Long current = (Long) cursor.next().get(0);
    total += current;
  }
  return total;
}

代码示例来源:origin: lintool/twitter-tools

public String exec(Tuple input) throws IOException {
    // Returns the first field of the bag's first tuple, or null for an empty bag
    DataBag bag = (DataBag) input.get(0);
    Iterator<Tuple> tuples = bag.iterator();
    if (tuples.hasNext()) {
      Tuple head = tuples.next();
      return (String) head.get(0);
    }
    return null;
  }
}

代码示例来源:origin: lintool/twitter-tools

public String exec(Tuple input) throws IOException {
  // Returns the first field of the bag's second tuple, or null when
  // the bag holds fewer than two tuples
  DataBag bag = (DataBag) input.get(0);
  Iterator<Tuple> tuples = bag.iterator();
  if (tuples.hasNext()) {
   // Skip over the first tuple
   tuples.next();
   if (tuples.hasNext()) {
    Tuple second = tuples.next();
    return (String) second.get(0);
   }
  }
  return null;
 }
}

相关文章

微信公众号

最新文章

更多