org.apache.pig.data.DataBag.size()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(8.1k)|赞(0)|评价(0)|浏览(72)

本文整理了Java中org.apache.pig.data.DataBag.size()方法的一些代码示例,展示了DataBag.size()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。DataBag.size()方法的具体详情如下:
包路径:org.apache.pig.data.DataBag
类名称:DataBag
方法名:size

DataBag.size介绍

[英]Get the number of elements in the bag, both in memory and on disk.
[中]获取包中的元素数,包括内存和磁盘中的元素数。

代码示例

代码示例来源:origin: apache/hive

// Excerpt from a larger method: prepares to copy the tuples of pigBag into a list.
// Schema of the array element: the first field of the HCat array element schema.
HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
// True when the array element type is STRUCT.
boolean needTuple = tupFS.getType() == Type.STRUCT;
// Presize the list with the bag's element count; the cast narrows size() to int.
List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
Iterator<Tuple> bagItr = pigBag.iterator();

代码示例来源:origin: org.apache.pig/pig

/**
 * Reports the length of a list value that is represented by a Pig {@code DataBag}.
 *
 * @param list the list object; it is cast to {@code DataBag} without a prior type check
 * @return the bag's element count, narrowed from {@code long} to {@code int}
 */
@Override
public int getListLength(Object list) {
  final DataBag bag = (DataBag) list;
  final long elementCount = bag.size();
  return (int) elementCount;
}

代码示例来源:origin: com.twitter.elephantbird/elephant-bird-pig

/**
 * Creates a {@code HashSet} presized to the bag's element count and hands it to
 * {@code fillThriftCollection} together with the element field and the bag.
 *
 * @param elemField descriptor of the collection's element type
 * @param bag       the Pig bag supplying the elements
 * @return the set after {@code fillThriftCollection} has processed it
 */
private static Set<Object> toThriftSet(Field elemField, DataBag bag) {
 final int initialCapacity = (int) bag.size();
 final Set<Object> result = new HashSet<Object>(initialCapacity);
 fillThriftCollection(result, elemField, bag);
 return result;
}

代码示例来源:origin: com.twitter.elephantbird/elephant-bird-pig

/**
 * Creates an {@code ArrayList} presized to the bag's element count and hands it to
 * {@code fillThriftCollection} together with the element field and the bag.
 *
 * @param elemField descriptor of the collection's element type
 * @param bag       the Pig bag supplying the elements
 * @return the list after {@code fillThriftCollection} has processed it
 */
private static List<Object> toThriftList(Field elemField, DataBag bag) {
 final int initialCapacity = (int) bag.size();
 final List<Object> result = new ArrayList<Object>(initialCapacity);
 fillThriftCollection(result, elemField, bag);
 return result;
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Returns the number of elements in the bag stored in the first field of the input.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the bag's size, or {@code null} when the first field is {@code null}
 * @throws IOException an {@code ExecException} is rethrown as-is; any other exception is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Long exec(Tuple input) throws IOException {
  try {
    final DataBag firstField = (DataBag) input.get(0);
    if (firstField == null) {
      return null;
    }
    return Long.valueOf(firstField.size());
  } catch (ExecException execFailure) {
    // Already the right exception type: pass it through untouched.
    throw execFailure;
  } catch (Exception cause) {
    final int errorCode = 2106;
    final String message = "Error while computing size in " + this.getClass().getSimpleName();
    throw new ExecException(message, errorCode, PigException.BUG, cause);
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Returns the number of elements in the bag stored in the first field of the input.
 *
 * <p>There is no null check: a {@code null} bag raises a {@code NullPointerException},
 * which the generic catch wraps in an {@code ExecException}.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the bag's size
 * @throws IOException an {@code ExecException} is rethrown as-is; any other exception is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Long exec(Tuple input) throws IOException {
  try {
    final DataBag firstField = (DataBag) input.get(0);
    final long elementCount = firstField.size();
    return Long.valueOf(elementCount);
  } catch (ExecException execFailure) {
    // Already the right exception type: pass it through untouched.
    throw execFailure;
  } catch (Exception cause) {
    final int errorCode = 2106;
    final String message = "Error while computing count in " + this.getClass().getSimpleName();
    throw new ExecException(message, errorCode, PigException.BUG, cause);
  }
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Returns the input bag unchanged when it holds no more than {@code numSamples}
 * elements; otherwise delegates to the superclass implementation.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the original bag, or the result of {@code super.exec(input)}
 * @throws IOException if reading the field or the superclass call fails
 */
@Override
public DataBag exec(Tuple input) throws IOException
{
 final DataBag candidates = (DataBag) input.get(0);
 // Small enough already: nothing to sample.
 return candidates.size() <= numSamples ? candidates : super.exec(input);
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Builds a string describing the RubyDataBag. The text always contains the bag's size;
 * when the optional first argument is truthy, the bag's {@code toString()} output is
 * appended as well.
 *
 * @param context the context the method is being executed in
 * @param args    optional true/false argument passed to inspect
 * @return        string representation of the RubyDataBag
 */
@JRubyMethod(name = {"inspect", "to_s", "to_string"}, optional = 1)
public RubyString inspect(ThreadContext context, IRubyObject[] args) {
  final Ruby runtime = context.getRuntime();
  final StringBuilder text = new StringBuilder("[DataBag: size: ");
  text.append(internalDB.size());
  final boolean includeContents = args.length > 0 && args[0].isTrue();
  if (includeContents) {
    text.append(" = ");
    text.append(internalDB.toString());
  }
  text.append("]");
  return RubyString.newString(runtime, text);
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Feeds the first field of every tuple in the given bag to {@code estimator}.
 * A {@code null} or empty bag is ignored.
 *
 * @param b tuple whose first field is cast to a {@code DataBag}
 * @throws IOException if reading a field fails
 * @throws IllegalStateException if a tuple's first field is not a {@code Number}
 *         (this includes {@code null})
 */
@Override
public void accumulate(Tuple b) throws IOException
{
 final DataBag values = (DataBag) b.get(0);
 final boolean nothingToAdd = values == null || values.size() == 0;
 if (nothingToAdd) {
  return;
 }
 for (Tuple entry : values) {
  final Object field = entry.get(0);
  if (field instanceof Number) {
   estimator.add(((Number) field).doubleValue());
  } else {
   throw new IllegalStateException("bag must have numerical values (and be non-null)");
  }
 }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Tells whether the encapsulated DataBag holds no elements.
 *
 * @param context the context the method is being executed in
 * @return        true if the encapsulated DataBag is empty, false otherwise
 */
@JRubyMethod(name = "empty?")
public RubyBoolean isEmpty(ThreadContext context) {
  final boolean noElements = internalDB.size() == 0;
  return RubyBoolean.newBoolean(context.getRuntime(), noElements);
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Exposes the element count of the encapsulated DataBag as a Ruby fixnum.
 *
 * @param context the context the method is being executed in
 * @return        the size of the encapsulated DataBag
 */
@JRubyMethod(name={"size","length"})
public RubyFixnum size(ThreadContext context) {
  final long elementCount = internalDB.size();
  return RubyFixnum.newFixnum(context.getRuntime(), elementCount);
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Tests whether the first field of the input, a bag or a map, has no elements.
 *
 * @param input tuple whose first field is inspected
 * @return {@code true} when the bag or map has size 0, {@code false} otherwise
 * @throws IOException an {@code ExecException} with error code 2102 when the field is
 *         neither a {@code DataBag} nor a {@code Map}; failures from reading the field
 *         propagate unchanged
 */
@Override
public Boolean exec(Tuple input) throws IOException {
  final Object values = input.get(0);
  if (values instanceof DataBag) {
    return ((DataBag) values).size() == 0;
  }
  if (values instanceof Map) {
    return ((Map<?, ?>) values).size() == 0;
  }
  // Any other type (including null) cannot be tested for emptiness.
  final int errCode = 2102;
  final String msg = "Cannot test a " + DataType.findTypeName(values) + " for emptiness.";
  throw new ExecException(msg, errCode, PigException.BUG);
}

代码示例来源:origin: aseldawy/pigeon

/**
 * Parses every tuple of the bag in the input's first field into a geometry and returns
 * the result of calling {@code union} on a collection of those geometries.
 *
 * @param input tuple whose first field is cast to a {@code DataBag} of geometry tuples
 * @return the value returned by the collection's {@code union} call, or {@code null}
 *         when the bag is empty
 * @throws ExecException if reading a field fails
 */
static protected OGCGeometry union(Tuple input) throws ExecException {
 final DataBag geometryBag = (DataBag) input.get(0);
 if (geometryBag.size() == 0) {
  return null;
 }
 final ArrayList<OGCGeometry> parsedGeometries = new ArrayList<OGCGeometry>();
 for (Tuple geometryTuple : geometryBag) {
  parsedGeometries.add(geometryParser.parseGeom(geometryTuple.get(0)));
 }

 // Wrap all geometries in one collection that uses the first geometry's spatial
 // reference, then call union on that collection, passing the first geometry.
 final OGCGeometry first = parsedGeometries.get(0);
 final OGCGeometryCollection collection = new OGCConcreteGeometryCollection(
   parsedGeometries, first.getEsriSpatialReference());
 return collection.union(first);
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Converts a bag into a table of strings: one row per tuple and one column per field
 * of the operator's schema. Each cell is produced by {@code ShortenField}.
 *
 * @param op  operator, cast to {@code LogicalRelationalOperator}; its schema's field
 *            count is the column count
 * @param bag bag whose tuples fill the rows; its size (narrowed to int) is the row count
 * @return the filled {@code rows x cols} table
 * @throws Exception if reading the schema or a tuple field fails
 */
static String[][] MakeArray(Operator op, DataBag bag)
    throws Exception {
  final int rowCount = (int) bag.size();
  final int columnCount = ((LogicalRelationalOperator) op).getSchema().getFields().size();
  final String[][] cells = new String[rowCount][columnCount];
  final Iterator<Tuple> tuples = bag.iterator();
  int row = 0;
  while (row < rowCount) {
    final Tuple current = tuples.next();
    final String[] rowCells = cells[row];
    for (int col = 0; col < columnCount; col++) {
      rowCells[col] = ShortenField(current.get(col));
    }
    row++;
  }
  return cells;
}

代码示例来源:origin: aseldawy/pigeon

/**
 * Parses every tuple of the bag in the input's first field into a geometry and returns
 * the envelope of a collection of those geometries.
 *
 * @param input tuple whose first field is cast to a {@code DataBag} of geometry tuples
 * @return the value returned by the collection's {@code envelope} call, or {@code null}
 *         when the bag is empty
 * @throws ExecException if reading a field fails
 */
static protected OGCGeometry extent(Tuple input) throws ExecException {
 final DataBag geometryBag = (DataBag) input.get(0);
 if (geometryBag.size() == 0) {
  return null;
 }
 final ArrayList<OGCGeometry> parsedGeometries = new ArrayList<OGCGeometry>();
 for (Tuple geometryTuple : geometryBag) {
  parsedGeometries.add(geometryParser.parseGeom(geometryTuple.get(0)));
 }

 // Wrap all geometries in one collection that uses the first geometry's spatial
 // reference, then take the envelope of that collection.
 final OGCGeometry first = parsedGeometries.get(0);
 final OGCGeometryCollection collection = new OGCConcreteGeometryCollection(
   parsedGeometries, first.getEsriSpatialReference());
 return collection.envelope();
}

代码示例来源:origin: aseldawy/pigeon

/**
 * Parses every tuple of the bag in the input's first field into a geometry and returns
 * the convex hull of a collection of those geometries.
 *
 * @param input tuple whose first field is cast to a {@code DataBag} of geometry tuples
 * @return the value returned by the collection's {@code convexHull} call, or
 *         {@code null} when the bag is empty
 * @throws ExecException if reading a field fails
 */
static protected OGCGeometry convexHull(Tuple input) throws ExecException {
 final DataBag geometryBag = (DataBag) input.get(0);
 if (geometryBag.size() == 0) {
  return null;
 }
 final ArrayList<OGCGeometry> parsedGeometries = new ArrayList<OGCGeometry>();
 for (Tuple geometryTuple : geometryBag) {
  parsedGeometries.add(geometryParser.parseGeom(geometryTuple.get(0)));
 }

 // Wrap all geometries in one collection that uses the first geometry's spatial
 // reference, then compute the convex hull of that collection.
 final OGCGeometry first = parsedGeometries.get(0);
 final OGCGeometryCollection collection = new OGCConcreteGeometryCollection(
   parsedGeometries, first.getEsriSpatialReference());
 return collection.convexHull();
}

代码示例来源:origin: com.twitter/parquet-pig

/**
 * Adds a bag to the summary data: records the bag through the superclass, adds its
 * size to {@code size}, and adds every tuple to {@code content}. {@code content} is
 * created on the first non-empty bag.
 *
 * @param schema schema from which field 0 is looked up to describe the bag's content
 * @param bag    the bag to summarize
 */
public void add(Schema schema, DataBag bag) {
 super.add(bag);
 size.add(bag.size());
 final FieldSchema contentField = getField(schema, 0);
 if (bag.size() > 0) {
  if (content == null) {
   // First non-empty bag seen: create the content summary and name it.
   content = new FieldSummaryData();
   final String contentName = getName(contentField);
   content.setName(contentName);
  }
 }
 for (Tuple element : bag) {
  content.add(getSchema(contentField), element);
 }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Drains {@code getNextTuple()} until it reports end-of-processing and collects the
 * returned tuples into a new default bag.
 *
 * <p>A result with status {@code STATUS_ERR} is returned immediately; results with
 * status {@code STATUS_NULL} are skipped.
 *
 * @return a result holding the bag, with status {@code STATUS_EOP} when the bag is
 *         empty and {@code STATUS_OK} otherwise; or the first error result encountered
 * @throws ExecException if fetching a tuple fails
 */
public Result getNextDataBag() throws ExecException {
  final Result bagResult = new Result();
  final DataBag collected = mBagFactory.newDefaultBag();
  Result next = getNextTuple();
  while (next.returnStatus != POStatus.STATUS_EOP) {
    if (next.returnStatus == POStatus.STATUS_ERR) {
      return next;
    }
    if (next.returnStatus != POStatus.STATUS_NULL) {
      collected.add((Tuple) next.result);
    }
    next = getNextTuple();
  }
  bagResult.result = collected;
  if (collected.size() == 0) {
    bagResult.returnStatus = POStatus.STATUS_EOP;
  } else {
    bagResult.returnStatus = POStatus.STATUS_OK;
  }
  return bagResult;
}

代码示例来源:origin: aseldawy/pigeon

/**
 * Builds a line string from the bag of point geometries in the input's first field and
 * returns the output of {@code wkbWriter} for that line.
 *
 * @param b tuple whose first field is cast to a {@code DataBag}; each tuple's first
 *          field is parsed into a geometry whose coordinate becomes one vertex
 * @return the bytes written by {@code wkbWriter} for the line, wrapped in a
 *         {@code DataByteArray}
 * @throws IOException if reading a field fails
 */
@Override
public DataByteArray exec(Tuple b) throws IOException {
 final DataBag points = (DataBag) b.get(0);
 final int pointCount = (int) points.size();
 final Coordinate[] vertices = new Coordinate[pointCount];
 int next = 0;
 for (Tuple pointTuple : points) {
  final Geometry parsed = geometryParser.parseGeom(pointTuple.get(0));
  vertices[next] = parsed.getCoordinate();
  next++;
 }
 final Geometry line = geometryFactory.createLineString(vertices);
 return new DataByteArray(wkbWriter.write(line));
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Copies every tuple of the input bag into a new bag, appending a descending index to
 * each copy. The first tuple gets {@code inputBag.size() - 1 + start} and each following
 * tuple gets one less.
 *
 * @param inputBag the bag whose tuples are enumerated in reverse
 * @return a new default bag holding the copied tuples with the index appended
 * @throws IOException declared by the signature; this body throws none directly
 */
public DataBag call(DataBag inputBag) throws IOException
{
 final DataBag outputBag = BagFactory.getInstance().newDefaultBag();
 long index = inputBag.size() - 1 + start;
 long sinceSpill = 0;
 for (Tuple original : inputBag) {
  final Tuple indexed = TupleFactory.getInstance().newTuple(original.getAll());
  indexed.append(index);
  outputBag.add(indexed);
  // NOTE(review): the counter starts at 0, so a spill is requested right after the
  // first tuple and then after every further 1,000,000 tuples - confirm this is intended.
  if (sinceSpill % 1000000 == 0) {
   outputBag.spill();
   sinceSpill = 0;
  }
  index--;
  sinceSpill++;
 }
 return outputBag;
}

相关文章

微信公众号

最新文章

更多