
x33g5p2x  于2022-01-17 转载在 其他  



[英]Factory for constructing different types of bags. This class is abstract so that users can override the bag factory if they desire to provide their own that returns their implementation of a bag. If the property pig.data.bag.factory.name is set to a class name and pig.data.bag.factory.jar is set to a URL pointing to a jar that contains the above named class, then getInstance() will create an instance of the named class using the indicated jar. Otherwise, it will create an instance of DefaultBagFactory.


代码示例来源:origin: org.apache.pig/pig

public NullableBag() {
  if (mFactory == null) {
    mFactory = BagFactory.getInstance();
  mValue = mFactory.newDefaultBag();

代码示例来源:origin: org.apache.pig/pig

private static DataBag createDataBag() {
  if (!initialized) {
    initialized = true;
    if (PigMapReduce.sJobConfInternal.get() != null) {
      String bagType = PigMapReduce.sJobConfInternal.get().get(PigConfiguration.PIG_CACHEDBAG_DISTINCT_TYPE);
      if (bagType != null && bagType.equalsIgnoreCase("default")) {
        useDefaultBag = true;
  // by default, we create InternalDistinctBag, unless user configures
  // explicitly to use old bag
  return useDefaultBag ? BagFactory.getInstance().newDistinctBag() : new InternalDistinctBag(3);

代码示例来源:origin: org.apache.pig/pig

 * This constructor encapsulates an empty bag.
 * @param ruby an instance of the ruby runtime
 * @param rc   an instance of the class object with metadata
protected RubyDataBag(final Ruby ruby, RubyClass rc) {
  internalDB = mBagFactory.newDefaultBag();

代码示例来源:origin: com.linkedin.datafu/datafu

 public DataBag exec(Tuple tuple) throws IOException
  DataBag candidates = bagFactory.newSortedBag(CandidateComparator.get());
  for (Tuple intermediateOutputTuple : (DataBag) tuple.get(0))
   candidates.addAll((DataBag) intermediateOutputTuple.get(0));
  DataBag outputBag = bagFactory.newDefaultBag();
  int i = -1;
  for (Tuple candidate : candidates)
   int pos = (Integer) candidate.get(0);
   if (pos > i)
    outputBag.add((Tuple) candidate.get(2));
    i = pos;
  return outputBag;

代码示例来源:origin: org.apache.pig/pig

BagFactory factory = BagFactory.getInstance();
thisClone = factory.newSortedBag(null);
Iterator<Tuple> i = iterator();
while (i.hasNext()) thisClone.add(;
  otherClone = bOther;
} else {
  otherClone = factory.newSortedBag(null);
  i = bOther.iterator();
  while (i.hasNext()) otherClone.add(;

代码示例来源:origin: org.apache.pig/pig

/** Shared constant result constructed with {@code POStatus.STATUS_EOP} and a {@code null} second argument. */
protected static final Result RESULT_EOP = new Result(POStatus.STATUS_EOP, null);
/** Tuple factory obtained from {@code TupleFactory.getInstance()} when the class is initialized. */
protected static final TupleFactory mTupleFactory = TupleFactory.getInstance();
/** Bag factory obtained from {@code BagFactory.getInstance()} when the class is initialized. */
protected static final BagFactory mBagFactory = BagFactory.getInstance();

代码示例来源:origin: org.apache.pig/pig

sortedBag = mBagFactory.newLimitedSortedBag(mComparator, limit);
} else {
  sortedBag = useDefaultBag ? mBagFactory.newSortedBag(mComparator)
      : new InternalSortedBag(3, mComparator);

代码示例来源:origin: com.linkedin.datafu/datafu

 public DataBag exec(Tuple input) throws IOException
  DataBag outputBag = bagFactory.newDistinctBag();

  try {
   for (int i=0; i < input.size(); i++) {
    Object o = input.get(i);
    if (!(o instanceof DataBag))
     throw new RuntimeException("parameters must be databags");

    DataBag inputBag = (DataBag) o;
    for (Tuple elem : inputBag) {

   return outputBag;
  catch (Exception e) {
   throw new IOException(e);

代码示例来源:origin: Netflix/iceberg

protected DataBag newListData(DataBag reuse) {
 return BF.newDefaultBag();

代码示例来源:origin: com.linkedin.datafu/datafu

public Tuple exec(Tuple tuple) throws IOException
 // sort candidates first by index, then by key
 DataBag candidates = bagFactory.newSortedBag(CandidateComparator.get());
 for (Tuple intermediateOutputTuple : (DataBag) tuple.get(0))
  candidates.addAll((DataBag) intermediateOutputTuple.get(0));
 DataBag outputBag = bagFactory.newDefaultBag();
 int i = -1;
 for (Tuple candidate : candidates)
  int pos = (Integer) candidate.get(0);
  if (pos > i)
   i = pos;
 return tupleFactory.newTuple(outputBag);

代码示例来源:origin: org.apache.pig/pig

BagFactory factory = BagFactory.getInstance();
  thisClone = factory.newSortedBag(null);
  Iterator<Tuple> i = iterator();
  while (i.hasNext()) thisClone.add(;
  otherClone = bOther;
} else {
  otherClone = factory.newSortedBag(null);
  Iterator<Tuple> i = bOther.iterator();
  while (i.hasNext()) otherClone.add(;

代码示例来源:origin: org.apache.pig/pig

distinctBag = useDefaultBag ? mBagFactory.newDistinctBag()
    : new InternalDistinctBag(3);

代码示例来源:origin: com.linkedin.datafu/datafu

public void cleanup()
 this.outputBag = BagFactory.getInstance().newDefaultBag();
 this.i = this.start;
 this.count = 0;

代码示例来源:origin: org.apache.pig/pig

public DataBag getBag(){
    return useDefaultBag ? mBagFactory.newDefaultBag()
        // In a very rare case if there is a POStream after this
        // POJoinPackage in the pipeline and is also blocking the pipeline;
        // constructor argument should be 2 * numInputs. But for one obscure
        // case we don't want to pay the penalty all the time.
        : new InternalCachedBag(numInputs);

代码示例来源:origin: com.linkedin.datafu/datafu

 public DataBag exec(Tuple input) throws IOException
  DataBag bag = (DataBag) input.get(0);
  long n = 0L;
  DataBag selected = bagFactory.newDefaultBag();
  DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());
  for (Tuple innerTuple : bag)
   n += (Long) innerTuple.get(0);
   selected.addAll((DataBag) innerTuple.get(1));
   waiting.addAll((DataBag) innerTuple.get(2));
  long sampleSize = (long) Math.ceil(_samplingProbability * n);
  long nNeeded = sampleSize - selected.size();
  for (Tuple scored : waiting)
   if (nNeeded <= 0)
  return selected;

代码示例来源:origin: org.apache.pig/pig

static DataBag createDataBag() {
  BagFactory bagFactory = BagFactory.getInstance();
  return bagFactory.newDefaultBag();

代码示例来源:origin: org.apache.pig/pig

 * The initialize method is the method used on the Ruby side to construct
 * the RubyDataBag object. The default is just an empty bag.
 * @return the initialized RubyDataBag
public RubyDataBag initialize() {
  internalDB = mBagFactory.newDefaultBag();
  return this;

代码示例来源:origin: com.linkedin.datafu/datafu

DataBag selected = bagFactory.newDefaultBag();
DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());

代码示例来源:origin: thedatachef/varaha

public TermVector() {

代码示例来源:origin: org.apache.pig/pig

public DataBag exec(Tuple input) throws IOException {
  DataBag b = mBagFactory.newDefaultBag();
  for (int i = 0; i < numGroups; i++) {
  return b;



