org.apache.pig.data.BagFactory类的使用及代码示例

x33g5p2x  于2022-01-17 转载在 其他  
字(6.5k)|赞(0)|评价(0)|浏览(150)

本文整理了Java中org.apache.pig.data.BagFactory类的一些代码示例,展示了BagFactory类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。BagFactory类的具体详情如下:
包路径:org.apache.pig.data.BagFactory
类名称:BagFactory

BagFactory介绍

[英]Factory for constructing different types of bags. This class is abstract so that users can override the bag factory if they desire to provide their own that returns their implementation of a bag. If the property pig.data.bag.factory.name is set to a class name and pig.data.bag.factory.jar is set to a URL pointing to a jar that contains the above named class, then getInstance() will create an instance of the named class using the indicated jar. Otherwise, it will create an instance of DefaultBagFactory.
[中]用于构造不同类型 bag(数据包)的工厂。此类是抽象的,因此如果用户希望提供自己的工厂来返回自定义的 bag 实现,可以重写该 bag 工厂。如果属性 pig.data.bag.factory.name 被设置为某个类名,并且属性 pig.data.bag.factory.jar 被设置为指向包含该类的 jar 的 URL,那么 getInstance() 将使用指定的 jar 创建该类的实例。否则,它将创建 DefaultBagFactory 的实例。

代码示例

代码示例来源:origin: org.apache.pig/pig

/**
 * Creates a nullable bag whose value is a new default bag.
 *
 * <p>The factory already held in {@code mFactory} is reused; one is fetched from
 * {@link BagFactory#getInstance()} and stored in that field only when the field is
 * still {@code null}.
 */
public NullableBag() {
  BagFactory factory = mFactory;
  if (factory == null) {
    // First use: resolve the factory and remember it for later constructions.
    factory = BagFactory.getInstance();
    mFactory = factory;
  }
  mValue = factory.newDefaultBag();
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Creates the bag used to collect distinct tuples.
 *
 * <p>On the first call (while {@code initialized} is false) the job configuration held
 * in {@code PigMapReduce.sJobConfInternal} is consulted, if present: when the value of
 * {@code PigConfiguration.PIG_CACHEDBAG_DISTINCT_TYPE} equals "default" (ignoring case),
 * {@code useDefaultBag} is switched on. Later calls reuse that decision.
 *
 * @return a distinct bag from {@link BagFactory} when {@code useDefaultBag} is set,
 *         otherwise a new {@code InternalDistinctBag}
 */
private static DataBag createDataBag() {
  if (!initialized) {
    initialized = true;
    if (PigMapReduce.sJobConfInternal.get() != null) {
      final String configuredType =
          PigMapReduce.sJobConfInternal.get().get(PigConfiguration.PIG_CACHEDBAG_DISTINCT_TYPE);
      // Constant-first comparison also covers a missing (null) setting.
      if ("default".equalsIgnoreCase(configuredType)) {
        useDefaultBag = true;
      }
    }
  }

  // Only an explicit user setting selects the old bag from the factory.
  if (useDefaultBag) {
    return BagFactory.getInstance().newDistinctBag();
  }
  // Otherwise InternalDistinctBag is the default choice.
  return new InternalDistinctBag(3);
}

代码示例来源:origin: org.apache.pig/pig

/**
 * This constructor encapsulates an empty bag.
 *
 * @param ruby an instance of the ruby runtime
 * @param rc   an instance of the class object with metadata
 */
protected RubyDataBag(final Ruby ruby, RubyClass rc) {
  super(ruby,rc);
  // Back this object with a new default bag obtained from mBagFactory.
  internalDB = mBagFactory.newDefaultBag();
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Merges the candidate bags found in the input and keeps one entry per increasing position.
 *
 * <p>Field 0 of {@code tuple} is read as a bag of intermediate tuples, each holding a
 * bag of candidates in its own field 0. All candidates are collected into one bag sorted
 * with {@code CandidateComparator}. The sorted candidates are then scanned once: a
 * candidate is emitted only when the integer in its field 0 is strictly greater than
 * that of the last emitted candidate, and what is emitted is the tuple in its field 2.
 *
 * @param tuple input whose field 0 is a bag of intermediate tuples
 * @return a default bag holding the selected field-2 tuples in scan order
 * @throws IOException declared by the signature; presumably raised by the tuple accessors
 */
@Override
 public DataBag exec(Tuple tuple) throws IOException
 {
  DataBag sortedCandidates = bagFactory.newSortedBag(CandidateComparator.get());
  for (Tuple partial : (DataBag) tuple.get(0))
  {
   DataBag partialCandidates = (DataBag) partial.get(0);
   sortedCandidates.addAll(partialCandidates);
  }

  DataBag result = bagFactory.newDefaultBag();
  int lastPos = -1;
  for (Tuple candidate : sortedCandidates)
  {
   int candidatePos = (Integer) candidate.get(0);
   if (candidatePos <= lastPos)
   {
    // Position already covered by an earlier candidate in sort order.
    continue;
   }
   result.add((Tuple) candidate.get(2));
   lastPos = candidatePos;
  }
  return result;
 }
}

代码示例来源:origin: org.apache.pig/pig

BagFactory factory = BagFactory.getInstance();
thisClone = factory.newSortedBag(null);
Iterator<Tuple> i = iterator();
while (i.hasNext()) thisClone.add(i.next());
  otherClone = bOther;
} else {
  otherClone = factory.newSortedBag(null);
  i = bOther.iterator();
  while (i.hasNext()) otherClone.add(i.next());

代码示例来源:origin: org.apache.pig/pig

// Result constant built from POStatus.STATUS_EOP with a null second argument
// (presumably the "end of processing" marker — confirm against POStatus).
protected static final Result RESULT_EOP = new Result(POStatus.STATUS_EOP, null);
// Tuple factory obtained once from TupleFactory.getInstance().
protected static final TupleFactory mTupleFactory = TupleFactory.getInstance();
// Bag factory obtained once from BagFactory.getInstance().
protected static final BagFactory mBagFactory = BagFactory.getInstance();

代码示例来源:origin: org.apache.pig/pig

sortedBag = mBagFactory.newLimitedSortedBag(mComparator, limit);
} else {
  sortedBag = useDefaultBag ? mBagFactory.newSortedBag(mComparator)
      : new InternalSortedBag(3, mComparator);

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Collects the tuples of every input bag into a single distinct bag.
 *
 * <p>Each field of {@code input} must be a {@code DataBag}; a field of any other type
 * raises a {@code RuntimeException}, which, like every other exception thrown while
 * reading the input, is rethrown wrapped in an {@code IOException}.
 *
 * @param input tuple whose fields are the bags to combine
 * @return a distinct bag containing the tuples added from all input bags
 * @throws IOException wrapping any exception raised while processing the fields
 */
@Override
 public DataBag exec(Tuple input) throws IOException
 {
  final DataBag union = bagFactory.newDistinctBag();

  try {
   for (int idx = 0; idx < input.size(); idx++) {
    final Object field = input.get(idx);
    if (field instanceof DataBag) {
     for (Tuple member : (DataBag) field) {
      union.add(member);
     }
    } else {
     throw new RuntimeException("parameters must be databags");
    }
   }

   return union;
  }
  catch (Exception cause) {
   throw new IOException(cause);
  }
 }
}

代码示例来源:origin: Netflix/iceberg

/**
 * Returns a new default bag created by {@code BF}.
 *
 * @param reuse a bag offered for reuse; this implementation does not read it
 * @return a freshly created default bag
 */
@Override
protected DataBag newListData(DataBag reuse) {
 return BF.newDefaultBag();
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Merges the candidate bags found in the input and keeps one candidate per increasing
 * position, returning them wrapped in a tuple.
 *
 * <p>Field 0 of {@code tuple} is read as a bag of intermediate tuples, each holding a
 * bag of candidates in its own field 0. The merged candidates are scanned in the order
 * given by {@code CandidateComparator}; a candidate is kept only when the integer in its
 * field 0 is strictly greater than that of the last kept candidate.
 *
 * @param tuple input whose field 0 is a bag of intermediate tuples
 * @return a tuple built by {@code tupleFactory} around the bag of kept candidates
 * @throws IOException declared by the signature; presumably raised by the tuple accessors
 */
@Override
public Tuple exec(Tuple tuple) throws IOException
{
 // sort candidates first by index, then by key
 DataBag sortedCandidates = bagFactory.newSortedBag(CandidateComparator.get());
 for (Tuple partial : (DataBag) tuple.get(0))
 {
  DataBag partialCandidates = (DataBag) partial.get(0);
  sortedCandidates.addAll(partialCandidates);
 }

 DataBag kept = bagFactory.newDefaultBag();
 int lastPos = -1;
 for (Tuple candidate : sortedCandidates)
 {
  int candidatePos = (Integer) candidate.get(0);
  if (candidatePos <= lastPos)
  {
   // Position already covered by an earlier candidate in sort order.
   continue;
  }
  kept.add(candidate);
  lastPos = candidatePos;
 }
 return tupleFactory.newTuple(kept);
}

代码示例来源:origin: org.apache.pig/pig

BagFactory factory = BagFactory.getInstance();
  thisClone = factory.newSortedBag(null);
  Iterator<Tuple> i = iterator();
  while (i.hasNext()) thisClone.add(i.next());
  otherClone = bOther;
} else {
  otherClone = factory.newSortedBag(null);
  Iterator<Tuple> i = bOther.iterator();
  while (i.hasNext()) otherClone.add(i.next());

代码示例来源:origin: org.apache.pig/pig

distinctBag = useDefaultBag ? mBagFactory.newDistinctBag()
    : new InternalDistinctBag(3);

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Resets this object's working state: installs a new default bag as the output bag,
 * sets {@code i} back to {@code start}, and sets {@code count} to zero.
 */
@Override
public void cleanup()
{
 // A new bag is assigned instead of modifying the bag currently referenced by outputBag.
 this.outputBag = BagFactory.getInstance().newDefaultBag();
 this.i = this.start;
 this.count = 0;
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Creates the bag to use, depending on {@code useDefaultBag}.
 *
 * @return a default bag from {@code mBagFactory} when {@code useDefaultBag} is set,
 *         otherwise a new {@code InternalCachedBag} constructed with {@code numInputs}
 */
public DataBag getBag(){
    if (useDefaultBag) {
        return mBagFactory.newDefaultBag();
    }
    // In a very rare case if there is a POStream after this
    // POJoinPackage in the pipeline and is also blocking the pipeline;
    // constructor argument should be 2 * numInputs. But for one obscure
    // case we don't want to pay the penalty all the time.
    return new InternalCachedBag(numInputs);
}
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Combines intermediate sampling results into the final bag of selected tuples.
 *
 * <p>Field 0 of {@code input} is read as a bag of intermediate tuples. For each one,
 * field 0 (a {@code Long}) is added to a running total, the bag in field 1 is merged
 * into the selected bag, and the bag in field 2 is merged into a waiting bag sorted with
 * {@code ScoredTupleComparator}. The target size is
 * {@code ceil(_samplingProbability * total)}; waiting tuples are then moved, in sorted
 * order, into the selected bag until the target is met or the waiting bag runs out.
 *
 * @param input tuple whose field 0 is the bag of intermediate tuples
 * @return the bag of selected tuples
 * @throws IOException declared by the signature; presumably raised by the tuple accessors
 */
@Override
 public DataBag exec(Tuple input) throws IOException
 {
  DataBag intermediates = (DataBag) input.get(0);
  long total = 0L;
  DataBag selected = bagFactory.newDefaultBag();
  DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());
  for (Tuple intermediate : intermediates)
  {
   total = total + (Long) intermediate.get(0);
   selected.addAll((DataBag) intermediate.get(1));
   waiting.addAll((DataBag) intermediate.get(2));
  }

  // How many more tuples are needed to reach the target sample size.
  long remaining = (long) Math.ceil(_samplingProbability * total) - selected.size();
  for (Tuple scored : waiting)
  {
   if (remaining <= 0)
   {
    break;
   }
   selected.add(ScoredTuple.fromIntermediateTuple(scored).getTuple());
   remaining--;
  }
  return selected;
 }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Creates a new default bag using the factory returned by {@link BagFactory#getInstance()}.
 *
 * @return a newly created default bag
 */
static DataBag createDataBag() {
  return BagFactory.getInstance().newDefaultBag();
}

代码示例来源:origin: org.apache.pig/pig

/**
 * The initialize method is the method used on the Ruby side to construct
 * the RubyDataBag object. The default is just an empty bag.
 *
 * @return the initialized RubyDataBag (this same instance)
 */
@JRubyMethod
@SuppressWarnings("deprecation")
public RubyDataBag initialize() {
  // Point internalDB at a new default bag obtained from mBagFactory.
  internalDB = mBagFactory.newDefaultBag();
  return this;
}

代码示例来源:origin: com.linkedin.datafu/datafu

DataBag selected = bagFactory.newDefaultBag();
DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());

代码示例来源:origin: thedatachef/varaha

/**
 * Creates a TermVector by delegating to the single-argument constructor with a new
 * default bag from {@link BagFactory#getInstance()}.
 */
public TermVector() {
  this(BagFactory.getInstance().newDefaultBag());
}

代码示例来源:origin: org.apache.pig/pig

@Override
public DataBag exec(Tuple input) throws IOException {
  DataBag b = mBagFactory.newDefaultBag();
  for (int i = 0; i < numGroups; i++) {
    b.add(mTupleFactory.newTuple(Integer.valueOf(1)));
  }
  return b;
}

相关文章

微信公众号

最新文章

更多