org.apache.pig.data.Tuple类的使用及代码示例

x33g5p2x  于2022-01-29 转载在 其他  
字(8.6k)|赞(0)|评价(0)|浏览(160)

本文整理了Java中org.apache.pig.data.Tuple类的一些代码示例,展示了Tuple类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Tuple类的具体详情如下:
包路径:org.apache.pig.data.Tuple
类名称:Tuple

Tuple介绍

[英]An ordered list of Data. A tuple has fields, numbered 0 through (number of fields - 1). The entry in the field can be any datatype, or it can be null.

Tuples are constructed only by a TupleFactory. A DefaultTupleFactory is provided by the system. If users wish to use their own type of Tuple, they should also provide an implementation of TupleFactory to construct their types of Tuples.
[中]数据的有序列表。元组有编号为0到(字段数-1)的字段。字段中的条目可以是任何数据类型,也可以是null。
元组仅由TupleFactory构造。系统提供了一个默认的TupleFactory。如果用户希望使用自己的元组类型,他们还应该提供一个TupleFactory实现来构造自己的元组类型。

代码示例

代码示例来源:origin: apache/hive

/**
 * Converts the fields of a Pig tuple, one per field of {@code computedSchema},
 * and writes them out as a single {@code DefaultHCatRecord}.
 *
 * @param tuple the Pig tuple to store; field {@code n} is paired with the
 *              {@code n}-th schema field
 * @throws IOException a {@code BackendException} is raised when the write is interrupted
 */
@Override
public void putNext(Tuple tuple) throws IOException {
 List<Object> recordValues = new ArrayList<Object>(tuple.size());
 int position = 0;
 for (HCatFieldSchema fieldSchema : computedSchema.getFields()) {
  Object pigValue = tuple.get(position);
  recordValues.add(getJavaObj(pigValue, fieldSchema));
  position++;
 }
 try {
  // The record is written with a null key.
  writer.write(null, new DefaultHCatRecord(recordValues));
 } catch (InterruptedException interrupted) {
  throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, interrupted);
 }
}

代码示例来源:origin: apache/hive

/**
 * Builds a Pig tuple from a list of objects, converting each element with
 * {@code extractPigObject} according to the schema field at the same position.
 *
 * @param objList the source values; {@code null} yields a {@code null} result
 * @param hs      the schema whose fields drive the conversion
 * @return a tuple sized to {@code objList.size()}, or {@code null} when {@code objList} is null
 * @throws Exception propagated from the per-field conversion
 */
private static Tuple transformToTuple(List<?> objList, HCatSchema hs) throws Exception {
 if (objList == null) {
  return null;
 }
 Tuple result = tupFac.newTuple(objList.size());
 int index = 0;
 // One conversion per schema field; the schema, not the value list, bounds the loop.
 for (HCatFieldSchema fieldSchema : hs.getFields()) {
  result.set(index, extractPigObject(objList.get(index), fieldSchema));
  index++;
 }
 return result;
}

代码示例来源:origin: apache/hive

switch (type) {
case BINARY:
 return ((DataByteArray) pigObj).get();
 List<Object> all = ((Tuple) pigObj).getAll();
 ArrayList<Object> converted = new ArrayList<Object>(all.size());
 for (int i = 0; i < all.size(); i++) {
 HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
 boolean needTuple = tupFS.getType() == Type.STRUCT;
 List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
 Iterator<Tuple> bagItr = pigBag.iterator();
  bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));

代码示例来源:origin: elastic/elasticsearch-hadoop

object = pt.getTuple().get(0);
  type = pt.getTuple().getType(0);
} catch (Exception ex) {
  throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex);
  to.bytes(dba.get(), dba.size());
  return;

代码示例来源:origin: org.apache.pig/pig

/**
 * Produces a two-field tuple from the bag held in field 0 of the input:
 * field 0 is the {@code Long} taken from the first tuple of the bag (or
 * {@code null} when the bag is empty), field 1 is {@code 1L} when that value
 * is non-null and {@code 0L} otherwise.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return the (value, count) tuple
 * @throws IOException an {@code ExecException} is rethrown as-is; any other
 *                     failure is wrapped in an {@code ExecException} with code 2106
 */
@Override
  public Tuple exec(Tuple input) throws IOException {
    try {
      Tuple result = mTupleFactory.newTuple(2);
      // Per the original author's note: the input is a bag with one tuple
      // containing the column being averaged.
      DataBag bag = (DataBag) input.get(0);
      Long value = null;
      if (bag.iterator().hasNext()) {
        Tuple first = bag.iterator().next();
        value = (Long) first.get(0);
      }
      result.set(0, value);
      result.set(1, value != null ? 1L : 0L);
      return result;
    } catch (ExecException execFailure) {
      throw execFailure;
    } catch (Exception unexpected) {
      int errCode = 2106;
      String msg = "Error while computing average in " + this.getClass().getSimpleName();
      throw new ExecException(msg, errCode, PigException.BUG, unexpected);
    }
  }
}

代码示例来源:origin: pl.edu.icm.coansys/coansys-io-output

/**
 * Splits a serialized {@code DocumentWrapper} into its metadata and media parts.
 *
 * @param tuple exactly two fields: a chararray row id and a bytearray holding
 *              the serialized {@code DocumentWrapper}
 * @return a three-field tuple: row id, serialized document metadata,
 *         serialized media container
 * @throws IOException when the tuple is null, is not of size 2, or its field
 *                     types are not (chararray, bytearray)
 */
@Override
  public Tuple exec(Tuple tuple) throws IOException {

    boolean wellFormed = tuple != null
        && tuple.size() == 2
        && tuple.getType(0) == DataType.CHARARRAY
        && tuple.getType(1) == DataType.BYTEARRAY;
    if (!wellFormed) {
      throw new IOException(this.getClass().getName()
          + " expects 2 arguments, first string, second byte array");
    }

    String rowId = (String) tuple.get(0);
    DataByteArray serialized = (DataByteArray) tuple.get(1);

    DocumentWrapper wrapper = DocumentWrapper.parseFrom(serialized.get());
    DocumentMetadata metadata = wrapper.getDocumentMetadata();
    MediaContainer media = wrapper.getMediaContainer();

    Tuple result = TupleFactory.getInstance().newTuple();
    result.append(rowId);
    result.append(new DataByteArray(metadata.toByteArray()));
    result.append(new DataByteArray(media.toByteArray()));
    return result;
  }
}

代码示例来源:origin: pl.edu.icm.coansys/document-similarity-logic

/**
 * Parses the serialized {@code DocumentWrapper} in field 0 and fills a new
 * tuple, sized to {@code fieldNumberMap.size()}, via
 * {@code addDocumentMetatdataFields}.
 *
 * @param input tuple whose first field is a {@code DataByteArray}
 * @return the tuple returned by {@code addDocumentMetatdataFields}
 * @throws IOException propagated from reading the input or parsing the document
 */
@Override
public Tuple exec(Tuple input) throws IOException {
  DataByteArray serialized = (DataByteArray) input.get(0);
  DocumentWrapper wrapper = DocumentWrapper.parseFrom(serialized.get());
  DocumentMetadata metadata = wrapper.getDocumentMetadata();
  Tuple blank = TupleFactory.getInstance().newTuple(fieldNumberMap.size());
  return addDocumentMetatdataFields(metadata, blank);
}

代码示例来源:origin: lucidworks/solr-scale-tk

/**
 * Generates {@code numKeys} (key, bucket) tuples. Each key is the string in
 * field 0 of the input followed by the loop counter; each bucket is drawn
 * from {@code random.nextInt(maxRandom)}.
 *
 * @param input tuple whose first field is the key prefix
 * @return a bag holding one two-field tuple per generated key
 * @throws IOException propagated from reading or populating tuples
 */
public DataBag exec(Tuple input) throws IOException {
  DataBag generated = bagFactory.newDefaultBag();
  String prefix = (String) input.get(0);
  for (int suffix = 0; suffix < numKeys; suffix++) {
   String generatedKey = prefix + suffix;
   int bucket = random.nextInt(maxRandom);
   Tuple pair = tupleFactory.newTuple(2);
   pair.set(0, generatedKey);
   pair.set(1, bucket);
   generated.add(pair);
  }
  return generated;
}

代码示例来源:origin: pl.edu.icm.coansys/commons

/**
 * Writes a two-field tuple as a key/value pair. Both fields are cast to
 * {@code DataByteArray}; their raw bytes are each wrapped in a single-field
 * tuple, then in a {@code NullableTuple}, and handed to {@code writer}.
 *
 * @param t the output tuple; must have exactly two non-null fields
 * @throws IOException an {@code ExecException} when the tuple does not have
 *                     exactly two fields or when either field (or its byte
 *                     content) is null; a plain {@code IOException} wrapping
 *                     the {@code InterruptedException} when the write is interrupted
 */
@Override
  public void putNext(Tuple t) throws IOException {
    if (t.size() != 2) {
      throw new ExecException("Output tuple has wrong size: is " + t.size() + ", should be 2");
    }

    // Check the fields themselves before dereferencing them: a null field
    // must be reported as an ExecException, not surface as a NullPointerException.
    DataByteArray keyField = (DataByteArray) t.get(0);
    DataByteArray valueField = (DataByteArray) t.get(1);
    if (keyField == null || valueField == null) {
      throw new ExecException("Output tuple contains null");
    }

    byte[] keyBytes = keyField.get();
    byte[] valueBytes = valueField.get();
    if (keyBytes == null || valueBytes == null) {
      throw new ExecException("Output tuple contains null");
    }

    ArrayList<byte[]> alk = new ArrayList<byte[]>();
    alk.add(keyBytes);
    NullableTuple key = new NullableTuple(TupleFactory.getInstance().newTuple(alk));
    ArrayList<byte[]> alv = new ArrayList<byte[]>();
    alv.add(valueBytes);
    NullableTuple val = new NullableTuple(TupleFactory.getInstance().newTuple(alv));

    try {
      writer.write(key, val);
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Counts the tuples in the bag held in field 0 of the input that are
 * non-null, have at least one field, and whose first field is non-null.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return the number of qualifying tuples
 * @throws ExecException propagated from reading tuple fields
 */
static protected long count(Tuple input) throws ExecException {
  DataBag values = (DataBag) input.get(0);
  long matches = 0;
  for (Tuple row : values) {
    // Skip rows that are missing, empty, or have a null first field.
    if (row == null || row.size() <= 0 || row.get(0) == null) {
      continue;
    }
    matches++;
  }
  return matches;
}

代码示例来源:origin: com.linkedin.datafu/datafu

/**
 * Copies every tuple of the bag in field 0 into {@code outputBag}, appending
 * the running index {@code i} as an extra trailing field.
 *
 * @param arg0 tuple whose first field is a {@code DataBag}
 * @throws IOException propagated from reading the input tuple
 */
@Override
public void accumulate(Tuple arg0) throws IOException
{
 DataBag source = (DataBag) arg0.get(0);
 for (Tuple original : source) {
  Tuple numbered = TupleFactory.getInstance().newTuple(original.getAll());
  numbered.append(i);
  outputBag.add(numbered);
  // Spill whenever count is a multiple of 1,000,000, then restart the counter.
  // NOTE(review): this also fires when count is 0 on entry - confirm the
  // field's initial value against the enclosing class.
  if (count % 1000000 == 0) {
   outputBag.spill();
   count = 0;
  }
  count++;
  i++;
 }
}

代码示例来源:origin: pl.edu.icm.coansys/coansys-io-output

/**
 * Parses the serialized {@code DocumentWrapper} in field 1 and returns the
 * row id, the document metadata key, and the untouched serialized bytes.
 *
 * @param tuple exactly two fields; the second must be a bytearray
 * @return a three-field tuple: row id, metadata key, original bytearray
 * @throws IOException when the tuple is null, is not of size 2, or its second
 *                     field is not a bytearray
 */
@Override
  public Tuple exec(Tuple tuple) throws IOException {
    boolean wellFormed = tuple != null
        && tuple.size() == 2
        && tuple.getType(1) == DataType.BYTEARRAY;
    if (!wellFormed) {
      throw new IOException("" + this.getClass().getName() +
          " expects 2 arguments, 2nd must be a bytearray");
    }

    String rowId = (String) tuple.get(0);
    DataByteArray serialized = (DataByteArray) tuple.get(1);

    DocumentWrapper wrapper = DocumentProtos.DocumentWrapper.parseFrom(serialized.get());

    Tuple output = tupleFactory.newTuple();
    output.append(rowId);
    output.append(wrapper.getDocumentMetadata().getKey());
    output.append(serialized);
    return output;
  }
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Concatenates the fields of all given tuples, in order, into one new tuple
 * and passes it through {@code illustratorMarkup}.
 *
 * @param data the tuples to flatten
 * @return the result of {@code illustratorMarkup(out, out, 0)} for the
 *         concatenated tuple
 * @throws ExecException propagated from reading tuple fields
 */
private Tuple createTuple(Tuple[] data) throws ExecException {
  Tuple flattened = TupleFactory.getInstance().newTuple();
  for (Tuple part : data) {
    // The width is read once per source tuple, before its fields are copied.
    for (int field = 0, width = part.size(); field < width; field++) {
      flattened.append(part.get(field));
    }
  }
  return illustratorMarkup(flattened, flattened, 0);
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Builds an output tuple two fields shorter than the input. Output field 0 is
 * {@code calculateOffset(input[0])} plus the {@code Long} in input field 2;
 * every later output field {@code k} is a copy of input field {@code k + 2}.
 *
 * @param input the source tuple; field 0 is read as an {@code Integer} and
 *              field 2 as a {@code Long}
 * @return the shifted tuple
 * @throws Exception propagated from tuple access or {@code calculateOffset}
 */
@Override
public Tuple call(Tuple input) throws Exception {
  final int outputWidth = input.getAll().size() - 2;
  Tuple shifted = TupleFactory.getInstance().newTuple(outputWidth);

  // Slot 0 is filled last; the remaining slots are copied from two positions further right.
  int target = 1;
  while (target < outputWidth) {
    shifted.set(target, input.get(target + 2));
    target++;
  }

  long base = calculateOffset((Integer) input.get(0));
  shifted.set(0, base + (Long) input.get(2));
  return shifted;
}

代码示例来源:origin: apache/phoenix

// Build a four-field tuple by appending: the int literal 1, followed by
// the same dt value three times.
Tuple t = tupleFactory.newTuple();
t.append(1);
t.append(dt);
t.append(dt);
t.append(dt);

代码示例来源:origin: thedatachef/varaha

/**
 * Tokenizes the string form of field 0 with a {@code PTBTokenizer} and
 * returns one single-field tuple per token.
 *
 * @param input tuple whose first field supplies the text
 * @return a bag of token tuples, or {@code null} when the input is null,
 *         empty, or has a null first field
 * @throws IOException propagated from reading the input tuple
 */
public DataBag exec(Tuple input) throws IOException {
    boolean nothingToTokenize = input == null || input.size() < 1 || input.isNull(0);
    if (nothingToTokenize) {
      return null;
    }

    // Bag that collects one tuple per token
    DataBag tokens = bagFactory.newDefaultBag();

    StringReader reader = new StringReader(input.get(0).toString());
    PTBTokenizer tokenizer = new PTBTokenizer(reader, new CoreLabelTokenFactory(), "");

    while (tokenizer.hasNext()) {
      CoreLabel token = (CoreLabel) tokenizer.next();
      tokens.add(tupleFactory.newTuple(token.toString()));
    }

    return tokens;
  }
}

代码示例来源:origin: elastic/elasticsearch-hadoop

dataMap = reader.getCurrentValue();
Tuple tuple = TupleFactory.getInstance().newTuple(dataMap.size());
    tuple.set(i, result);
  Set<Entry<?, ?>> entrySet = dataMap.entrySet();
  for (Map.Entry entry : entrySet) {
    tuple.set(i++, entry.getValue());

代码示例来源:origin: org.apache.pig/pig

/**
 * Converts field 0 of the input to a string with {@code DataType.toString}
 * and hands it to {@code ToDate.extractDateTime}.
 *
 * @param input tuple whose first field holds the value to convert
 * @return the extracted {@code DateTime}, or {@code null} when the input is
 *         null, empty, or has a null first field
 * @throws IOException propagated from reading or converting the field
 */
public DateTime exec(Tuple input) throws IOException {
  boolean missing = input == null || input.size() < 1 || input.get(0) == null;
  if (missing) {
    return null;
  }
  return ToDate.extractDateTime(DataType.toString(input.get(0)));
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Returns a single-field tuple holding {@code 1L} when the bag in field 0 of
 * the input has at least one element and {@code 0L} when it is empty.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return a one-field tuple with the initial count
 * @throws IOException propagated from reading the input tuple
 */
@Override
  public Tuple exec(Tuple input) throws IOException {
    // Original author's note: Initial is guaranteed to be called only in
    // the map, with a bag holding a single tuple, so the count is 1
    // whenever the bag is non-empty.
    DataBag bag = (DataBag) input.get(0);
    Long initialCount;
    if (bag.iterator().hasNext()) {
      initialCount = Long.valueOf(1L);
    } else {
      initialCount = Long.valueOf(0L);
    }
    return mTupleFactory.newTuple(initialCount);
  }
}

代码示例来源:origin: pl.edu.icm.coansys/commons

/**
 * Wraps the string form of the {@code DataByteArray} in field 0 in a new
 * single-field tuple.
 *
 * @param input tuple whose first field is a {@code DataByteArray}
 * @return a one-field tuple containing {@code toString()} of that byte array
 * @throws IOException propagated from reading the input tuple
 */
@Override
public Tuple exec(Tuple input) throws IOException {
  Tuple wrapped = TupleFactory.getInstance().newTuple();
  DataByteArray bytes = (DataByteArray) input.get(0);
  wrapped.append(bytes.toString());
  return wrapped;
}

相关文章