本文整理了Java中org.apache.pig.data.Tuple
类的一些代码示例,展示了Tuple
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Tuple
类的具体详情如下:
包路径:org.apache.pig.data.Tuple
类名称:Tuple
[英]An ordered list of Data. A tuple has fields, numbered 0 through (number of fields - 1). The entry in the field can be any datatype, or it can be null.
Tuples are constructed only by a TupleFactory. A DefaultTupleFactoryis provided by the system. If users wish to use their own type of Tuple, they should also provide an implementation of TupleFactory to construct their types of Tuples.
[中]数据的有序列表。元组有编号为0到(字段数-1)的字段。字段中的条目可以是任何数据类型,也可以是null。
元组仅由TupleFactory构造。系统提供了一个默认的TupleFactory。如果用户希望使用自己的元组类型,他们还应该提供一个TupleFactory实现来构造自己的元组类型。
代码示例来源:origin: apache/hive
@Override
public void putNext(Tuple tuple) throws IOException {
    // Convert the incoming Pig tuple into an HCatRecord, using the computed
    // HCat schema to drive the per-field conversion, then hand it to the writer.
    List<Object> outgoing = new ArrayList<Object>(tuple.size());
    int i = 0;
    for (HCatFieldSchema fSchema : computedSchema.getFields()) {
        outgoing.add(getJavaObj(tuple.get(i++), fSchema));
    }
    try {
        writer.write(null, new DefaultHCatRecord(outgoing));
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can still
        // observe the interruption (the original swallowed it).
        Thread.currentThread().interrupt();
        throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e);
    }
}
代码示例来源:origin: apache/hive
/**
 * Builds a Pig tuple from a list of raw objects, converting each element
 * according to the matching HCat field schema.
 *
 * @param objList the raw values; a null list maps to a null tuple
 * @param hs      schema whose fields pair positionally with objList
 * @return the populated tuple, or null when objList is null
 */
private static Tuple transformToTuple(List<?> objList, HCatSchema hs) throws Exception {
    if (objList == null) {
        return null;
    }
    List<HCatFieldSchema> fieldSchemas = hs.getFields();
    Tuple result = tupFac.newTuple(objList.size());
    int idx = 0;
    for (HCatFieldSchema fieldSchema : fieldSchemas) {
        result.set(idx, extractPigObject(objList.get(idx), fieldSchema));
        idx++;
    }
    return result;
}
代码示例来源:origin: apache/hive
switch (type) {
case BINARY:
return ((DataByteArray) pigObj).get();
List<Object> all = ((Tuple) pigObj).getAll();
ArrayList<Object> converted = new ArrayList<Object>(all.size());
for (int i = 0; i < all.size(); i++) {
HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
boolean needTuple = tupFS.getType() == Type.STRUCT;
List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
Iterator<Tuple> bagItr = pigBag.iterator();
bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
代码示例来源:origin: elastic/elasticsearch-hadoop
object = pt.getTuple().get(0);
type = pt.getTuple().getType(0);
} catch (Exception ex) {
throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex);
to.bytes(dba.get(), dba.size());
return;
代码示例来源:origin: org.apache.pig/pig
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        Tuple t = mTupleFactory.newTuple(2);
        // input is a bag with one tuple containing
        // the column we are trying to avg on
        DataBag bg = (DataBag) input.get(0);
        Long l = null;
        // Read the first tuple of the bag, if any. The original called
        // bg.iterator() twice — once for hasNext(), once for next() — which
        // builds two independent iterators; a single pass is correct and cheaper.
        for (Tuple tp : bg) {
            l = (Long) tp.get(0);
            break; // only the first (and only expected) tuple matters
        }
        t.set(0, l);
        // Field 1 carries the non-null count: 1 if a value was seen, else 0.
        t.set(1, l != null ? 1L : 0L);
        return t;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing average in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
}
代码示例来源:origin: pl.edu.icm.coansys/coansys-io-output
@Override
public Tuple exec(Tuple tuple) throws IOException {
    // Validate the contract: exactly two fields, (chararray, bytearray).
    boolean wellFormed = tuple != null && tuple.size() == 2
            && tuple.getType(0) == DataType.CHARARRAY
            && tuple.getType(1) == DataType.BYTEARRAY;
    if (!wellFormed) {
        throw new IOException(this.getClass().getName()
            + " expects 2 arguments, first string, second byte array");
    }
    String rowId = (String) tuple.get(0);
    DataByteArray dwByteArray = (DataByteArray) tuple.get(1);
    // Deserialize the wrapper and emit (rowId, metadata bytes, media bytes).
    DocumentWrapper wrapper = DocumentWrapper.parseFrom(dwByteArray.get());
    Tuple result = TupleFactory.getInstance().newTuple();
    result.append(rowId);
    result.append(new DataByteArray(wrapper.getDocumentMetadata().toByteArray()));
    result.append(new DataByteArray(wrapper.getMediaContainer().toByteArray()));
    return result;
}
}
代码示例来源:origin: pl.edu.icm.coansys/document-similarity-logic
@Override
public Tuple exec(Tuple input) throws IOException {
    // Field 0 carries a serialized DocumentWrapper; unwrap its metadata.
    DataByteArray serialized = (DataByteArray) input.get(0);
    DocumentMetadata metadata =
        DocumentWrapper.parseFrom(serialized.get()).getDocumentMetadata();
    // Pre-size the tuple to the number of mapped fields, then let the
    // helper populate it from the metadata.
    Tuple output = TupleFactory.getInstance().newTuple(fieldNumberMap.size());
    return addDocumentMetatdataFields(metadata, output);
}
代码示例来源:origin: lucidworks/solr-scale-tk
/**
 * Expands one id prefix into a bag of numKeys (key, random-bucket) pairs.
 * Keys are formed by appending the loop counter to the prefix in field 0.
 */
public DataBag exec(Tuple input) throws IOException {
    DataBag outputBag = bagFactory.newDefaultBag();
    String idBase = (String) input.get(0);
    for (int k = 0; k < numKeys; k++) {
        Tuple pair = tupleFactory.newTuple(2);
        pair.set(0, idBase + k);
        // Bucket is uniform in [0, maxRandom).
        pair.set(1, random.nextInt(maxRandom));
        outputBag.add(pair);
    }
    return outputBag;
}
代码示例来源:origin: pl.edu.icm.coansys/commons
@Override
public void putNext(Tuple t) throws IOException {
    // Expect exactly (key: bytearray, value: bytearray).
    if (t.size() != 2) {
        throw new ExecException("Output tuple has wrong size: is " + t.size() + ", should be 2");
    }
    byte[] keyBytes = ((DataByteArray) t.get(0)).get();
    byte[] valueBytes = ((DataByteArray) t.get(1)).get();
    if (keyBytes == null || valueBytes == null) {
        throw new ExecException("Output tuple contains null");
    }
    // Wrap each byte[] in a single-element tuple, as the writer expects
    // NullableTuple keys and values.
    ArrayList<byte[]> alk = new ArrayList<byte[]>();
    alk.add(keyBytes);
    NullableTuple key = new NullableTuple(TupleFactory.getInstance().newTuple(alk));
    ArrayList<byte[]> alv = new ArrayList<byte[]>();
    alv.add(valueBytes);
    NullableTuple val = new NullableTuple(TupleFactory.getInstance().newTuple(alv));
    try {
        writer.write(key, val);
    } catch (InterruptedException e) {
        // Preserve the interrupt status before converting to IOException
        // (the original dropped it, hiding the interruption from callers).
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
}
代码示例来源:origin: org.apache.pig/pig
/**
 * Counts the tuples in the bag held in input field 0 whose own first field
 * is non-null (SQL COUNT(col) semantics: nulls are skipped).
 */
static protected long count(Tuple input) throws ExecException {
    DataBag values = (DataBag) input.get(0);
    long cnt = 0;
    // Enhanced-for replaces the original raw Iterator + unchecked cast;
    // DataBag iterates as Iterable<Tuple>.
    for (Tuple t : values) {
        if (t != null && t.size() > 0 && t.get(0) != null) {
            cnt++;
        }
    }
    return cnt;
}
代码示例来源:origin: com.linkedin.datafu/datafu
@Override
// Accumulator step: tags every tuple in the incoming bag with a running
// index and collects the results into outputBag.
// NOTE(review): relies on enclosing-class fields `i` (running index),
// `count` (spill counter) and `outputBag` — none visible here; confirm
// their initialization in the enclosing UDF.
public void accumulate(Tuple arg0) throws IOException
{
    // Field 0 is expected to hold the bag of tuples to enumerate.
    DataBag inputBag = (DataBag)arg0.get(0);
    for (Tuple t : inputBag) {
        // Copy the tuple's fields, then append the running index.
        Tuple t1 = TupleFactory.getInstance().newTuple(t.getAll());
        t1.append(i);
        outputBag.add(t1);
        // Spill to disk every 1,000,000 accumulated tuples to bound memory.
        // NOTE(review): `count % 1000000 == 0` is also true when count == 0,
        // so a spill is requested on the very first tuple — confirm intended.
        if (count % 1000000 == 0) {
            outputBag.spill();
            count = 0;
        }
        i++;
        count++;
    }
}
代码示例来源:origin: pl.edu.icm.coansys/coansys-io-output
@Override
public Tuple exec(Tuple tuple) throws IOException {
    // Require (rowId, serialized DocumentWrapper bytearray).
    if (tuple == null || tuple.size() != 2 || tuple.getType(1) != DataType.BYTEARRAY) {
        throw new IOException("" + this.getClass().getName() +
            " expects 2 arguments, 2nd must be a bytearray");
    }
    String rowId = (String) tuple.get(0);
    DataByteArray protoDBA = (DataByteArray) tuple.get(1);
    DocumentWrapper wrapper = DocumentProtos.DocumentWrapper.parseFrom(protoDBA.get());
    // Emit (rowId, metadata key, original serialized bytes).
    Tuple result = tupleFactory.newTuple();
    result.append(rowId);
    result.append(wrapper.getDocumentMetadata().getKey());
    result.append(protoDBA);
    return result;
}
}
代码示例来源:origin: org.apache.pig/pig
/**
 * Flattens every field of every input tuple, in order, into one combined
 * tuple, then applies the illustrator markup to the result.
 */
private Tuple createTuple(Tuple[] data) throws ExecException {
    Tuple combined = TupleFactory.getInstance().newTuple();
    for (Tuple piece : data) {
        int fieldCount = piece.size();
        for (int f = 0; f < fieldCount; f++) {
            combined.append(piece.get(f));
        }
    }
    return illustratorMarkup(combined, combined, 0);
}
代码示例来源:origin: org.apache.pig/pig
@Override
// Re-keys a tuple: drops the first two bookkeeping fields and replaces
// them with a single absolute offset in slot 0.
// Layout (derived from the code below):
//   input:  [0]=partition index, [1]=(unused here), [2]=local offset,
//           [3..n-1]=payload fields
//   output: [0]=calculateOffset(input[0]) + input[2], [1..n-3]=input[3..n-1]
// NOTE(review): input.get(1) is never read — presumably consumed elsewhere;
// confirm against the caller.
public Tuple call(Tuple input) throws Exception {
    // Output is two fields shorter than the input.
    Tuple output = TupleFactory.getInstance()
        .newTuple(input.getAll().size() - 2);
    // Shift payload fields left by two positions (output[i] = input[i+2]).
    for (int i = 1; i < input.getAll().size() - 2; i ++) {
        output.set(i, input.get(i+2));
    }
    // Slot 0 becomes the global offset: partition base + local offset.
    long offset = calculateOffset((Integer) input.get(0));
    output.set(0, offset + (Long)input.get(2));
    return output;
}
代码示例来源:origin: apache/phoenix
Tuple t = tupleFactory.newTuple();
t.append(1);
t.append(dt);
t.append(dt);
t.append(dt);
代码示例来源:origin: thedatachef/varaha
/**
 * Tokenizes the text in input field 0 with Stanford's PTBTokenizer and
 * returns one single-field tuple per token. Returns null when there is
 * no usable input (preserves the original null-bag contract).
 */
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() < 1 || input.isNull(0))
        return null;
    // Output bag
    DataBag bagOfTokens = bagFactory.newDefaultBag();
    StringReader textInput = new StringReader(input.get(0).toString());
    PTBTokenizer ptbt = new PTBTokenizer(textInput, new CoreLabelTokenFactory(), "");
    while (ptbt.hasNext()) {
        CoreLabel label = (CoreLabel) ptbt.next();
        bagOfTokens.add(tupleFactory.newTuple(label.toString()));
    }
    return bagOfTokens;
}
}
代码示例来源:origin: elastic/elasticsearch-hadoop
dataMap = reader.getCurrentValue();
Tuple tuple = TupleFactory.getInstance().newTuple(dataMap.size());
tuple.set(i, result);
Set<Entry<?, ?>> entrySet = dataMap.entrySet();
for (Map.Entry entry : entrySet) {
tuple.set(i++, entry.getValue());
代码示例来源:origin: org.apache.pig/pig
/**
 * Parses the first input field into a DateTime.
 * Null-safe: an absent or null field yields null.
 */
public DateTime exec(Tuple input) throws IOException {
    if (input == null || input.size() < 1 || input.get(0) == null) {
        return null;
    }
    // Coerce whatever the field holds to its string form, then parse.
    return ToDate.extractDateTime(DataType.toString(input.get(0)));
}
代码示例来源:origin: org.apache.pig/pig
@Override
public Tuple exec(Tuple input) throws IOException {
    // Initial stage of COUNT: guaranteed to run map-side on a bag holding
    // at most one tuple, so the partial count is 1 for a non-empty bag
    // and 0 otherwise.
    DataBag bag = (DataBag) input.get(0);
    long partial = bag.iterator().hasNext() ? 1L : 0L;
    return mTupleFactory.newTuple(Long.valueOf(partial));
}
}
代码示例来源:origin: pl.edu.icm.coansys/commons
@Override
public Tuple exec(Tuple input) throws IOException {
    // Decode field 0 (a DataByteArray) to its string form and wrap it
    // in a fresh single-field tuple.
    String decoded = ((DataByteArray) input.get(0)).toString();
    Tuple result = TupleFactory.getInstance().newTuple();
    result.append(decoded);
    return result;
}
内容来源于网络,如有侵权,请联系作者删除!