Usage and code examples of the org.apache.hadoop.io.Text class

This article collects a number of Java code examples for the org.apache.hadoop.io.Text class and shows how the class is used in practice. The examples are drawn from platforms such as GitHub, Stack Overflow, and Maven, extracted from selected open-source projects, and should serve as useful references. Details of the Text class follow:
Package path: org.apache.hadoop.io.Text
Class name: Text

Introduction to Text

This class stores text using standard UTF-8 encoding. It provides methods to serialize, deserialize, and compare texts at the byte level. The length is an integer and is serialized using a zero-compressed format.

In addition, it provides methods for traversing the string without converting the byte array to a String object.

It also includes utilities for serializing/deserializing a string, encoding/decoding a string, checking whether a byte array contains valid UTF-8, and calculating the length of an encoded string.
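
Before diving into the project code below, here is a minimal, self-contained sketch of the API described above. It is not taken from any of the projects listed later: the class name TextDemo and the sample strings are invented for illustration, and it assumes only a standard Hadoop client dependency on the classpath.

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;

import org.apache.hadoop.io.Text;

public class TextDemo {
 public static void main(String[] args) throws CharacterCodingException {
  // Text stores the string as UTF-8 bytes; getLength() is the byte length, not the character count
  Text a = new Text("héllo");
  System.out.println(a.getLength());                      // 6 bytes for 5 characters

  // Static helpers: encode/decode a String and measure/validate UTF-8 without building a Text
  ByteBuffer encoded = Text.encode("héllo");
  String decoded = Text.decode(encoded.array(), 0, encoded.limit());
  System.out.println(decoded);                            // héllo
  System.out.println(Text.utf8Length("héllo"));           // 6
  Text.validateUTF8(encoded.array(), 0, encoded.limit()); // throws MalformedInputException if invalid

  // Traverse without converting the byte array to a String:
  // charAt() takes a byte offset and returns the Unicode code point starting there
  System.out.println(a.charAt(1));                        // 233, the code point of 'é'

  // compareTo() works on the raw UTF-8 bytes, so it matches the order of the serialized form
  Text b = new Text("hello");
  System.out.println(a.compareTo(b) > 0);                 // true: 0xC3 (first byte of 'é') > 'e'

  // Writable instances are routinely reused to avoid allocations, as the MapReduce examples below do
  b.set("world");
  System.out.println(b);
 }
}

Because both getLength() and compareTo() operate on the UTF-8 bytes rather than on Java chars, a reused Text instance is cheap in tight loops, and comparisons agree with the sort order of the serialized form.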

Code examples

Code example source: apache/hbase

// Build a Text key from the last NUMBER_OF_LEVELS_TO_PRESERVE_KEY components of the given path
private Text getKey(Path path) {
 int level = conf.getInt(NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 1);
 int count = 0;
 String relPath = "";
 while (count++ < level) {
  relPath = Path.SEPARATOR + path.getName() + relPath;
  path = path.getParent();
 }
 return new Text(relPath);
}

Code example source: apache/hbase

@Override
 public void map(LongWritable key, Text value, Context context)
   throws InterruptedException, IOException {
  Path path = new Path(value.toString());
  this.compactor.compact(path, compactOnce, major);
 }
}

Code example source: prestodb/presto

// Fragment from the Presto Accumulo connector: find the tablet location for a split by
// scanning accumulo.metadata ('connector', 'auths', 'table' and the split 'key' come from
// the enclosing method)
String tableId = connector.tableOperations().tableIdMap().get(table); // presumably used in the full method to restrict the scan range
Scanner scanner = connector.createScanner("accumulo.metadata", auths);
scanner.fetchColumnFamily(new Text("loc"));

Optional<String> location = Optional.empty();
if (key == null) {
  // A null split key means -inf: take the first tablet's location
  Iterator<Entry<Key, Value>> iter = scanner.iterator();
  if (iter.hasNext()) {
    location = Optional.of(iter.next().getValue().toString());
  }
}
else {
  // Compare the split row against the scanned metadata rows, byte by byte, until the owning tablet is found
  Text splitCompareKey = new Text();
  key.getRow(splitCompareKey);
  Text scannedCompareKey = new Text();
  for (Entry<Key, Value> entry : scanner) {
    byte[] keyBytes = entry.getKey().getRow().copyBytes();
    // Strip the table-id prefix before comparing rows
    scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3);
    if (scannedCompareKey.getLength() > 0) {
      int compareTo = splitCompareKey.compareTo(scannedCompareKey);
      if (compareTo <= 0) {
        location = Optional.of(entry.getValue().toString());
        break;
      }
    }
  }
}

Code example source: apache/hive

// Return the numeric value of the first byte of the string, or 0 for an empty string
public IntWritable evaluate(Text s) {
  if (s == null) {
   return null;
  }

  if (s.getLength() > 0) {
   result.set(s.getBytes()[0]);
  } else {
   result.set(0);
  }

  return result;
 }
}

Code example source: prestodb/presto

// Populate columnValues from an Accumulo entry: record the row ID once, then store the
// cell value under the column mapped from its family/qualifier
@Override
public void deserialize(Entry<Key, Value> entry)
{
  if (!columnValues.containsKey(rowIdName)) {
    entry.getKey().getRow(rowId);
    columnValues.put(rowIdName, rowId.toString());
  }
  if (rowOnly) {
    return;
  }
  entry.getKey().getColumnFamily(family);
  entry.getKey().getColumnQualifier(qualifier);
  if (family.equals(ROW_ID_COLUMN) && qualifier.equals(ROW_ID_COLUMN)) {
    return;
  }
  value.set(entry.getValue().get());
  columnValues.put(familyQualifierColumnMap.get(family.toString()).get(qualifier.toString()), value.toString());
}

Code example source: apache/hive

@Override public byte[] getBytes(Text writable) {
 //@TODO  There is no reason to decode then encode the string to bytes really
 //@FIXME this issue with CTRL-CHAR ^0 added by Text at the end of string and Json serd does not like that.
 try {
  return Text.decode(writable.getBytes(), 0, writable.getLength()).getBytes(Charset.forName("UTF-8"));
 } catch (CharacterCodingException e) {
  throw new RuntimeException(e);
 }
}

Code example source: apache/avro

@BeforeClass
public static void testWriteSequenceFile() throws IOException {
 Configuration c = new Configuration();
 URI uri = file().toURI();
 try(SequenceFile.Writer writer
  = new SequenceFile.Writer(FileSystem.get(uri, c), c,
               new Path(uri.toString()),
               LongWritable.class, Text.class)) {
  final LongWritable key = new LongWritable();
  final Text val = new Text();
  for (int i = 0; i < COUNT; ++i) {
   key.set(i);
   val.set(Integer.toString(i));
   writer.append(key, val);
  }
 }
}

Code example source: org.apache.hadoop/hadoop-mapred-test

public InputSplit[] getSplits(JobConf job, int numSplits)
  throws IOException {
 Path src = new Path(job.get(
  org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
  null));
 FileSystem fs = src.getFileSystem(job);
 ArrayList<IndirectSplit> splits = new ArrayList<IndirectSplit>(numSplits);
 LongWritable key = new LongWritable();
 Text value = new Text();
 for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job);
    sl.next(key, value);) {
  splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
 }
 return splits.toArray(new IndirectSplit[splits.size()]);
}

Code example source: apache/incubator-gobblin

/**
  * Get token from the token sequence file.
  * @param authPath
  * @param proxyUserName
  * @return Token for proxyUserName if it exists.
  * @throws IOException
  */
 private static Optional<Token<?>> getTokenFromSeqFile(String authPath, String proxyUserName) throws IOException {
  try (Closer closer = Closer.create()) {
   FileSystem localFs = FileSystem.getLocal(new Configuration());
   SequenceFile.Reader tokenReader =
     closer.register(new SequenceFile.Reader(localFs, new Path(authPath), localFs.getConf()));
   Text key = new Text();
   Token<?> value = new Token<>();
   while (tokenReader.next(key, value)) {
    LOG.info("Found token for " + key);
    if (key.toString().equals(proxyUserName)) {
     return Optional.<Token<?>> of(value);
    }
   }
  }
  return Optional.absent();
 }
}

Code example source: intel-hadoop/HiBench

private static final void shareMapFile(String symbol, int slots, Path mfile, JobConf job) throws IOException, URISyntaxException {
  
  FileSystem fs = FileSystem.get(mfile.toUri(), job);
  if (fs.exists(mfile) && fs.getFileStatus(mfile).isDir()) {
    DistributedCache.createSymlink(job);
    
    FileStatus[] fstats = fs.listStatus(mfile, getPassDirectoriesFilter(fs));
    
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i=0; i<fstats.length; i++) {
      Path curMap = fstats[i].getPath();
      MapFile.Reader mreader = new MapFile.Reader(fs, curMap.toString(), job);
      if (mreader.next(key, value)) {
        int rid = (int) (key.get() % slots);
        String uriWithLink =
            curMap.toUri().toString() + "#" + symbol + "-" + Integer.toString(rid);
        DistributedCache.addCacheFile(new URI(uriWithLink), job);
      } else {
        System.exit(-1);
      }
      mreader.close();
    }
  }
  
  job.setInt(symbol, slots);
}

Code example source: apache/incubator-gobblin

public JobState.DatasetState getInternal(String storeName, String tableName, String stateId,
  boolean sanitizeKeyForComparison)
  throws IOException {
 Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
 if (!this.fs.exists(tablePath)) {
  return null;
 }
 Configuration deserializeConf = new Configuration(this.conf);
 WritableShimSerialization.addToHadoopConfiguration(deserializeConf);
 try (@SuppressWarnings("deprecation") SequenceFile.Reader reader = new SequenceFile.Reader(this.fs, tablePath,
   deserializeConf)) {
  // Reading loop reconstructed from a truncated excerpt: return the state whose
  // (optionally sanitized) key matches the requested stateId
  Text key = new Text();
  Object writable = new JobState.DatasetState();
  while (reader.next(key)) {
   String stringKey =
     sanitizeKeyForComparison ? sanitizeDatasetStatestoreNameFromDatasetURN(storeName, key.toString())
       : key.toString();
   writable = reader.getCurrentValue(writable);
   if (stringKey.equals(stateId)) {
    return (JobState.DatasetState) writable;
   }
  }
 }
 return null;
}

Code example source: voldemort/voldemort

@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split,
                                 JobConf conf,
                                 Reporter reporter)
    throws IOException {
  String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
  log.info("Input file path:" + inputPathString);
  Path inputPath = new Path(inputPathString);
  SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf),
                             inputPath,
                             conf);
  SequenceFile.Metadata meta = reader.getMetadata();
  try {
    Text keySchema = meta.get(new Text("key.schema"));
    Text valueSchema = meta.get(new Text("value.schema"));
    if(0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
      throw new Exception();
    }
    // update Joboconf with schemas
    conf.set("mapper.input.key.schema", keySchema.toString());
    conf.set("mapper.input.value.schema", valueSchema.toString());
  } catch(Exception e) {
    throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n");
  }
  return super.getRecordReader(split, conf, reporter);
}

Code example source: apache/hive

@SuppressWarnings({"rawtypes", "unchecked"})
private static Path createEmptyFile(Path hiveScratchDir,
  HiveOutputFormat outFileFormat, JobConf job,
  Properties props, boolean dummyRow)
    throws IOException, InstantiationException, IllegalAccessException {
 // create a dummy empty file in a new directory
 String newDir = hiveScratchDir + Path.SEPARATOR + UUID.randomUUID().toString();
 Path newPath = new Path(newDir);
 FileSystem fs = newPath.getFileSystem(job);
 fs.mkdirs(newPath);
 //Qualify the path against the file system. The user configured path might contain default port which is skipped
 //in the file status. This makes sure that all paths which goes into PathToPartitionInfo are always listed status
 //file path.
 newPath = fs.makeQualified(newPath);
 String newFile = newDir + Path.SEPARATOR + "emptyFile";
 Path newFilePath = new Path(newFile);
 RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath,
   Text.class, false, props, null);
 if (dummyRow) {
  // empty files are omitted at CombineHiveInputFormat.
  // for meta-data only query, it effectively makes partition columns disappear..
  // this could be fixed by other methods, but this seemed to be the most easy (HIVEV-2955)
  recWriter.write(new Text("empty"));  // written via HiveIgnoreKeyTextOutputFormat
 }
 recWriter.close(false);
 return StringInternUtils.internUriStringsInPath(newPath);
}

Code example source: apache/incubator-gobblin

@BeforeClass
public void setUp() throws IOException {
 this.configuration = new Configuration();
 this.fileSystem = FileSystem.getLocal(this.configuration);
 this.tokenFilePath = new Path(HelixUtilsTest.class.getSimpleName(), "token");
 this.token = new Token<>();
 this.token.setKind(new Text("test"));
 this.token.setService(new Text("test"));
}

Code example source: apache/storm

private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {
  Configuration conf = new Configuration();
  try {
    if (fs.exists(file)) {
      fs.delete(file, false);
    }
    SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
    for (int i = 0; i < rowCount; i++) {
      w.append(new IntWritable(i), new Text("line " + i));
    }
    w.close();
    System.out.println("done");
  } catch (IOException e) {
    e.printStackTrace();
  }
}

Code example source: org.apache.hadoop/hadoop-mapreduce-client-core

@SuppressWarnings("unchecked")
@Test
public void testInitNextRecordReader() throws IOException{
 JobConf conf = new JobConf();
 Path[] paths = new Path[3];
 long[] fileLength = new long[3];
 File[] files = new File[3];
 LongWritable key = new LongWritable(1);
 Text value = new Text();
 try {
  for(int i=0;i<3;i++){
   fileLength[i] = i;
   File dir = new File(outDir.toString());
   dir.mkdir();
   files[i] = new File(dir,"testfile"+i);
   FileWriter fileWriter = new FileWriter(files[i]);
   fileWriter.close();
   paths[i] = new Path(outDir+"/testfile"+i);
  verify(reporter, times(3)).progress();
 } finally {
  FileUtil.fullyDelete(new File(outDir.toString()));

Code example source: apache/incubator-gobblin

@Test
@SuppressWarnings("deprecation")
public void testSerializeToSequenceFile() throws IOException {
 Closer closer = Closer.create();
 Configuration conf = new Configuration();
 WritableShimSerialization.addToHadoopConfiguration(conf);
 try {
  SequenceFile.Writer writer1 = closer.register(SequenceFile.createWriter(this.fs, conf,
    new Path(this.outputPath, "seq1"), Text.class, WorkUnitState.class));
  Text key = new Text();
  WorkUnitState workUnitState = new WorkUnitState();
  TestWatermark watermark = new TestWatermark();
  watermark.setLongWatermark(10L);
  workUnitState.setActualHighWatermark(watermark);
  writer1.append(key, workUnitState);
  SequenceFile.Writer writer2 = closer.register(SequenceFile.createWriter(this.fs, conf,
    new Path(this.outputPath, "seq2"), Text.class, WorkUnitState.class));
  watermark.setLongWatermark(100L);
  workUnitState.setActualHighWatermark(watermark);
  writer2.append(key, workUnitState);
 } catch (Throwable t) {
  throw closer.rethrow(t);
 } finally {
  closer.close();
 }
}

Code example source: ch.cern.hadoop/hadoop-extras

static private void finalize(Configuration conf, JobConf jobconf,
  final Path destPath, String presevedAttributes) throws IOException {
 if (presevedAttributes == null) {
  return;
 }
 EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
 if (!preseved.contains(FileAttribute.USER)
   && !preseved.contains(FileAttribute.GROUP)
   && !preseved.contains(FileAttribute.PERMISSION)) {
  return;
 }
 FileSystem dstfs = destPath.getFileSystem(conf);
 Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
 try (SequenceFile.Reader in =
   new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
  Text dsttext = new Text();
  FilePair pair = new FilePair(); 
  for(; in.next(dsttext, pair); ) {
   Path absdst = new Path(destPath, pair.output);
   updateDestStatus(pair.input, dstfs.getFileStatus(absdst),
     preseved, dstfs);
  }
 }
}

Code example source: apache/hbase

private static ExportProtos.ExportRequest getConfiguredRequest(Configuration conf,
    Path dir, final Scan scan, final Token<?> userToken) throws IOException {
 boolean compressed = conf.getBoolean(FileOutputFormat.COMPRESS, false);
 String compressionType = conf.get(FileOutputFormat.COMPRESS_TYPE,
     DEFAULT_TYPE.toString());
 String compressionCodec = conf.get(FileOutputFormat.COMPRESS_CODEC,
     DEFAULT_CODEC.getName());
 DelegationToken protoToken = null;
 if (userToken != null) {
  protoToken = DelegationToken.newBuilder()
      .setIdentifier(ByteStringer.wrap(userToken.getIdentifier()))
      .setPassword(ByteStringer.wrap(userToken.getPassword()))
      .setKind(userToken.getKind().toString())
      .setService(userToken.getService().toString()).build();
 }
 LOG.info("compressed=" + compressed
     + ", compression type=" + compressionType
     + ", compression codec=" + compressionCodec
     + ", userToken=" + userToken);
 ExportProtos.ExportRequest.Builder builder = ExportProtos.ExportRequest.newBuilder()
     .setScan(ProtobufUtil.toScan(scan))
     .setOutputPath(dir.toString())
     .setCompressed(compressed)
     .setCompressCodec(compressionCodec)
     .setCompressType(compressionType);
 if (protoToken != null) {
  builder.setFsToken(protoToken);
 }
 return builder.build();
}

Code example source: apache/ignite

// Truncated excerpt: open a sequence file of Text keys and IntWritable values
// ('conf' and 'fileName' come from the enclosing method)
final List<String> list = new ArrayList<>();
final boolean snappyDecode = conf != null && conf.getBoolean(FileOutputFormat.COMPRESS, false);
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
    SequenceFile.Reader.file(new Path(fileName)))) {
  Text key = new Text();
  IntWritable val = new IntWritable();
  // ... read loop omitted in the excerpt
}
