Usage of the org.apache.spark.api.java.JavaPairRDD.take() method, with code examples


This article collects code examples of the org.apache.spark.api.java.JavaPairRDD.take() method in Java and shows how JavaPairRDD.take() is used in practice. The examples are extracted from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the JavaPairRDD.take() method are as follows:
Package: org.apache.spark.api.java.JavaPairRDD
Class: JavaPairRDD
Method: take

JavaPairRDD.take overview

take(num) is an action that returns the first num elements of the pair RDD as a List<Tuple2<K, V>>, computing only as many partitions as needed to satisfy the request. Because the result is materialized on the driver, it should only be called with a small num.
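Before the project excerpts, here is a minimal self-contained sketch of typical usage (this example is not from the sources below; the class name TakeExample and the sample data are invented):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class TakeExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("TakeExample").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Build a small pair RDD of (word, length) entries.
            JavaPairRDD<String, Integer> pairs = sc
                    .parallelize(Arrays.asList("spark", "take", "rdd"))
                    .mapToPair(w -> new Tuple2<>(w, w.length()));

            // take(2) is an action: it returns the first two pairs as a List
            // on the driver, computing only as many partitions as needed.
            List<Tuple2<String, Integer>> firstTwo = pairs.take(2);
            firstTwo.forEach(t -> System.out.println(t._1() + " -> " + t._2()));
        }
    }
}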

Code examples

Code example source: databricks/learning-spark

public Void call(JavaPairRDD<Integer, Long> rdd) {
  // Keep the first 100 (response code, count) pairs of this batch on the driver.
  currentResponseCodeCounts = rdd.take(100);
  return null;
}});
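The excerpt above is only the body of a streaming callback. A sketch of how such a callback is typically registered, assuming the pre-1.6 Spark Streaming Java API in which foreachRDD takes a Function returning Void (the signature the call method above matches); the stream name responseCodeCounts is illustrative, and currentResponseCodeCounts is assumed to be a field of the enclosing class:

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.api.java.JavaPairDStream;

// responseCodeCounts: a hypothetical JavaPairDStream<Integer, Long> of
// (HTTP response code, count) pairs computed upstream.
responseCodeCounts.foreachRDD(new Function<JavaPairRDD<Integer, Long>, Void>() {
    @Override
    public Void call(JavaPairRDD<Integer, Long> rdd) {
        // take(100) pulls at most 100 pairs back to the driver on each batch.
        currentResponseCodeCounts = rdd.take(100);
        return null;
    }
});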

Code example source: databricks/learning-spark

public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
  Tuple4<Long, Long, Long, Long> contentSizeStats =
      Functions.contentSizeStats(accessLogs);
  // take(100) bounds how many (response code, count) pairs reach the driver.
  List<Tuple2<Integer, Long>> responseCodeToCount =
      Functions.responseCodeCount(accessLogs)
          .take(100);
  JavaPairRDD<String, Long> ipAddressCounts =
      Functions.ipAddressCount(accessLogs);
  List<String> ip = Functions.filterIPAddress(ipAddressCounts)
      .take(100);
  Object ordering = Ordering.natural();
  Comparator<Long> cmp = (Comparator<Long>) ordering;
  List<Tuple2<String, Long>> topEndpoints =
      Functions.endpointCount(accessLogs)
          .top(10, new Functions.ValueComparator<String, Long>(cmp));
  logStatistics = new LogStatistics(contentSizeStats, responseCodeToCount,
      ip, topEndpoints);
  return null;
}});
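One detail worth noting here: Ordering.natural() is Guava's natural-ordering comparator, and the detour through Object performs an unchecked conversion to Comparator<Long>. On Java 8+, a serializable natural-order comparator can be obtained directly; a possible alternative (not what the original project uses):

import java.util.Comparator;

// Comparator.naturalOrder() returns a serializable singleton, so it can be
// shipped to executors inside top(...).
Comparator<Long> cmp = Comparator.naturalOrder();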

Code example source: apache/tinkerpop

@Override
public Iterator<Vertex> head(final String location, final Class readerClass, final int totalLines) {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            // take(totalLines) caps how many (id, vertex) tuples are fetched from the graph RDD.
            return IteratorUtils.map(
                    ((InputRDD) readerClass.getConstructor().newInstance())
                            .readGraphRDD(configuration, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> tuple._2().get());
        } else if (InputFormat.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    new InputFormatRDD()
                            .readGraphRDD(configuration, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> tuple._2().get());
        }
    } catch (final Exception e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided parserClass must be an " + InputFormat.class.getCanonicalName()
            + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}

Code example source: apache/tinkerpop

@Override
public <K, V> Iterator<KeyValue<K, V>> head(final String location, final String memoryKey, final Class readerClass, final int totalLines) {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            // take(totalLines) caps how many memory (key, value) tuples are fetched.
            return IteratorUtils.map(
                    ((InputRDD) readerClass.getConstructor().newInstance())
                            .readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> new KeyValue<>(tuple._1(), tuple._2()));
        } else if (InputFormat.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    new InputFormatRDD()
                            .readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> new KeyValue<>(tuple._1(), tuple._2()));
        }
    } catch (final Exception e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided parserClass must be an " + InputFormat.class.getCanonicalName()
            + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}

Code example source: mahmoudparsian/data-algorithms-book

// Fragment: the predicate of a filter() over (id, vector) pairs; the chained
// take(numArticles) then collects the first numArticles matching pairs to the
// driver (the ypriverol/spark-java8 example below shows the full call).
return Util.closestPoint(in._2(), centroids) == index;
}).take(numArticles);

Code example source: jaibeermalik/searchanalytics-bigdata

@Override
public Void call(JavaPairRDD<Integer, String> rdd) {
  String out = "\nSpark, Top 10 entries for stream id: " + rdd.id() + "\n";
  // take(10) returns the first 10 (key, value) tuples of this batch's RDD.
  for (Tuple2<Integer, String> t : rdd.take(10)) {
    out = out + t.toString() + "\n";
  }
  System.out.println(out);
  return null;
}
});

Code example source: ypriverol/spark-java8

for (int i = 0; i < centroids.size(); i++) {
  final int index = i;
  // Keep the points whose closest centroid is centroid i, then pull the first
  // numArticles of them to the driver.
  List<Tuple2<String, Vector>> samples = data
      .filter((Function<Tuple2<String, Vector>, Boolean>) in -> Util.closestPoint(in._2(), centroids) == index)
      .take(numArticles);

Code example source: oeljeklaus-you/UserActionAnalyzePlatform

// take(10) returns the ten highest-ranked categories as a List on the driver.
List<Tuple2<CategorySortKey, String>> top10CategoryList = sortedCategoryRDD.take(10);
List<Top10Category> top10Categories = new ArrayList<Top10Category>();
for (Tuple2<CategorySortKey, String> tuple2 : top10CategoryList)

Code example source: ypriverol/spark-java8

// Append the first 25 (key, value) tuples of the RDD, one per line.
for (Tuple2<Integer, String> t : rdd.take(25)) out.append(t.toString()).append("\n");

Code example source: Erik-ly/SprakProject

// take(10) must be assigned: it returns the top 10 (sort key, count info)
// pairs as a List rather than modifying the RDD in place.
List<Tuple2<CategorySortKey, String>> top10CategoryList = sortedCategoryCountRDD.take(10);
for (Tuple2<CategorySortKey, String> tuple : top10CategoryList) {
  String countInfo = tuple._2;
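A closing note on this pattern: take(n) simply returns the first n elements in partition order, so it only yields a "top 10" when the RDD has already been sorted, as in the last two examples; top(n, comparator), used in the databricks/learning-spark example, selects the n largest elements of an unsorted RDD directly. A small sketch contrasting the two, assuming a hypothetical JavaPairRDD<String, Long> named pairs of (endpoint, hit count) entries:

import java.io.Serializable;
import java.util.Comparator;
import java.util.List;

import scala.Tuple2;

// Variant 1: sort by value first, then take the first 10 in sorted order.
List<Tuple2<Long, String>> viaSort = pairs
        .mapToPair(t -> new Tuple2<>(t._2(), t._1()))  // key by count so sortByKey orders by it
        .sortByKey(false)                              // descending
        .take(10);

// Variant 2: let Spark pick the 10 largest elements without a full sort.
// The intersection cast makes the lambda comparator serializable.
List<Tuple2<String, Long>> viaTop = pairs.top(10,
        (Comparator<Tuple2<String, Long>> & Serializable) (a, b) -> Long.compare(a._2(), b._2()));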
