本文整理了Java中org.apache.spark.api.java.JavaPairRDD.take()
方法的一些代码示例,展示了JavaPairRDD.take()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.take()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:take
暂无
代码示例来源:origin: databricks/learning-spark
// Streaming callback: snapshot up to the first 100 (responseCode, count) pairs
// of this micro-batch into the shared field for later reporting.
// NOTE(review): take(100) materializes elements on the driver — verify 100 is
// an acceptable cap for the callers that read currentResponseCodeCounts.
public Void call(JavaPairRDD<Integer, Long> rdd) {
currentResponseCodeCounts = rdd.take(100);
// Void return type of the Spark Function contract — null is the only value.
return null;
}});
代码示例来源:origin: databricks/learning-spark
// Streaming callback: recompute summary statistics over each micro-batch of
// access logs and publish them through the shared logStatistics field.
public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
// Content-size statistics for the batch (exact tuple fields are defined by
// Functions.contentSizeStats — presumably count/min/max/sum; confirm there).
Tuple4<Long, Long, Long, Long> contentSizeStats =
Functions.contentSizeStats(accessLogs);
// Bring at most 100 (responseCode, count) pairs back to the driver.
List<Tuple2<Integer, Long>> responseCodeToCount =
Functions.responseCodeCount(accessLogs)
.take(100);
JavaPairRDD<String, Long> ipAddressCounts =
Functions.ipAddressCount(accessLogs);
// Same 100-element driver-side cap for the filtered IP addresses.
List<String> ip = Functions.filterIPAddress(ipAddressCounts)
.take(100);
// NOTE(review): the Object indirection plus unchecked cast sidesteps generic
// inference on Ordering.natural(); assumes Long's natural ordering is intended.
Object ordering = Ordering.natural();
Comparator<Long> cmp = (Comparator<Long>)ordering;
// Top 10 endpoints ranked by count via the value comparator.
List<Tuple2<String, Long>> topEndpoints =
Functions.endpointCount(accessLogs)
.top(10, new Functions.ValueComparator<String, Long>(cmp));
logStatistics = new LogStatistics(contentSizeStats, responseCodeToCount,
ip, topEndpoints);
return null;
}});
}
代码示例来源:origin: apache/tinkerpop
/**
 * Reads the first {@code totalLines} vertices from the given location using the
 * supplied reader class, which must be either an {@link InputRDD} or a Hadoop
 * {@code InputFormat}.
 *
 * @throws IllegalArgumentException if the reader cannot be instantiated, the read
 *         fails, or the class is neither supported reader type
 */
@Override
public Iterator<Vertex> head(final String location, final Class readerClass, final int totalLines) {
    // Point the read configuration at the requested data location and reader.
    final Configuration conf = new BaseConfiguration();
    conf.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            // Reflectively instantiate the InputRDD and pull the head of the graph RDD.
            final InputRDD inputRDD = (InputRDD) readerClass.getConstructor().newInstance();
            return IteratorUtils.map(
                    inputRDD.readGraphRDD(conf, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines)
                            .iterator(),
                    vertexTuple -> vertexTuple._2().get());
        }
        if (InputFormat.class.isAssignableFrom(readerClass)) {
            // Hadoop InputFormats are adapted through InputFormatRDD.
            return IteratorUtils.map(
                    new InputFormatRDD().readGraphRDD(conf, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines)
                            .iterator(),
                    vertexTuple -> vertexTuple._2().get());
        }
    } catch (final Exception e) {
        // Surface any reflection/read failure with its original cause attached.
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided parserClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
代码示例来源:origin: apache/tinkerpop
/**
 * Reads the first {@code totalLines} key/value pairs of the named memory sideboard
 * from the given location using the supplied reader class, which must be either an
 * {@link InputRDD} or a Hadoop {@code InputFormat}.
 *
 * @throws IllegalArgumentException if the reader cannot be instantiated, the read
 *         fails, or the class is neither supported reader type
 */
@Override
public <K, V> Iterator<KeyValue<K, V>> head(final String location, final String memoryKey, final Class readerClass, final int totalLines) {
    // Point the read configuration at the requested data location and reader.
    final Configuration conf = new BaseConfiguration();
    conf.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            // Reflectively instantiate the InputRDD and pull the head of the memory RDD.
            final InputRDD inputRDD = (InputRDD) readerClass.getConstructor().newInstance();
            return IteratorUtils.map(
                    inputRDD.readMemoryRDD(conf, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines)
                            .iterator(),
                    memoryTuple -> new KeyValue(memoryTuple._1(), memoryTuple._2()));
        }
        if (InputFormat.class.isAssignableFrom(readerClass)) {
            // Hadoop InputFormats are adapted through InputFormatRDD.
            return IteratorUtils.map(
                    new InputFormatRDD().readMemoryRDD(conf, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines)
                            .iterator(),
                    memoryTuple -> new KeyValue(memoryTuple._1(), memoryTuple._2()));
        }
    } catch (final Exception e) {
        // Surface any reflection/read failure with its original cause attached.
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided parserClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
return Util.closestPoint(in._2(), centroids) == index;
}).take(numArticles);
代码示例来源:origin: jaibeermalik/searchanalytics-bigdata
// Streaming callback: print up to the first 10 (key, value) entries of this
// micro-batch, tagged with the RDD's id, to stdout.
@Override
public Void call(JavaPairRDD<Integer, String> rdd) {
    // Fix: the original built the report with String '+' inside the loop,
    // which is O(n^2) in the output length; StringBuilder appends are O(n).
    final StringBuilder out = new StringBuilder("\nSpark, Top 10 entries for stream id: ")
            .append(rdd.id())
            .append("\n");
    // take(10) brings at most 10 elements back to the driver.
    for (Tuple2<Integer, String> t : rdd.take(10)) {
        out.append(t.toString()).append("\n");
    }
    System.out.println(out.toString());
    return null;
}
});
代码示例来源:origin: org.apache.tinkerpop/spark-gremlin
// Returns an iterator over the first totalLines vertices stored at location,
// read with the supplied reader class (either an InputRDD implementation or a
// Hadoop InputFormat). Throws IllegalArgumentException on any failure or if
// the class is neither supported reader type.
@Override
public Iterator<Vertex> head(final String location, final Class readerClass, final int totalLines) {
final Configuration configuration = new BaseConfiguration();
// Tell the reader where the data lives and which reader implementation to use.
configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
try {
if (InputRDD.class.isAssignableFrom(readerClass)) {
// Reflectively instantiate the InputRDD, take the first totalLines tuples,
// and unwrap each tuple's value into its Vertex (tuple._2().get()).
return IteratorUtils.map(((InputRDD) readerClass.getConstructor().newInstance()).readGraphRDD(configuration, new JavaSparkContext(Spark.getContext())).take(totalLines).iterator(), tuple -> tuple._2().get());
} else if (InputFormat.class.isAssignableFrom(readerClass)) {
// Hadoop InputFormats are adapted through InputFormatRDD.
return IteratorUtils.map(new InputFormatRDD().readGraphRDD(configuration, new JavaSparkContext(Spark.getContext())).take(totalLines).iterator(), tuple -> tuple._2().get());
}
} catch (final Exception e) {
// Reflection/read failures are rethrown with the original cause preserved.
throw new IllegalArgumentException(e.getMessage(), e);
}
// Neither supported reader type matched.
throw new IllegalArgumentException("The provided parserClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
代码示例来源:origin: org.apache.tinkerpop/spark-gremlin
// Returns an iterator over the first totalLines (key, value) pairs of the named
// memory sideboard stored at location, read with the supplied reader class
// (either an InputRDD implementation or a Hadoop InputFormat). Throws
// IllegalArgumentException on any failure or if the class is neither type.
@Override
public <K, V> Iterator<KeyValue<K, V>> head(final String location, final String memoryKey, final Class readerClass, final int totalLines) {
final Configuration configuration = new BaseConfiguration();
// Tell the reader where the data lives and which reader implementation to use.
configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
try {
if (InputRDD.class.isAssignableFrom(readerClass)) {
// Reflectively instantiate the InputRDD, take the first totalLines tuples,
// and repackage each tuple as a KeyValue. Note the raw KeyValue construction
// relies on an unchecked conversion to KeyValue<K, V>.
return IteratorUtils.map(((InputRDD) readerClass.getConstructor().newInstance()).readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext())).take(totalLines).iterator(), tuple -> new KeyValue(tuple._1(), tuple._2()));
} else if (InputFormat.class.isAssignableFrom(readerClass)) {
// Hadoop InputFormats are adapted through InputFormatRDD.
return IteratorUtils.map(new InputFormatRDD().readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext())).take(totalLines).iterator(), tuple -> new KeyValue(tuple._1(), tuple._2()));
}
} catch (final Exception e) {
// Reflection/read failures are rethrown with the original cause preserved.
throw new IllegalArgumentException(e.getMessage(), e);
}
// Neither supported reader type matched.
throw new IllegalArgumentException("The provided parserClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
代码示例来源:origin: ypriverol/spark-java8
for (int i = 0; i < centroids.size(); i++) {
final int index = i;
List<Tuple2<String, Vector>> samples = data.filter((Function<Tuple2<String, Vector>, Boolean>) in -> Util.closestPoint(in._2(), centroids) == index).take(numArticles);
代码示例来源:origin: oeljeklaus-you/UserActionAnalyzePlatform
List<Tuple2<CategorySortKey,String>> top10CategoryList=sortedCategoryRDD.take(10);
List<Top10Category> top10Categories=new ArrayList<Top10Category>();
for(Tuple2<CategorySortKey,String> tuple2:top10CategoryList)
代码示例来源:origin: ypriverol/spark-java8
for (Tuple2<Integer, String> t: rdd.take(25)) out.append(t.toString()).append("\n");
代码示例来源:origin: Erik-ly/SprakProject
sortedCategoryCountRDD.take(10);
for(Tuple2<CategorySortKey, String> tuple : top10CategoryList) {
String countInfo = tuple._2;
内容来源于网络,如有侵权,请联系作者删除!