This article collects code examples of the Java method org.apache.spark.api.java.JavaPairRDD.flatMap() and shows how JavaPairRDD.flatMap() is used in practice. The examples were extracted from selected projects hosted on platforms such as GitHub, Stack Overflow, and Maven, so they should serve as a useful reference. Details of the JavaPairRDD.flatMap() method:
Package path: org.apache.spark.api.java.JavaPairRDD
Class name: JavaPairRDD
Method name: flatMap
Description: none available
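Since no description is available, here is a minimal, self-contained sketch of what a JavaPairRDD.flatMap() call looks like. flatMap is inherited from the JavaRDDLike interface, so the supplied function receives each element as a Tuple2<K, V> and the result is a plain JavaRDD of the output type (use flatMapToPair() if you need a pair RDD back). The sketch assumes Spark 2.x, where FlatMapFunction.call() returns an Iterator (Spark 1.x returned an Iterable); the class name PairFlatMapExample and the sample data are made up for illustration.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class PairFlatMapExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("PairFlatMapExample").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      // A pair RDD of (user, comma-separated tags).
      JavaPairRDD<String, String> tagsByUser = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("alice", "spark,java"),
          new Tuple2<>("bob", "scala")));

      // flatMap sees each element as a Tuple2<K, V> and may emit zero or more output records;
      // the result is a JavaRDD of the output type, not a pair RDD.
      JavaRDD<String> userTags = tagsByUser.flatMap(
          (Tuple2<String, String> kv) ->
              Arrays.stream(kv._2().split(","))
                  .map(tag -> kv._1() + ":" + tag)
                  .iterator());

      // Prints alice:spark, alice:java, bob:scala.
      userTags.collect().forEach(System.out::println);
    }
  }
}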
Code example source: databricks/learning-spark
public static void main(String[] args) throws Exception {
  if (args.length != 4) {  // the example reads four arguments, including the key
    throw new Exception("Usage BasicLoadCsv sparkMaster csvInputFile csvOutputFile key");
  }
  String master = args[0];
  String csvInput = args[1];
  String outputFile = args[2];
  final String key = args[3];

  JavaSparkContext sc = new JavaSparkContext(
      master, "loadwholecsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));

  // Each whole CSV file becomes one (filename, content) pair.
  JavaPairRDD<String, String> csvData = sc.wholeTextFiles(csvInput);
  // ParseLine (defined elsewhere in the example) flattens each file into its CSV records.
  JavaRDD<String[]> keyedRDD = csvData.flatMap(new ParseLine());
  // Keep only the records whose first column matches the requested key.
  JavaRDD<String[]> result =
      keyedRDD.filter(new Function<String[], Boolean>() {
        public Boolean call(String[] input) { return input[0].equals(key); }
      });
  result.saveAsTextFile(outputFile);
}
}
Code example source: mahmoudparsian/data-algorithms-book
triadsGrouped.flatMap((Tuple2<Tuple2<Long,Long>, Iterable<Long>> s) -> {
Code example source: mahmoudparsian/data-algorithms-book
triadsGrouped.flatMap(new FlatMapFunction<
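Both fragments above are cut off mid-expression. Purely as a shape reference, and not the book's actual per-group logic, a lambda-style flatMap over a JavaPairRDD<Tuple2<Long, Long>, Iterable<Long>> such as triadsGrouped might look like the following hypothetical sketch (assuming Spark 2.x, where the function returns an Iterator, and the usual java.util imports):

JavaRDD<Long> flattened = triadsGrouped.flatMap(
    (Tuple2<Tuple2<Long, Long>, Iterable<Long>> s) -> {
      // Hypothetical body: simply flatten each group's values into one RDD;
      // the real example would apply its own per-group logic here.
      List<Long> out = new ArrayList<>();
      for (Long value : s._2()) {
        out.add(value);
      }
      return out.iterator();
    });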
Code example source: co.cask.cdap/hydrator-spark-core2
@Override
public <T> SparkCollection<T> flatMap(FlatMapFunction<Tuple2<K, V>, T> function) {
  return new RDDCollection<>(sec, jsc, datasetContext, sinkFactory, pairRDD.flatMap(function));
}
Code example source: seznam/euphoria
.flatMap(
new FlatMapFunctionWithCollector<>(
(t, collector) -> {
Code example source: cloudera-labs/envelope
@SuppressWarnings({"rawtypes", "unchecked"})
private Dataset<Row> readInputFormat(String path) throws Exception {
  LOG.debug("Reading InputFormat[{}]: {}", inputType, path);

  Class<? extends InputFormat> typeClazz = Class.forName(inputType).asSubclass(InputFormat.class);
  Class<?> keyClazz = Class.forName(keyType);
  Class<?> valueClazz = Class.forName(valueType);

  @SuppressWarnings("resource")
  JavaSparkContext context = new JavaSparkContext(Contexts.getSparkSession().sparkContext());
  JavaPairRDD<?, ?> rdd = context.newAPIHadoopFile(path, typeClazz, keyClazz, valueClazz, new Configuration());

  TranslateFunction translateFunction = new TranslateFunction(translatorConfig);

  return Contexts.getSparkSession().createDataFrame(rdd.flatMap(translateFunction), translateFunction.getSchema());
}
Code example source: co.cask.cdap/hydrator-spark-core2
@Override
public JavaRDD<OUT> call(JavaPairRDD<JOIN_KEY, List<JoinElement<INPUT_RECORD>>> input,
                         Time batchTime) throws Exception {
  if (function == null) {
    function = Compat.convert(
        new JoinMergeFunction<JOIN_KEY, INPUT_RECORD, OUT>(dynamicDriverContext.getPluginFunctionContext()));
  }
  return input.flatMap(function);
}
Code example source: co.cask.cdap/hydrator-spark-core2
@Override
public JavaRDD<RecordInfo<Object>> call(JavaPairRDD<GROUP_KEY, Iterable<GROUP_VAL>> input,
                                        Time batchTime) throws Exception {
  if (function == null) {
    function = Compat.convert(
        new AggregatorAggregateFunction<GROUP_KEY, GROUP_VAL, OUT>(dynamicDriverContext.getPluginFunctionContext()));
  }
  return input.flatMap(function);
}
}
Code example source: cloudera-labs/envelope
@SuppressWarnings({ "unchecked", "rawtypes" })
public JavaRDD<Row> translate(JavaRDD raw) {
  TranslateFunction translateFunction = getTranslateFunction(config);
  JavaPairRDD<?, ?> prepared = raw.mapToPair(((StreamInput) getInput(true)).getPrepareFunction());
  JavaRDD<Row> translated = prepared.flatMap(translateFunction);

  return translated;
}
Code example source: co.cask.cdap/hydrator-spark-core2
@Override
protected SparkCollection<RecordInfo<Object>> getSource(StageSpec stageSpec, StageStatisticsCollector collector) {
  PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, collector);
  return new RDDCollection<>(sec, jsc, datasetContext, sinkFactory,
      sourceFactory.createRDD(sec, jsc, stageSpec.getName(), Object.class, Object.class)
          .flatMap(Compat.convert(new BatchSourceFunction(pluginFunctionContext, numOfRecordsPreview))));
}
Code example source: co.cask.cdap/hydrator-spark-core2
@Override
public SparkCollection<RecordInfo<Object>> aggregate(StageSpec stageSpec, @Nullable Integer partitions,
                                                     StageStatisticsCollector collector) {
  PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, collector);
  PairFlatMapFunc<T, Object, T> groupByFunction = new AggregatorGroupByFunction<>(pluginFunctionContext);
  PairFlatMapFunction<T, Object, T> sparkGroupByFunction = Compat.convert(groupByFunction);

  JavaPairRDD<Object, T> keyedCollection = rdd.flatMapToPair(sparkGroupByFunction);
  JavaPairRDD<Object, Iterable<T>> groupedCollection = partitions == null
      ? keyedCollection.groupByKey() : keyedCollection.groupByKey(partitions);

  FlatMapFunc<Tuple2<Object, Iterable<T>>, RecordInfo<Object>> aggregateFunction =
      new AggregatorAggregateFunction<>(pluginFunctionContext);
  FlatMapFunction<Tuple2<Object, Iterable<T>>, RecordInfo<Object>> sparkAggregateFunction =
      Compat.convert(aggregateFunction);

  return wrap(groupedCollection.flatMap(sparkAggregateFunction));
}
Code example source: cloudera-labs/envelope
private JavaRDD<Row> planMutationsByKey(Dataset<Row> arriving, List<String> keyFieldNames,
                                        Config plannerConfig, Config outputConfig) {
  JavaPairRDD<Row, Row> keyedArriving =
      arriving.javaRDD().keyBy(new ExtractKeyFunction(keyFieldNames, accumulators));

  JavaPairRDD<Row, Iterable<Row>> arrivingByKey =
      keyedArriving.groupByKey(getPartitioner(keyedArriving));

  JavaPairRDD<Row, Tuple2<Iterable<Row>, Iterable<Row>>> arrivingAndExistingByKey =
      arrivingByKey.mapPartitionsToPair(new JoinExistingForKeysFunction(outputConfig, keyFieldNames, accumulators));

  JavaRDD<Row> planned =
      arrivingAndExistingByKey.flatMap(new PlanForKeyFunction(plannerConfig, accumulators));

  return planned;
}
Code example source: sryza/simplesparkapp
JavaPairRDD<Character, Integer> charCounts = filtered.flatMap(
new FlatMapFunction<Tuple2<String, Integer>, Character>() {
@Override
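This snippet is cut off inside the anonymous class. As a hypothetical completion only (a sketch assuming the older Spark 1.x API, where FlatMapFunction.call() returns an Iterable, and assuming filtered is a JavaPairRDD<String, Integer> of (word, count) pairs; the method bodies are not the repository's verbatim code), the full expression could continue like this, with mapToPair() and reduceByKey() turning the flattened characters back into the declared JavaPairRDD<Character, Integer>:

JavaPairRDD<Character, Integer> charCounts = filtered.flatMap(
    new FlatMapFunction<Tuple2<String, Integer>, Character>() {
      @Override
      public Iterable<Character> call(Tuple2<String, Integer> s) {
        // Emit every character of the word so occurrences can be counted downstream.
        List<Character> chars = new ArrayList<>();
        for (char c : s._1().toCharArray()) {
          chars.add(c);
        }
        return chars;
      }
    }).mapToPair(new PairFunction<Character, Character, Integer>() {
      @Override
      public Tuple2<Character, Integer> call(Character c) {
        return new Tuple2<>(c, 1);
      }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer a, Integer b) {
        return a + b;
      }
    });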
Code example source: org.datavec/datavec-spark
leftJV.cogroup(rightJV);
return cogroupedJV.flatMap(new ExecuteJoinFromCoGroupFlatMapFunction(join));
Code example source: org.datavec/datavec-spark_2.11
leftJV.cogroup(rightJV);
return cogroupedJV.flatMap(new ExecuteJoinFromCoGroupFlatMapFunction(join));
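Both DataVec fragments rely on the same cogroup-then-flatMap join pattern. The following is a hypothetical sketch of that pattern only, not DataVec's actual ExecuteJoinFromCoGroupFlatMapFunction; it assumes Spark 2.x, an existing JavaSparkContext named sc, and made-up key/value types:

// cogroup gathers, per key, all left-side values and all right-side values.
JavaPairRDD<Integer, String> leftJV = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>(1, "left-a"), new Tuple2<>(2, "left-b")));
JavaPairRDD<Integer, String> rightJV = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>(1, "right-a"), new Tuple2<>(1, "right-b")));

JavaPairRDD<Integer, Tuple2<Iterable<String>, Iterable<String>>> cogroupedJV = leftJV.cogroup(rightJV);

// flatMap emits one record per (left, right) combination sharing a key, i.e. an inner join.
JavaRDD<String> joined = cogroupedJV.flatMap(
    (Tuple2<Integer, Tuple2<Iterable<String>, Iterable<String>>> grouped) -> {
      List<String> out = new ArrayList<>();
      for (String left : grouped._2()._1()) {
        for (String right : grouped._2()._2()) {
          out.add(grouped._1() + "," + left + "," + right);
        }
      }
      return out.iterator();
    });
// joined contains "1,left-a,right-a" and "1,left-a,right-b"; key 2 has no right-side match and is dropped.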