本文整理了Java中cz.seznam.euphoria.core.client.dataset.Dataset
类的一些代码示例,展示了Dataset
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Dataset
类的具体详情如下:
包路径:cz.seznam.euphoria.core.client.dataset.Dataset
类名称:Dataset
[英]A dataset abstraction.
[中]数据集抽象。
代码示例来源:origin: seznam/euphoria
/**
 * Hands the given sink to the wrapped dataset's {@code persist} and returns this
 * dataset so that calls can be chained.
 *
 * @param <S> the type of the sink
 * @param dst the sink passed on to the wrapped dataset
 *
 * @return this dataset
 */
public <S extends DataSink<T>> Dataset<T> persist(S dst) {
  wrap.persist(dst);
  return this;
}
代码示例来源:origin: seznam/euphoria
/**
 * Lets every output sink of the flow prepare the dataset it is attached to.
 *
 * <p>For each operator output that has a sink, the sink's {@code prepareDataset}
 * is called with that output; when it returns {@code true} the sink is detached
 * from the dataset by persisting {@code null}.
 *
 * @param flow the flow whose operator outputs are inspected
 */
@SuppressWarnings("unchecked")
private static void applySinkTransforms(Flow flow) {
  // Collect the sink-bearing outputs into a list first, then process that list.
  final List<Dataset<?>> sinkOutputs = flow.operators().stream()
      .filter(operator -> operator.output().getOutputSink() != null)
      .map(Operator::output)
      .collect(Collectors.toList());
  for (Dataset<?> dataset : sinkOutputs) {
    final boolean prepared = dataset.getOutputSink().prepareDataset((Dataset) dataset);
    if (prepared) {
      // remove the old output sink
      dataset.persist(null);
    }
  }
}
代码示例来源:origin: seznam/euphoria
/**
 * Builds the dataset that stands for the output of an operator.
 *
 * <p>The resulting dataset takes its boundedness from the given input dataset.
 *
 * @param <IN> element type of the input dataset
 * @param <OUT> element type of the produced dataset
 *
 * @param flow the flow the new dataset is associated with
 * @param input the input dataset whose boundedness is copied
 * @param op the operator that produces the new dataset
 *
 * @return a dataset representing the output of {@code op}
 */
public static <IN, OUT> Dataset<OUT> createOutputFor(
    Flow flow, Dataset<IN> input, Operator<IN, OUT> op) {
  final boolean bounded = input.isBounded();
  return new OutputDataset<>(flow, op, bounded);
}
代码示例来源:origin: seznam/euphoria
/**
 * Validate that no two output datasets use the same sink.
 * This is not supported, because we cannot clone the sink
 * (it can be used in client code after the flow has completed).
 *
 * @param dag the operator graph whose outputs are checked
 *
 * @throws IllegalArgumentException if the outputs of two operators share a sink
 */
@SuppressWarnings("unchecked")
private static void checkSinks(DAG<Operator<?, ?>> dag) {
  List<Pair<Dataset, DataSink>> outputs = dag.nodes()
      .filter(n -> n.output().getOutputSink() != null)
      .map(o -> Pair.of((Dataset) o.output(), (DataSink) o.output().getOutputSink()))
      .collect(Collectors.toList());
  Map<DataSink, Dataset> sinkDatasets = new HashMap<>();
  for (Pair<Dataset, DataSink> output : outputs) {
    // putIfAbsent hands back the dataset already registered for this sink, if any
    Dataset current = sinkDatasets.putIfAbsent(output.getSecond(), output.getFirst());
    if (current != null) {
      // single space between "and" and "operator" (the original emitted two)
      throw new IllegalArgumentException(
          "Operator " + current.getProducer().getName()
              + " and operator " + output.getFirst().getProducer().getName()
              + " use the same sink " + output.getSecond());
    }
  }
}
代码示例来源:origin: seznam/euphoria
ds.persist(HFileSink.newBuilder()
.withConfiguration(conf)
.withZookeeperQuorum(outputQuorum)
.withTable(table)
.withOutputPath(new Path(tmpDir))
.applyIf(ds.isBounded(),
b -> b.windowBy(GlobalWindowing.get(), w -> ""),
b -> b.windowBy(
代码示例来源:origin: seznam/euphoria
/**
 * Creates an operator named {@code "InputOperator"} on the flow of the given dataset.
 * The dataset is stored and the hints start out as an empty set.
 *
 * @param ds the dataset this operator wraps
 */
InputOperator(Dataset<T> ds) {
  super("InputOperator", ds.getFlow());
  this.hints = Collections.emptySet();
  this.ds = ds;
}
代码示例来源:origin: seznam/euphoria
/**
 * Looks up the input provider materialized for the edge {@code source -> target}.
 *
 * @param source the operator at the start of the edge
 * @param target the operator at the end of the edge
 *
 * @return the provider registered for the edge
 *
 * @throws IllegalArgumentException if nothing is registered for the edge
 */
InputProvider get(Operator<?, ?> source, Operator<?, ?> target) {
  final InputProvider provider = materializedOutputs.get(Pair.of(source, target));
  if (provider != null) {
    return provider;
  }
  throw new IllegalArgumentException(String.format(
      "Do not have suppliers for edge %s -> %s (original producer %s )",
      source, target, source.output().getProducer()));
}
void markRunning(Operator<?, ?> operator) {
代码示例来源:origin: seznam/euphoria
.stream()
.map(Node::get)
.filter(op -> op.output().getOutputSink() != null)
.forEach(op -> {
final DataSink<?> sink = op.output().getOutputSink();
sinks.add(sink);
DataSet<?> flinkOutput =
代码示例来源:origin: seznam/euphoria
DataSource<?> raw = op.output().getSource();
if (raw.isBounded()) {
BoundedDataSource<?> source = raw.asBounded();
代码示例来源:origin: seznam/euphoria
/**
 * Starts building an operator over two input datasets.
 *
 * @param <LEFT> element type of the left dataset
 * @param <RIGHT> element type of the right dataset
 * @param left the left input
 * @param right the right input
 *
 * @return the next builder step, carrying the name and both inputs
 *
 * @throws IllegalArgumentException if the two datasets do not share the same flow instance
 */
public <LEFT, RIGHT> ByBuilder<LEFT, RIGHT> of(Dataset<LEFT> left, Dataset<RIGHT> right) {
  // identity comparison: both inputs must reference the very same flow object
  final boolean sameFlow = right.getFlow() == left.getFlow();
  if (!sameFlow) {
    throw new IllegalArgumentException("Pass inputs from the same flow");
  }
  return new ByBuilder<>(name, left, right);
}
}
代码示例来源:origin: seznam/euphoria
/**
 * Returns the input provider stored for the edge leading from {@code source} to
 * {@code target}.
 *
 * @param source the producing operator of the edge
 * @param target the consuming operator of the edge
 *
 * @return the provider stored for that edge
 *
 * @throws IllegalArgumentException if no provider is stored for that edge
 */
InputProvider get(Operator<?, ?> source, Operator<?, ?> target) {
  final InputProvider found = materializedOutputs.get(Pair.of(source, target));
  if (found != null) {
    return found;
  }
  throw new IllegalArgumentException(String.format(
      "Do not have suppliers for edge %s -> %s (original producer %s )",
      source, target, source.output().getProducer()));
}
void markRunning(Operator<?, ?> operator) {
代码示例来源:origin: seznam/euphoria
.stream()
.map(Node::get)
.filter(op -> op.output().getOutputSink() != null)
.forEach(op -> {
final DataSink<?> sink = op.output().getOutputSink();
sinks.add(sink);
JavaRDD<SparkElement> sparkOutput =
代码示例来源:origin: seznam/euphoria
/**
 * Validate that no two output datasets use the same sink.
 * This is not supported, because we cannot clone the sink
 * (it can be used in client code after the flow has completed).
 *
 * @param dag the operator graph whose outputs are checked
 *
 * @throws IllegalArgumentException if the outputs of two operators share a sink
 */
@SuppressWarnings("unchecked")
private static void checkSinks(DAG<Operator<?, ?>> dag) {
  List<Pair<Dataset, DataSink>> outputs = dag.nodes()
      .filter(n -> n.output().getOutputSink() != null)
      .map(o -> Pair.of((Dataset) o.output(), (DataSink) o.output().getOutputSink()))
      .collect(Collectors.toList());
  Map<DataSink, Dataset> sinkDatasets = new HashMap<>();
  for (Pair<Dataset, DataSink> output : outputs) {
    // putIfAbsent hands back the dataset already registered for this sink, if any
    Dataset current = sinkDatasets.putIfAbsent(output.getSecond(), output.getFirst());
    if (current != null) {
      // single space between "and" and "operator" (the original emitted two)
      throw new IllegalArgumentException(
          "Operator " + current.getProducer().getName()
              + " and operator " + output.getFirst().getProducer().getName()
              + " use the same sink " + output.getSecond());
    }
  }
}
代码示例来源:origin: seznam/euphoria
output = createStream(op.output().getSource());
} else if (op instanceof FlatMap) {
output = execMap((Node) node, context);
代码示例来源:origin: seznam/euphoria
/**
 * Maps each element of the input dataset through the given function and persists
 * the mapped dataset into this sink.
 *
 * @param <T> element type of the input dataset
 * @param input the dataset to persist
 * @param mapper function turning an input element into a {@link Cell}
 */
public <T> void persist(Dataset<T> input, UnaryFunction<T, Cell> mapper) {
  final Dataset<Cell> cells = MapElements.of(input).using(mapper).output();
  cells.persist(this);
}
代码示例来源:origin: seznam/euphoria
/**
 * Offers each sink-bearing operator output of the flow to its own sink for preparation.
 *
 * <p>When a sink's {@code prepareDataset} returns {@code true} for its dataset, the
 * sink is removed from that dataset by persisting {@code null}.
 *
 * @param flow the flow whose operator outputs are examined
 */
@SuppressWarnings("unchecked")
private static void applySinkTransforms(Flow flow) {
  // Gather the outputs that have a sink into a list before touching any of them.
  final List<Dataset<?>> persisted = flow.operators().stream()
      .filter(operator -> operator.output().getOutputSink() != null)
      .map(Operator::output)
      .collect(Collectors.toList());
  for (Dataset<?> output : persisted) {
    final boolean sinkTookOver = output.getOutputSink().prepareDataset((Dataset) output);
    if (sinkTookOver) {
      // remove the old output sink
      output.persist(null);
    }
  }
}
代码示例来源:origin: seznam/euphoria
/**
 * Accepts the two input datasets of the operator being built.
 *
 * @param <LEFT> element type of the left dataset
 * @param <RIGHT> element type of the right dataset
 * @param left the left input
 * @param right the right input
 *
 * @return the following builder step, holding the name and both inputs
 *
 * @throws IllegalArgumentException if the datasets do not reference the same flow instance
 */
public <LEFT, RIGHT> ByBuilder<LEFT, RIGHT> of(Dataset<LEFT> left, Dataset<RIGHT> right) {
  // compared by reference: the inputs have to share one flow object
  final boolean flowsMatch = right.getFlow() == left.getFlow();
  if (!flowsMatch) {
    throw new IllegalArgumentException("Pass inputs from the same flow");
  }
  return new ByBuilder<>(name, left, right);
}
}
代码示例来源:origin: seznam/euphoria
/**
 * Decides whether the given join should be translated as a broadcast hash join.
 *
 * <p>That is the case when the join has exactly two inputs, its windowing is not a
 * {@code MergingWindowing}, and either it is a LEFT join whose right input's producer
 * carries the fits-in-memory hint, or a RIGHT join whose left input's producer does.
 *
 * @param o the join operator to inspect
 *
 * @return {@code true} if the conditions above hold
 */
@SuppressWarnings("unchecked")
public static boolean wantTranslateBroadcastHashJoin(Join o) {
  final ArrayList<Dataset> joinInputs = new ArrayList(o.listInputs());
  if (joinInputs.size() != 2) {
    return false;
  }
  final Dataset left = joinInputs.get(0);
  final Dataset right = joinInputs.get(1);
  final boolean smallSideHinted;
  if (o.getType() == Join.Type.LEFT && hasFitsInMemoryHint(right.getProducer())) {
    smallSideHinted = true;
  } else {
    smallSideHinted =
        o.getType() == Join.Type.RIGHT && hasFitsInMemoryHint(left.getProducer());
  }
  // the windowing is only consulted once the hint condition holds
  return smallSideHinted && !(o.getWindowing() instanceof MergingWindowing);
}
代码示例来源:origin: seznam/euphoria
.stream()
.map(Node::get)
.filter(op -> op.output().getOutputSink() != null)
.forEach(op -> {
final DataSink<?> sink = op.output().getOutputSink();
sinks.add(sink);
DataStream<?> flinkOutput =
代码示例来源:origin: seznam/euphoria
/**
 * Tells whether every source of the given {@link Flow} is bounded.
 *
 * @param flow the flow whose sources are examined
 *
 * @return {@code true} if no source is unbounded, {@code false} as soon as one is
 */
private boolean isBoundedInput(Flow flow) {
  boolean allBounded = true;
  for (Dataset<?> source : flow.sources()) {
    if (!source.isBounded()) {
      // one unbounded source is enough; stop looking
      allBounded = false;
      break;
    }
  }
  return allBounded;
}
内容来源于网络,如有侵权,请联系作者删除!