Usage of the org.apache.spark.api.java.function.Function Class, with Code Examples


This article collects code examples of the Java class org.apache.spark.api.java.function.Function and shows how the class is used in practice. The examples were extracted from selected open-source projects on GitHub, Stack Overflow, Maven, and similar platforms, and should serve as useful references. Details of the Function class:
Package: org.apache.spark.api.java.function
Class name: Function

Introduction to Function

Base interface for functions whose return types do not create special RDDs. PairFunction and DoubleFunction are handled separately, to allow PairRDDs and DoubleRDDs to be constructed when mapping RDDs of other types.
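
As a minimal runnable sketch (not taken from the projects below; the class, app name, and variable names are illustrative), a Function<String, Integer> passed to JavaRDD.map yields a plain JavaRDD<Integer>, whereas producing key/value pairs or doubles would call for PairFunction or DoubleFunction instead:

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public class FunctionSketch {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "FunctionSketch");
    JavaRDD<String> lines = sc.parallelize(Arrays.asList("a", "bb", "ccc"));
    // The Integer return type produces an ordinary JavaRDD<Integer>.
    Function<String, Integer> length = new Function<String, Integer>() {
      @Override
      public Integer call(String s) {
        return s.length();
      }
    };
    JavaRDD<Integer> lengths = lines.map(length);
    System.out.println(lengths.collect()); // [1, 2, 3]
    sc.stop();
  }
}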

Code examples

Code example source: apache/kylin

@Override
public Tuple2<K, V> next() {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  // Gather all values that share the current key from the key-sorted input.
  final LinkedList<V> values = new LinkedList<>();
  K currentKey = current._1();
  values.add(current._2());
  while (input.hasNext()) {
    Tuple2<K, V> next = input.next();
    if (comparator.compare(currentKey, next._1()) == 0) {
      values.add(next._2());
    } else {
      // A new key begins: remember it for the next call and emit the finished group.
      current = next;
      try {
        return new Tuple2<>(currentKey, converter.call(values));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
  }
  // Input exhausted: emit the final group.
  current = null;
  try {
    return new Tuple2<>(currentKey, converter.call(values));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}

Code example source: OryxProject/oryx

/**
 * Implementation which splits based solely on time. It will return approximately
 * the earliest 1 - {@link #getTestFraction()} of the input, ordered by timestamp,
 * as new training data, and the latest {@link #getTestFraction()} as test data.
 */
@Override
protected Pair<JavaRDD<String>,JavaRDD<String>> splitNewDataToTrainTest(JavaRDD<String> newData) {
 // Rough approximation; assumes timestamps are fairly evenly distributed
 StatCounter maxMin = newData.mapToDouble(line -> MLFunctions.TO_TIMESTAMP_FN.call(line).doubleValue()).stats();
 long minTime = (long) maxMin.min();
 long maxTime = (long) maxMin.max();
 log.info("New data timestamp range: {} - {}", minTime, maxTime);
 long approxTestTrainBoundary = (long) (maxTime - getTestFraction() * (maxTime - minTime));
 log.info("Splitting at timestamp {}", approxTestTrainBoundary);
 JavaRDD<String> newTrainData = newData.filter(
   line -> MLFunctions.TO_TIMESTAMP_FN.call(line) < approxTestTrainBoundary);
 JavaRDD<String> testData = newData.filter(
   line -> MLFunctions.TO_TIMESTAMP_FN.call(line) >= approxTestTrainBoundary);
 return new Pair<>(newTrainData, testData);
}
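
For example (illustrative numbers, not from the project): with getTestFraction() = 0.2 and timestamps spanning 0 to 1000, approxTestTrainBoundary = 1000 - 0.2 × (1000 - 0) = 800, so lines stamped before 800 (roughly the earliest 80%) become training data and the rest become test data.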

Code example source: OryxProject/oryx

@Test(expected = ArrayIndexOutOfBoundsException.class)
public void testParseBadTimestamp() throws Exception {
 MLFunctions.TO_TIMESTAMP_FN.call("[1,2,3]");
}

Code example source: OryxProject/oryx

@Test(expected = IOException.class)
public void testParseBadLine() throws Exception {
 MLFunctions.PARSE_FN.call("[1,]");
}

Code example source: OryxProject/oryx

@Test
public void testSumWithNaN() throws Exception {
 OryxTest.assertEquals(1.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0)).doubleValue());
 OryxTest.assertEquals(6.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, 2.0, 3.0)).doubleValue());
 OryxTest.assertEquals(3.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, Double.NaN, 3.0)).doubleValue());
 assertNaN(MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, 2.0, Double.NaN)));
 assertNaN(MLFunctions.SUM_WITH_NAN.call(Arrays.asList(Double.NaN)));
}

Code example source: OryxProject/oryx

@Test
public void testToTimestamp() throws Exception {
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("a,b,c,123").longValue());
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("a,b,c,123,").longValue());
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("[\"a\",\"b\",\"c\",123]").longValue());
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("[\"a\",\"b\",\"c\",123,\"d\"]").longValue());
}

Code example source: OryxProject/oryx

@Test
public void testParseJSON() throws Exception {
 assertArrayEquals(new String[]{"a", "1", "foo"},
          MLFunctions.PARSE_FN.call("[\"a\",\"1\",\"foo\"]"));
 assertArrayEquals(new String[]{"a", "1", "foo", ""},
          MLFunctions.PARSE_FN.call("[\"a\",\"1\",\"foo\",\"\"]"));
 assertArrayEquals(new String[]{"2.3"},
          MLFunctions.PARSE_FN.call("[\"2.3\"]"));
 assertArrayEquals(new String[]{},
          MLFunctions.PARSE_FN.call("[]"));
}

Code example source: OryxProject/oryx

@Test
public void testParseCSV() throws Exception {
 assertArrayEquals(new String[]{"a", "1", "foo"},
          MLFunctions.PARSE_FN.call("a,1,foo"));
 assertArrayEquals(new String[]{"a", "1", "foo", ""},
          MLFunctions.PARSE_FN.call("a,1,foo,"));
 assertArrayEquals(new String[]{"2.3"},
          MLFunctions.PARSE_FN.call("2.3"));
 // Different from JSON, sort of:
 assertArrayEquals(new String[]{""},
          MLFunctions.PARSE_FN.call(""));
}

Code example source: OryxProject/oryx

JavaPairRDD<Tuple2<String,String>,Double> tuples = sortedValues.mapToPair(line -> {
  try {
    String[] tokens = MLFunctions.PARSE_FN.call(line);
    String user = tokens[0];
    String item = tokens[1];
    // Truncated in the source; a plausible completion matching the declared pair type:
    Double strength = Double.valueOf(tokens[2]);
    return new Tuple2<>(new Tuple2<>(user, item), strength);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
});

Code example source: org.apache.crunch/crunch-spark

// Delegating adapter: forwards each call to the wrapped function f.
@Override
public R call(T t) throws Exception {
  return f.call(t);
}
};

Code example source: org.qcri.rheem/rheem-spark

@Override
public OutputType call(InputType v1) throws Exception {
  // Open the wrapped ExtendedFunction once, on the first call in this task.
  if (this.isFirstRun) {
    ((org.qcri.rheem.core.function.ExtendedFunction) this.impl).open(this.executionContext);
    this.isFirstRun = false;
  }
  return this.impl.call(v1);
}

Code example source: apache/incubator-nemo

@Override
public void onData(final I element) {
  try {
   outputCollector.emit(func.call(element));
  } catch (Exception e) {
   throw new RuntimeException(e);
  }
}

Code example source: org.apache.beam/beam-runners-spark

/**
 * A utility method that adapts {@link Function} to a {@link FlatMapFunction} with an {@link
 * Iterator} input. This is particularly useful because it allows functions written for map
 * operations to be reused in flatmap operations.
 *
 * @param func the {@link Function} to adapt.
 * @param <InputT> the input type.
 * @param <OutputT> the output type.
 * @return a {@link FlatMapFunction} that accepts an {@link Iterator} as an input and applies the
 *     {@link Function} on every element.
 */
public static <InputT, OutputT>
  FlatMapFunction<Iterator<InputT>, OutputT> functionToFlatMapFunction(
    final Function<InputT, OutputT> func) {
 return itr -> {
  final Iterator<OutputT> outputItr =
    Iterators.transform(
      itr,
      t -> {
       try {
        return func.call(t);
       } catch (Exception e) {
        throw new RuntimeException(e);
       }
      });
  return outputItr;
 };
}
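
A hedged usage sketch (the data and parseFn below are assumptions, not part of the Beam runner code): JavaRDD.mapPartitions expects a FlatMapFunction over an Iterator, so a plain per-element Function must be adapted before it can be used there:

// Hypothetical usage; assumes a live JavaSparkContext named "sc".
JavaRDD<String> input = sc.parallelize(Arrays.asList("1", "2", "3"));
Function<String, Integer> parseFn = s -> Integer.parseInt(s);
JavaRDD<Integer> parsed = input.mapPartitions(functionToFlatMapFunction(parseFn));
System.out.println(parsed.collect()); // prints [1, 2, 3]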

Code example source: net.sansa-stack/sansa-rdf-spark

// Wraps a function in a JavaKryoSerializationWrapper so that it is serialized
// with Kryo (useful when the function itself is not java.io.Serializable).
public static <I, O> org.apache.spark.api.java.function.Function<I, O> wrap(org.apache.spark.api.java.function.Function<I, O> fn) {
  JavaKryoSerializationWrapper<org.apache.spark.api.java.function.Function<I, O>> wrapper = new JavaKryoSerializationWrapper<>(fn);
  return i -> wrapper.getValue().call(i);
}

Code example source: org.qcri.rheem/rheem-iejoin

public Data call(Tuple2<Long, Input> in) throws Exception {
  return new Data<TypeXPivot, TypeXRef>(in._1(),
      this.getXPivot.call(in._2()), this.getXRef.call(in._2()));
}
}

Code example source: org.qcri.rheem/rheem-iejoin

@SuppressWarnings("unchecked")
public Iterator<List2AttributesObjectSkinny<TypeXPivot, TypeXRef>> call(Integer in,
    Iterator<Tuple2<Long, Input>> arg0) throws Exception {
  ArrayList<List2AttributesObjectSkinny<TypeXPivot, TypeXRef>> outList = new ArrayList<>(1);
  ArrayList<Data<TypeXPivot, TypeXRef>> list1 = new ArrayList<>(300000);
  while (arg0.hasNext()) {
    Tuple2<Long, Input> t2 = arg0.next();
    Input t = t2._2().copy();
    list1.add(new Data(t2._1(), getXPivot.call(t), getXRef.call(t)));
  }
  // Sort by the pivot/reference attributes, then wrap into a single skinny list object.
  Collections.sort(list1, new Data.Comparator(list1ASC, list1ASCSec));
  Data[] myData = new Data[list1.size()];
  list1.toArray(myData);
  List2AttributesObjectSkinny<TypeXPivot, TypeXRef> lo =
      new List2AttributesObjectSkinny<>(myData, in);
  if (!lo.isEmpty()) {
    outList.add(lo);
  }
  return outList.iterator();
}
}

Code example source: uber/marmaray

@Override
public List<ConverterResult<byte[], AvroPayload>> convert(@NonNull final byte[] data) throws Exception {
  GenericRecord genericRecord = this.schemaServiceReader.read(data);
  // Apply each registered update function to the decoded record, in order.
  for (Function<GenericRecord, GenericRecord> func : this.updateFunctions) {
    genericRecord = func.call(genericRecord);
  }
  return Collections.singletonList(new ConverterResult<>(new AvroPayload(genericRecord, this.fieldsToCache)));
}
}

Code example source: cloudera-labs/envelope

while (iter.hasNext()) {
  Row r = iter.next();
  // Convert each Row into a mutation and apply it.
  mutator.mutate(_rowToMutation.call(r));
} // (snippet truncated in the source; loop closed for readability)
