Usage of the org.apache.spark.api.java.function.Function.call() method, with code examples


This article compiles Java code examples of the org.apache.spark.api.java.function.Function.call() method, showing how Function.call() is used in practice. The examples are drawn from selected projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of Function.call() are as follows:

Package: org.apache.spark.api.java.function
Class: Function
Method: call

Introduction to Function.call

No description is available.
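
For context: in the Spark Java API, Function<T1, R> declares a single method, R call(T1 v1) throws Exception. Spark invokes call() once per element of an RDD, but as many of the examples below show, the method can also be invoked directly. A minimal, self-contained sketch (the class and variable names are illustrative):

import org.apache.spark.api.java.function.Function;

public class FunctionCallSketch {
  public static void main(String[] args) throws Exception {
    // Function is a functional interface, so a lambda suffices.
    Function<String, Integer> length = s -> s.length();
    // Direct invocation; call() is declared to throw Exception.
    System.out.println(length.call("hello")); // prints 5
  }
}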

Code examples

Code example source: apache/kylin

@Override
public Tuple2<K, V> next() {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  // Buffer all consecutive values that share the current key.
  final LinkedList<V> values = new LinkedList<>();
  K currentKey = current._1();
  values.add(current._2());
  while (input.hasNext()) {
    Tuple2<K, V> next = input.next();
    if (comparator.compare(currentKey, next._1()) == 0) {
      values.add(next._2());
    } else {
      // A new key starts here; remember it and emit the finished group.
      current = next;
      try {
        return new Tuple2<>(currentKey, converter.call(values));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
  }
  // Input exhausted: clear the cursor and emit the final group.
  current = null;
  try {
    return new Tuple2<>(currentKey, converter.call(values));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
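
The iterator above implements a streaming group-by over key-sorted input: consecutive tuples with equal keys (per the comparator) are buffered, and each completed group is reduced to a single value through converter.call(values), with any checked exception rethrown as an unchecked RuntimeException.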

Code example source: OryxProject/oryx

/**
 * Implementation which splits based solely on time. It returns approximately the
 * earliest (1 - {@link #getTestFraction()}) of the input, ordered by timestamp, as
 * new training data, and the most recent {@link #getTestFraction()} as test data.
 */
@Override
protected Pair<JavaRDD<String>,JavaRDD<String>> splitNewDataToTrainTest(JavaRDD<String> newData) {
 // Rough approximation; assumes timestamps are fairly evenly distributed
 StatCounter maxMin = newData.mapToDouble(line -> MLFunctions.TO_TIMESTAMP_FN.call(line).doubleValue()).stats();
 long minTime = (long) maxMin.min();
 long maxTime = (long) maxMin.max();
 log.info("New data timestamp range: {} - {}", minTime, maxTime);
 long approxTestTrainBoundary = (long) (maxTime - getTestFraction() * (maxTime - minTime));
 log.info("Splitting at timestamp {}", approxTestTrainBoundary);
 JavaRDD<String> newTrainData = newData.filter(
   line -> MLFunctions.TO_TIMESTAMP_FN.call(line) < approxTestTrainBoundary);
 JavaRDD<String> testData = newData.filter(
   line -> MLFunctions.TO_TIMESTAMP_FN.call(line) >= approxTestTrainBoundary);
 return new Pair<>(newTrainData, testData);
}
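
A worked instance of the boundary arithmetic above, with illustrative numbers in place of real timestamps:

public class SplitBoundarySketch {
  public static void main(String[] args) {
    long minTime = 0L;
    long maxTime = 1_000L;
    double testFraction = 0.1;
    // Same formula as approxTestTrainBoundary above:
    long boundary = (long) (maxTime - testFraction * (maxTime - minTime));
    // boundary == 900: timestamps < 900 become training data,
    // timestamps >= 900 (the most recent 10% of the range) become test data.
    System.out.println(boundary);
  }
}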

Code example source: OryxProject/oryx

@Test(expected = ArrayIndexOutOfBoundsException.class)
public void testParseBadTimestamp() throws Exception {
 MLFunctions.TO_TIMESTAMP_FN.call("[1,2,3]");
}

Code example source: OryxProject/oryx

@Test(expected = IOException.class)
public void testParseBadLine() throws Exception {
 MLFunctions.PARSE_FN.call("[1,]");
}

Code example source: OryxProject/oryx

@Test
public void testSumWithNaN() throws Exception {
 OryxTest.assertEquals(1.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0)).doubleValue());
 OryxTest.assertEquals(6.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, 2.0, 3.0)).doubleValue());
 OryxTest.assertEquals(3.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, Double.NaN, 3.0)).doubleValue());
 assertNaN(MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, 2.0, Double.NaN)));
 assertNaN(MLFunctions.SUM_WITH_NAN.call(Arrays.asList(Double.NaN)));
}

Code example source: OryxProject/oryx

@Test
public void testToTimestamp() throws Exception {
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("a,b,c,123").longValue());
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("a,b,c,123,").longValue());
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("[\"a\",\"b\",\"c\",123]").longValue());
 assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("[\"a\",\"b\",\"c\",123,\"d\"]").longValue());
}

Code example source: OryxProject/oryx

@Test
public void testParseJSON() throws Exception {
 assertArrayEquals(new String[]{"a", "1", "foo"},
          MLFunctions.PARSE_FN.call("[\"a\",\"1\",\"foo\"]"));
 assertArrayEquals(new String[]{"a", "1", "foo", ""},
          MLFunctions.PARSE_FN.call("[\"a\",\"1\",\"foo\",\"\"]"));
 assertArrayEquals(new String[]{"2.3"},
          MLFunctions.PARSE_FN.call("[\"2.3\"]"));
 assertArrayEquals(new String[]{},
          MLFunctions.PARSE_FN.call("[]"));
}

Code example source: OryxProject/oryx

@Test
public void testParseCSV() throws Exception {
 assertArrayEquals(new String[]{"a", "1", "foo"},
          MLFunctions.PARSE_FN.call("a,1,foo"));
 assertArrayEquals(new String[]{"a", "1", "foo", ""},
          MLFunctions.PARSE_FN.call("a,1,foo,"));
 assertArrayEquals(new String[]{"2.3"},
          MLFunctions.PARSE_FN.call("2.3"));
 // Different from JSON, sort of:
 assertArrayEquals(new String[]{""},
          MLFunctions.PARSE_FN.call(""));
}

Code example source: OryxProject/oryx

JavaPairRDD<Tuple2<String,String>,Double> tuples = sortedValues.mapToPair(line -> {
  try {
    String[] tokens = MLFunctions.PARSE_FN.call(line);
    String user = tokens[0];
    String item = tokens[1];
    // Snippet truncated at the source; a plausible completion:
    return new Tuple2<>(new Tuple2<>(user, item), Double.parseDouble(tokens[2]));
  } catch (Exception e) { throw new RuntimeException(e); }
});

Code example source: org.apache.crunch/crunch-spark

// Fragment of an adapter that delegates to another Function f; the enclosing
// anonymous-class declaration is reconstructed here for readability:
return new Function<T, R>() {
  @Override
  public R call(T t) throws Exception {
    return f.call(t);
  }
};


Code example source: org.qcri.rheem/rheem-spark

@Override
public OutputType call(InputType v1) throws Exception {
  // Run the wrapped implementation's open() hook exactly once, lazily,
  // before the first element is processed.
  if (this.isFirstRun) {
    ((org.qcri.rheem.core.function.ExtendedFunction) this.impl).open(this.executionContext);
    this.isFirstRun = false;
  }
  return this.impl.call(v1);
}

Code example source: apache/incubator-nemo

@Override
public void onData(final I element) {
  try {
    // Apply the wrapped Spark Function and forward its result downstream.
    outputCollector.emit(func.call(element));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}

Code example source: org.apache.beam/beam-runners-spark

/**
 * A utility method that adapts {@link Function} to a {@link FlatMapFunction} with an {@link
 * Iterator} input. This is particularly useful because it allows functions written for map
 * operations to be reused in flatMap operations.
 *
 * @param func the {@link Function} to adapt.
 * @param <InputT> the input type.
 * @param <OutputT> the output type.
 * @return a {@link FlatMapFunction} that accepts an {@link Iterator} as an input and applies the
 *     {@link Function} on every element.
 */
public static <InputT, OutputT>
  FlatMapFunction<Iterator<InputT>, OutputT> functionToFlatMapFunction(
    final Function<InputT, OutputT> func) {
 return itr -> {
  final Iterator<OutputT> outputItr =
    Iterators.transform(
      itr,
      t -> {
       try {
        return func.call(t);
       } catch (Exception e) {
        throw new RuntimeException(e);
       }
      });
  return outputItr;
 };
}
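
A minimal usage sketch of this adapter (it assumes functionToFlatMapFunction, defined above, is in scope; the data and function are illustrative):

import java.util.Arrays;
import java.util.Iterator;

import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;

public class FunctionToFlatMapFunctionSketch {
  public static void main(String[] args) throws Exception {
    Function<String, Integer> toLength = String::length;
    // Assumes the utility method shown above is statically imported.
    FlatMapFunction<Iterator<String>, Integer> adapted = functionToFlatMapFunction(toLength);

    Iterator<Integer> out = adapted.call(Arrays.asList("a", "bb", "ccc").iterator());
    while (out.hasNext()) {
      System.out.println(out.next()); // 1, 2, 3 -- transformed lazily, one element at a time
    }
  }
}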

Code example source: net.sansa-stack/sansa-rdf-spark

public static <I, O> org.apache.spark.api.java.function.Function<I, O> wrap(
    org.apache.spark.api.java.function.Function<I, O> fn) {
  // Kryo-serialize the function; the returned lambda captures only the wrapper.
  JavaKryoSerializationWrapper<org.apache.spark.api.java.function.Function<I, O>> wrapper =
      new JavaKryoSerializationWrapper<>(fn);
  return i -> wrapper.getValue().call(i);
}
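
The indirection exists for serialization: the returned lambda captures only the JavaKryoSerializationWrapper, which serializes the wrapped function with Kryo, so the original function does not itself need to be java.io.Serializable even though Spark serializes closures when shipping them to executors.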

Code example source: org.qcri.rheem/rheem-iejoin

public Data call(Tuple2<Long, Input> in) throws Exception {
  // Build a Data record from the tuple's id plus the pivot and reference
  // attributes extracted from its payload.
  return new Data<TypeXPivot, TypeXRef>(in._1(),
      //(TypeXPivot) in._2().getField(getXPivot),
      //(TypeXRef) in._2().getField(getXRef));
      this.getXPivot.call(in._2()), this.getXRef.call(in._2()));
}


Code example source: org.qcri.rheem/rheem-iejoin

@SuppressWarnings("unchecked")
public Iterator<List2AttributesObjectSkinny<TypeXPivot, TypeXRef>> call(Integer in,
    Iterator<Tuple2<Long, Input>> arg0) throws Exception {

  ArrayList<List2AttributesObjectSkinny<TypeXPivot, TypeXRef>> outList = new ArrayList<>(1);
  ArrayList<Data<TypeXPivot, TypeXRef>> list1 = new ArrayList<>(300000);

  // Materialize the partition, extracting the pivot and reference
  // attributes from each tuple via the two Functions.
  while (arg0.hasNext()) {
    Tuple2<Long, Input> t2 = arg0.next();
    Input t = t2._2().copy();
    list1.add(new Data(t2._1(), getXPivot.call(t), getXRef.call(t)));
  }
  // Sort, then wrap the whole partition into a single skinny list object.
  Collections.sort(list1, new Data.Comparator(list1ASC, list1ASCSec));
  Data[] myData = new Data[list1.size()];
  list1.toArray(myData);
  List2AttributesObjectSkinny<TypeXPivot, TypeXRef> lo =
      new List2AttributesObjectSkinny<>(myData, in);
  if (!lo.isEmpty()) {
    outList.add(lo);
  }
  return outList.iterator();
}

Code example source: uber/marmaray

@Override
public List<ConverterResult<byte[], AvroPayload>> convert(@NonNull final byte[] data) throws Exception {
  GenericRecord genericRecord = this.schemaServiceReader.read(data);
  // Apply each registered update function to the record, in order.
  for (Function<GenericRecord, GenericRecord> func : this.updateFunctions) {
    genericRecord = func.call(genericRecord);
  }
  return Collections.singletonList(new ConverterResult(new AvroPayload(genericRecord, this.fieldsToCache)));
}
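
The loop above threads a record through a chain of update functions, each call() output feeding the next. The same pattern, sketched with plain strings in place of Avro GenericRecords (all names here are illustrative):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.function.Function;

public class ChainedUpdateFunctionsSketch {
  public static void main(String[] args) throws Exception {
    List<Function<String, String>> updateFunctions =
        Arrays.asList(String::trim, String::toLowerCase);

    String record = "  RAW Input";
    for (Function<String, String> func : updateFunctions) {
      record = func.call(record); // each output feeds the next function
    }
    System.out.println(record); // "raw input"
  }
}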

Code example source: uber/marmaray

public static <T extends HoodieRecordPayload> JavaRDD<HoodieRecord<T>> combineRecords(
    final JavaRDD<HoodieRecord<T>> records, final Function<HoodieRecord<T>, Object> recordKeyFunc,
    final int parallelism) {
  // Key each record via recordKeyFunc.call(...), then merge records that share
  // a key with preCombine so that exactly one record per key survives.
  return records
      .mapToPair(record -> new Tuple2<>(recordKeyFunc.call(record), record))
      .reduceByKey((rec1, rec2) -> {
          @SuppressWarnings("unchecked")
          T reducedData = (T) rec1.getData().preCombine(rec2.getData());
          return new HoodieRecord<T>(rec1.getKey(), reducedData);
        }, parallelism)
      .map(recordTuple -> recordTuple._2());
}
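
The same key-and-reduce pattern, sketched with plain strings instead of Hudi records (everything here is illustrative; "latest value wins" stands in for preCombine):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import scala.Tuple2;

public class CombineRecordsSketch {
  public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("combine-sketch").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<String> records = sc.parallelize(Arrays.asList("a:1", "a:2", "b:3"));
      // Key extraction via Function.call, as in combineRecords above.
      Function<String, String> recordKeyFunc = s -> s.split(":")[0];
      JavaRDD<String> combined = records
          .mapToPair(r -> new Tuple2<>(recordKeyFunc.call(r), r))
          .reduceByKey((r1, r2) -> r1.compareTo(r2) >= 0 ? r1 : r2)
          .map(Tuple2::_2);
      System.out.println(combined.collect()); // one record per key, e.g. [a:2, b:3]
    }
  }
}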
