cascading.pipe.GroupBy.<init>()方法的使用及代码示例

x33g5p2x  于2022-01-20 转载在 其他  
字(14.7k)|赞(0)|评价(0)|浏览(86)

本文整理了Java中cascading.pipe.GroupBy.<init>()方法的一些代码示例,展示了GroupBy.<init>()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。GroupBy.<init>()方法的具体详情如下:
包路径:cascading.pipe.GroupBy
类名称:GroupBy
方法名:<init>

GroupBy.<init>介绍

[英]Creates a new GroupBy instance that will group on Fields#ALL fields.
[中]创建一个新的GroupBy实例,该实例将按Fields#ALL(即所有字段)进行分组。

代码示例

代码示例来源:origin: LiveRamp/cascading_ext

/**
 * Assembles the pipe chain for this sub-assembly and registers its tail.
 *
 * <p>The incoming {@code keys} pipe is run through {@code GetIndices} (declared output fields
 * {@code split}, {@code index}, {@code hash_num}), filtered with
 * {@code Unique.FilterPartialDuplicates}, grouped on {@code split}, and aggregated with
 * {@code CreateBloomFilterFromIndices}. The three bloom properties are then set on the step
 * {@code ConfigDef} of the final pipe.
 *
 * @param keys                head pipe supplying the keys
 * @param bloomFilterID       value stored under {@code BloomProps.TARGET_BLOOM_FILTER_ID}
 * @param approxCountPartsDir value stored under {@code BloomProps.BLOOM_KEYS_COUNTS_DIR}
 * @param bloomPartsDir       value stored under {@code BloomProps.BLOOM_FILTER_PARTS_DIR}
 * @param keyBytesField       name of the field passed to {@code GetIndices} as its argument
 * @param hashFactory         factory handed to {@code GetIndices}
 * @throws IOException declared by the signature; no statement visible here throws it directly
 */
public CreateBloomFilter(Pipe keys, String bloomFilterID, String approxCountPartsDir, String bloomPartsDir, String keyBytesField, HashFunctionFactory hashFactory) throws IOException {
  super(keys);

  // Each stage gets its own name so the chain reads top to bottom.
  Pipe indexed = new Each(keys, new Fields(keyBytesField), new GetIndices(hashFactory), new Fields("split", "index", "hash_num"));
  Pipe deduplicated = new Each(indexed, new Fields("split", "index", "hash_num"), new Unique.FilterPartialDuplicates());
  Pipe groupedBySplit = new GroupBy(deduplicated, new Fields("split"));
  Pipe filterParts = new Every(groupedBySplit, new Fields("index", "hash_num"), new CreateBloomFilterFromIndices(), Fields.ALL);

  // Step-level configuration is attached to the last pipe of the chain.
  ConfigDef stepConfig = filterParts.getStepConfigDef();
  stepConfig.setProperty(BloomProps.BLOOM_FILTER_PARTS_DIR, bloomPartsDir);
  stepConfig.setProperty(BloomProps.BLOOM_KEYS_COUNTS_DIR, approxCountPartsDir);
  stepConfig.setProperty(BloomProps.TARGET_BLOOM_FILTER_ID, bloomFilterID);

  setTails(filterParts);
}

代码示例来源:origin: cwensel/cascading

/**
 * Connects a flow in which two heads ("lower" and "upper") are each split by a
 * {@code RegexSplitter} into {@code num}/{@code char} and then merged by a single
 * {@code GroupBy} named "merge" that groups on {@code num}.
 *
 * <p>The test only plans the flow; it never runs it and makes no explicit assertion, so it
 * passes when {@code connect} returns without throwing.
 */
@Test
public void testBuildMerge()
  {
  Tap sourceLower = getPlatform().getTextFile( "file1" );
  Tap sourceUpper = getPlatform().getTextFile( "file2" );

  // Parameterized map (was a raw Map/HashMap) so the puts are type-checked.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "lower", sourceLower );
  sources.put( "upper", sourceUpper );

  Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );
  Tap sink = getPlatform().getTextFile( "outpath", SinkMode.REPLACE );

  Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
  Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );

  // null sort fields, reverseOrder == false
  Pipe splice = new GroupBy( "merge", Pipe.pipes( pipeLower, pipeUpper ), new Fields( "num" ), null, false );

  // The returned Flow is not needed: planning without an exception is the whole check.
  getPlatform().getFlowConnector().connect( sources, sink, splice );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Builds a three-stage chain (head pipe "foo", an Each applying Identity to field "a", and a
 * GroupBy on field "b") and hands the tail's getTrace() value to assertEqualsTrace together
 * with the expected trace string.
 */
@Test
public void testPipeGroupBy()
 {
 Pipe pipe = new Pipe( "foo" );
 pipe = new Each( pipe, new Fields( "a" ), new Identity() );
 pipe = new GroupBy( pipe, new Fields( "b" ) );
 // NOTE(review): the expected string names this test method, so the trace presumably records
 // where the GroupBy was constructed -- keep the construction inside this method; confirm
 // against Pipe#getTrace before refactoring.
 assertEqualsTrace( "cascading.TraceTest.testPipeGroupBy(TraceTest.java", pipe.getTrace() );
 }

代码示例来源:origin: cascading/cascading-platform

public FirstAssembly( Pipe previous )
 {
 Pipe pipe = new Pipe( "first", previous );
 pipe = new Each( pipe, new Identity() );
 pipe = new GroupBy( pipe, Fields.ALL );
 pipe = new Every( pipe, new First(), Fields.RESULTS );
 setTails( pipe );
 }
}

代码示例来源:origin: cascading/cascading-platform

/**
 * Plans a flow that merges two split streams through one {@code GroupBy}.
 *
 * <p>Heads "lower" and "upper" each pass their {@code line} field through a shared
 * {@code RegexSplitter} declaring {@code num} and {@code char}; both branches then feed a
 * {@code GroupBy} named "merge" keyed on {@code num}. Nothing is executed or asserted beyond
 * {@code connect} completing without an exception.
 */
@Test
public void testBuildMerge()
  {
  Tap sourceLower = getPlatform().getTextFile( "file1" );
  Tap sourceUpper = getPlatform().getTextFile( "file2" );

  // Typed map instead of the raw Map/HashMap, which only compiled with unchecked warnings.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "lower", sourceLower );
  sources.put( "upper", sourceUpper );

  Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );
  Tap sink = getPlatform().getTextFile( "outpath", SinkMode.REPLACE );

  Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
  Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );

  // No secondary sort (null) and natural order (false).
  Pipe splice = new GroupBy( "merge", Pipe.pipes( pipeLower, pipeUpper ), new Fields( "num" ), null, false );

  // Result intentionally discarded; the unused local was removed.
  getPlatform().getFlowConnector().connect( sources, sink, splice );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Builds the tail for this resolver: a head pipe named after the flow's first source, a
 * {@code RegexParser} extracting {@code ip} from {@code line} with the pattern {@code ^[^ ]*},
 * a {@code GroupBy} on {@code ip}, and a {@code Count} whose output selector is
 * {@code ip, count}.
 *
 * @param context resolver context; only {@code getFlow().getSourceNames()} is read
 * @return a fixed-size single-element list holding the assembled tail
 */
@Override
public List<Pipe> resolveTails( Context context )
  {
  String headName = (String) context.getFlow().getSourceNames().get( 0 );

  Pipe head = new Pipe( headName );
  Pipe parsed = new Each( head, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe groupedByIp = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe counted = new Every( groupedByIp, new Count(), new Fields( "ip", "count" ) );

  return Arrays.asList( counted );
  }
};

代码示例来源:origin: cwensel/cascading

public FirstAssembly( Pipe previous )
 {
 Pipe pipe = new Pipe( "first", previous );
 pipe = new Each( pipe, new Identity() );
 pipe = new GroupBy( pipe, Fields.ALL );
 pipe = new Every( pipe, new First(), Fields.RESULTS );
 setTails( pipe );
 }
}

代码示例来源:origin: cwensel/cascading

/**
 * Runs a flow that uses {@code Fields.SWAP} twice: first to replace {@code line} with the
 * parsed {@code ip}, and after grouping/counting to replace {@code ip} with
 * {@code ipaddress}. The sink is declared with fields {@code count, ipaddress}.
 *
 * <p>After completion the output is checked with {@code validateLength( flow, 8, 2, pattern )}.
 * NOTE(review): 8 and 2 are presumably the expected tuple count and tuple width -- confirm
 * against the helper.
 */
@Test
public void testSwap() throws Exception
  {
  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );
  Tap sink = getPlatform().getTextFile( new Fields( "offset", "line" ), new Fields( "count", "ipaddress" ), getOutputPath( "swap" ), SinkMode.REPLACE );

  Pipe head = new Pipe( "test" );
  Function ipParser = new RegexParser( new Fields( "ip" ), "^[^ ]*" );

  Pipe parsed = new Each( head, new Fields( "line" ), ipParser, Fields.SWAP );
  Pipe groupedByIp = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe counted = new Every( groupedByIp, new Fields( "ip" ), new Count( new Fields( "count" ) ) );
  Pipe renamed = new Each( counted, new Fields( "ip" ), new Identity( new Fields( "ipaddress" ) ), Fields.SWAP );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, renamed );
  flow.complete();

  Pattern expectedLine = Pattern.compile( "^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$" );
  validateLength( flow, 8, 2, expectedLine );
  }

代码示例来源:origin: cascading/cascading-platform

/**
 * Produces the single tail of the assembly.
 *
 * <p>The head pipe takes its name from the first entry of the flow's source names. The
 * {@code line} field is parsed into {@code ip} by a {@code RegexParser} using {@code ^[^ ]*},
 * the stream is grouped on {@code ip}, and {@code Count} is applied with output selector
 * {@code ip, count}.
 *
 * @param context supplies the flow whose first source name becomes the head pipe name
 * @return the tail wrapped by {@code Arrays.asList}
 */
@Override
public List<Pipe> resolveTails( Context context )
  {
  String firstSourceName = (String) context.getFlow().getSourceNames().get( 0 );

  Pipe source = new Pipe( firstSourceName );
  Pipe withIp = new Each( source, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe byIp = new GroupBy( withIp, new Fields( "ip" ) );
  Pipe ipCounts = new Every( byIp, new Count(), new Fields( "ip", "count" ) );

  return Arrays.asList( ipCounts );
  }
};

代码示例来源:origin: cascading/cascading-platform

/**
 * Exercises {@code Fields.SWAP} as an output selector at two points of one flow.
 *
 * <p>The parser result {@code ip} is swapped in for {@code line}; after a {@code GroupBy} on
 * {@code ip} and a {@code Count} into {@code count}, an {@code Identity} declaring
 * {@code ipaddress} is swapped in for {@code ip}. The flow is run to completion and the sink is
 * checked through {@code validateLength} with the arguments 8, 2 and a line pattern.
 * NOTE(review): the meaning of 8 and 2 is defined by {@code validateLength}, which is not
 * visible here -- presumably row count and field count.
 */
@Test
public void testSwap() throws Exception
  {
  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );
  Tap sink = getPlatform().getTextFile( new Fields( "offset", "line" ), new Fields( "count", "ipaddress" ), getOutputPath( "swap" ), SinkMode.REPLACE );

  Pipe start = new Pipe( "test" );
  Function parser = new RegexParser( new Fields( "ip" ), "^[^ ]*" );

  Pipe ipOnly = new Each( start, new Fields( "line" ), parser, Fields.SWAP );
  Pipe byIp = new GroupBy( ipOnly, new Fields( "ip" ) );
  Pipe withCount = new Every( byIp, new Fields( "ip" ), new Count( new Fields( "count" ) ) );
  Pipe tail = new Each( withCount, new Fields( "ip" ), new Identity( new Fields( "ipaddress" ) ), Fields.SWAP );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, tail );
  flow.complete();

  Pattern linePattern = Pattern.compile( "^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$" );
  validateLength( flow, 8, 2, linePattern );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Passes to {@code verify} a pipe whose {@code GroupBy} groups on {@code first} and sorts on
 * {@code third}, with a source declaring {@code first, second} and a sink declaring
 * {@code third, fourth}.
 *
 * <p>NOTE(review): {@code third} is not among the declared source fields, so {@code verify}
 * presumably expects field resolution to fail -- confirm against the helper.
 */
@Test
public void testGroupBySortResolver() throws Exception
  {
  Tap source = getPlatform().getTabDelimitedFile( new Fields( "first", "second" ), "input/path", SinkMode.KEEP );
  Tap sink = getPlatform().getTabDelimitedFile( new Fields( "third", "fourth" ), "output/path", SinkMode.REPLACE );

  Pipe head = new Pipe( "test" );
  Pipe groupedAndSorted = new GroupBy( head, new Fields( "first" ), new Fields( "third" ) );

  verify( source, sink, groupedAndSorted );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Runs a flow that mixes {@code Fields.ALL}, {@code Fields.SWAP} and {@code Fields.NONE}.
 *
 * <p>{@code line} is parsed into {@code ip} (keeping all fields), then a {@code NoOp} is
 * swapped in for {@code line}. The stream is grouped on {@code ip}, counted into
 * {@code count}, and finally an {@code Insert} taking no arguments adds {@code ipaddress}
 * with the constant {@code 1.2.3.4}. The sink is declared with {@code count, ip}.
 *
 * <p>NOTE(review): the 8 and 2 passed to {@code validateLength} are presumably the expected
 * tuple count and width -- confirm against the helper.
 */
@Test
public void testNone() throws Exception
  {
  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );
  Tap sink = getPlatform().getTextFile( new Fields( "offset", "line" ), new Fields( "count", "ip" ), getOutputPath( "none" ), SinkMode.REPLACE );

  Pipe head = new Pipe( "test" );
  Function ipParser = new RegexParser( new Fields( "ip" ), "^[^ ]*" );

  Pipe parsed = new Each( head, new Fields( "line" ), ipParser, Fields.ALL );
  // declares Fields.NONE
  Pipe lineDropped = new Each( parsed, new Fields( "line" ), new NoOp(), Fields.SWAP );
  Pipe groupedByIp = new GroupBy( lineDropped, new Fields( "ip" ) );
  Pipe counted = new Every( groupedByIp, new Fields( "ip" ), new Count( new Fields( "count" ) ) );
  Pipe withAddress = new Each( counted, Fields.NONE, new Insert( new Fields( "ipaddress" ), "1.2.3.4" ), Fields.ALL );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, withAddress );
  flow.complete();

  Pattern expectedLine = Pattern.compile( "^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$" );
  validateLength( flow, 8, 2, expectedLine );
  }

代码示例来源:origin: cascading/cascading-platform

/**
 * Hands {@code verify} a source declaring {@code first, second}, a sink declaring
 * {@code third, fourth}, and a pipe whose only operation is a {@code GroupBy} with grouping
 * field {@code first} and sort field {@code third}.
 *
 * <p>NOTE(review): the sort field is absent from the source declaration; presumably
 * {@code verify} asserts that planning rejects it -- confirm against the helper.
 */
@Test
public void testGroupBySortResolver() throws Exception
  {
  Fields declaredBySource = new Fields( "first", "second" );
  Tap source = getPlatform().getTabDelimitedFile( declaredBySource, "input/path", SinkMode.KEEP );

  Fields declaredBySink = new Fields( "third", "fourth" );
  Tap sink = getPlatform().getTabDelimitedFile( declaredBySink, "output/path", SinkMode.REPLACE );

  Pipe tail = new GroupBy( new Pipe( "test" ), new Fields( "first" ), new Fields( "third" ) );

  verify( source, sink, tail );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Plans a flow named "trap test" whose trap is registered under the head pipe name "map".
 *
 * <p>The assembly parses {@code ip} out of {@code line}, applies a {@code TestFunction},
 * groups under the pipe name "reduce" on {@code ip}, and counts. Sources are keyed by "map",
 * sinks by "reduce", traps by "map". The test only calls {@code connect}; it makes no further
 * assertion.
 */
@Test
public void testTrapNamesPass() throws Exception
  {
  Tap source = getPlatform().getTextFile( "foosource" );

  Pipe head = new Pipe( "map" );
  Pipe parsed = new Each( head, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  // always fail
  Pipe failing = new Each( parsed, new Fields( "ip" ), new TestFunction( new Fields( "test" ), null ), Fields.ALL );
  Pipe groupedByIp = new GroupBy( "reduce", failing, new Fields( "ip" ) );
  Pipe counted = new Every( groupedByIp, new Count(), new Fields( "ip", "count" ) );

  Tap sink = getPlatform().getTextFile( "foosink" );
  Tap trap = getPlatform().getTextFile( "footrap" );

  // Each tap is bound to the pipe name it belongs to.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "map", source );

  Map<String, Tap> sinks = new HashMap<String, Tap>();
  sinks.put( "reduce", sink );

  Map<String, Tap> traps = new HashMap<String, Tap>();
  traps.put( "map", trap );

  getPlatform().getFlowConnector().connect( "trap test", sources, sinks, traps, counted );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Passes to {@code verify} a pipe that groups on {@code third}, with a source declaring
 * {@code first, second} and a sink declaring {@code third, fourth}.
 *
 * <p>NOTE(review): the grouping field is not declared by the source, so {@code verify}
 * presumably checks that resolution fails -- confirm against the helper.
 */
@Test
public void testGroupByResolver() throws Exception
  {
  Tap source = getPlatform().getTabDelimitedFile( new Fields( "first", "second" ), "input/path", SinkMode.KEEP );
  Tap sink = getPlatform().getTabDelimitedFile( new Fields( "third", "fourth" ), "output/path", SinkMode.REPLACE );

  Pipe head = new Pipe( "test" );
  Pipe grouped = new GroupBy( head, new Fields( "third" ) );

  verify( source, sink, grouped );
  }

代码示例来源:origin: cascading/cascading-platform

/**
 * End-to-end run of a pipeline that combines the {@code ALL}, {@code SWAP} and {@code NONE}
 * field selectors.
 *
 * <p>Stages, in order: parse {@code ip} from {@code line} keeping all fields; swap a
 * {@code NoOp} result in for {@code line}; group on {@code ip}; count into {@code count};
 * insert the constant {@code 1.2.3.4} as {@code ipaddress} using {@code Fields.NONE} as the
 * argument selector. The result is written to a sink declared with {@code count, ip} and
 * checked with {@code validateLength( flow, 8, 2, pattern )}.
 * NOTE(review): semantics of 8 and 2 come from {@code validateLength} -- presumably row
 * count and field count; confirm.
 */
@Test
public void testNone() throws Exception
  {
  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );
  Tap sink = getPlatform().getTextFile( new Fields( "offset", "line" ), new Fields( "count", "ip" ), getOutputPath( "none" ), SinkMode.REPLACE );

  Pipe start = new Pipe( "test" );
  Function parser = new RegexParser( new Fields( "ip" ), "^[^ ]*" );

  Pipe withIp = new Each( start, new Fields( "line" ), parser, Fields.ALL );
  // declares Fields.NONE
  Pipe withoutLine = new Each( withIp, new Fields( "line" ), new NoOp(), Fields.SWAP );
  Pipe byIp = new GroupBy( withoutLine, new Fields( "ip" ) );
  Pipe withCount = new Every( byIp, new Fields( "ip" ), new Count( new Fields( "count" ) ) );
  Pipe tail = new Each( withCount, Fields.NONE, new Insert( new Fields( "ipaddress" ), "1.2.3.4" ), Fields.ALL );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, tail );
  flow.complete();

  Pattern linePattern = Pattern.compile( "^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$" );
  validateLength( flow, 8, 2, linePattern );
  }

代码示例来源:origin: cascading/cascading-platform

/**
 * Calls {@code verify} with a source declaring {@code first, second}, a sink declaring
 * {@code third, fourth}, and a single {@code GroupBy} keyed on {@code third}.
 *
 * <p>NOTE(review): {@code third} does not appear in the source declaration; presumably
 * {@code verify} expects the planner to reject the assembly -- confirm against the helper.
 */
@Test
public void testGroupByResolver() throws Exception
  {
  Fields declaredBySource = new Fields( "first", "second" );
  Tap source = getPlatform().getTabDelimitedFile( declaredBySource, "input/path", SinkMode.KEEP );

  Fields declaredBySink = new Fields( "third", "fourth" );
  Tap sink = getPlatform().getTabDelimitedFile( declaredBySink, "output/path", SinkMode.REPLACE );

  Pipe tail = new GroupBy( new Pipe( "test" ), new Fields( "third" ) );

  verify( source, sink, tail );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Plans a flow named "trap test" whose trap is registered under the intermediate pipe name
 * "middle" rather than the head name.
 *
 * <p>The assembly starts at "map", parses {@code ip} from {@code line}, is renamed to
 * "middle", applies a {@code TestFunction}, groups as "reduce" on {@code ip}, and counts.
 * Sources are keyed by "map", sinks by "reduce", traps by "middle". Only {@code connect} is
 * invoked; there is no further assertion.
 */
@Test
public void testTrapNamesPass2() throws Exception
  {
  Tap source = getPlatform().getTextFile( "foosource" );

  Pipe head = new Pipe( "map" );
  Pipe parsed = new Each( head, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe middle = new Pipe( "middle", parsed );
  Pipe tested = new Each( middle, new Fields( "ip" ), new TestFunction( new Fields( "test" ), null ), Fields.ALL );
  Pipe groupedByIp = new GroupBy( "reduce", tested, new Fields( "ip" ) );
  Pipe counted = new Every( groupedByIp, new Count(), new Fields( "ip", "count" ) );

  Tap sink = getPlatform().getTextFile( "foosink" );
  Tap trap = getPlatform().getTextFile( "footrap" );

  // Each tap is bound to the pipe name it belongs to.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "map", source );

  Map<String, Tap> sinks = new HashMap<String, Tap>();
  sinks.put( "reduce", sink );

  Map<String, Tap> traps = new HashMap<String, Tap>();
  traps.put( "middle", trap );

  getPlatform().getFlowConnector().connect( "trap test", sources, sinks, traps, counted );
  }

代码示例来源:origin: cwensel/cascading

/**
 * Passes to {@code verify} a pipe that groups on {@code first} and then applies
 * {@code Count} to {@code second} with {@code third} as the output selector. The source
 * declares {@code first, second}; the sink declares {@code third, fourth}.
 *
 * <p>NOTE(review): what {@code verify} asserts is not visible here; presumably it checks
 * that the {@code Every} output selector cannot be resolved -- confirm against the helper.
 */
@Test
public void testEveryOutResolver() throws Exception
  {
  Tap source = getPlatform().getTabDelimitedFile( new Fields( "first", "second" ), "input/path", SinkMode.KEEP );
  Tap sink = getPlatform().getTabDelimitedFile( new Fields( "third", "fourth" ), "output/path", SinkMode.REPLACE );

  Pipe head = new Pipe( "test" );
  Pipe groupedByFirst = new GroupBy( head, new Fields( "first" ) );
  Pipe counted = new Every( groupedByFirst, new Fields( "second" ), new Count(), new Fields( "third" ) );

  verify( source, sink, counted );
  }

代码示例来源:origin: cascading/cascading-platform

/**
 * Checks that a flow can be planned when the trap is keyed by a mid-assembly pipe name.
 *
 * <p>Pipe names along the chain are "map" (head), "middle" (after the {@code RegexParser}
 * stage) and "reduce" (the {@code GroupBy}). The source map uses "map", the sink map
 * "reduce", and the trap map "middle". The test passes if {@code connect} returns without
 * throwing; the resulting flow is neither kept nor run.
 */
@Test
public void testTrapNamesPass2() throws Exception
  {
  Tap source = getPlatform().getTextFile( "foosource" );

  Pipe mapSide = new Pipe( "map" );
  Pipe withIp = new Each( mapSide, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe renamed = new Pipe( "middle", withIp );
  Pipe withTest = new Each( renamed, new Fields( "ip" ), new TestFunction( new Fields( "test" ), null ), Fields.ALL );
  Pipe reduceSide = new GroupBy( "reduce", withTest, new Fields( "ip" ) );
  Pipe tail = new Every( reduceSide, new Count(), new Fields( "ip", "count" ) );

  Tap sink = getPlatform().getTextFile( "foosink" );
  Tap trap = getPlatform().getTextFile( "footrap" );

  Map<String, Tap> sources = new HashMap<String, Tap>();
  Map<String, Tap> sinks = new HashMap<String, Tap>();
  Map<String, Tap> traps = new HashMap<String, Tap>();

  sources.put( "map", source );
  sinks.put( "reduce", sink );
  traps.put( "middle", trap );

  getPlatform().getFlowConnector().connect( "trap test", sources, sinks, traps, tail );
  }

相关文章

微信公众号

最新文章

更多

GroupBy类方法