本文整理了Java中cascading.pipe.GroupBy.<init>()方法的一些代码示例,展示了GroupBy.<init>()的具体用法。
这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。
GroupBy.<init>()方法的具体详情如下:
包路径:cascading.pipe.GroupBy
类名称:GroupBy
方法名:<init>
[英]Creates a new GroupBy instance that will group on Fields#ALL fields.
[中]创建一个新的GroupBy实例,该实例将按全部字段(Fields#ALL)进行分组。
代码示例来源:origin: LiveRamp/cascading_ext
/**
 * Assembles the pipe chain that feeds {@code CreateBloomFilterFromIndices}.
 *
 * <p>The {@code keyBytesField} of {@code keys} is run through {@code GetIndices}, keeping the
 * "split", "index" and "hash_num" fields. Those tuples pass through
 * {@code Unique.FilterPartialDuplicates}, are grouped on "split", and each group's
 * "index"/"hash_num" values are handed to {@code CreateBloomFilterFromIndices}. The three bloom
 * properties are then set on the step config of the final pipe, which becomes the tail.
 *
 * @param keys pipe supplying the keys; also passed to the superclass constructor
 * @param bloomFilterID value stored under {@code BloomProps.TARGET_BLOOM_FILTER_ID}
 * @param approxCountPartsDir value stored under {@code BloomProps.BLOOM_KEYS_COUNTS_DIR}
 * @param bloomPartsDir value stored under {@code BloomProps.BLOOM_FILTER_PARTS_DIR}
 * @param keyBytesField name of the field in {@code keys} that is passed to {@code GetIndices}
 * @param hashFactory factory given to {@code GetIndices}
 * @throws IOException declared by the signature; NOTE(review): which call raises it is not
 *     visible in this block
 */
public CreateBloomFilter(Pipe keys, String bloomFilterID, String approxCountPartsDir, String bloomPartsDir, String keyBytesField, HashFunctionFactory hashFactory) throws IOException {
  super(keys);

  // One named local per stage instead of reassigning a single variable.
  Pipe indexed = new Each(keys, new Fields(keyBytesField), new GetIndices(hashFactory), new Fields("split", "index", "hash_num"));
  Pipe deduplicated = new Each(indexed, new Fields("split", "index", "hash_num"), new Unique.FilterPartialDuplicates());
  Pipe groupedBySplit = new GroupBy(deduplicated, new Fields("split"));
  Pipe filterParts = new Every(groupedBySplit, new Fields("index", "hash_num"), new CreateBloomFilterFromIndices(), Fields.ALL);

  // Step-level properties are attached to the last pipe of the chain.
  ConfigDef stepConfig = filterParts.getStepConfigDef();
  stepConfig.setProperty(BloomProps.BLOOM_FILTER_PARTS_DIR, bloomPartsDir);
  stepConfig.setProperty(BloomProps.BLOOM_KEYS_COUNTS_DIR, approxCountPartsDir);
  stepConfig.setProperty(BloomProps.TARGET_BLOOM_FILTER_ID, bloomFilterID);

  setTails(filterParts);
}
代码示例来源:origin: cwensel/cascading
/**
 * Plans a flow in which two branches, "lower" and "upper", are both fed into a single
 * {@code GroupBy} named "merge" that groups on the "num" field.
 *
 * <p>Each branch splits the "line" field on a single space into "num" and "char". The test only
 * connects the flow; it makes no explicit assertion and never runs the flow, so it passes when
 * {@code connect} returns without throwing.
 */
@Test
public void testBuildMerge() {
  Tap sourceLower = getPlatform().getTextFile("file1");
  Tap sourceUpper = getPlatform().getTextFile("file2");

  // Parameterized (not a raw Map) so the compiler checks the name-to-tap bindings.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("lower", sourceLower);
  sources.put("upper", sourceUpper);

  Function splitter = new RegexSplitter(new Fields("num", "char"), " ");
  Tap sink = getPlatform().getTextFile("outpath", SinkMode.REPLACE);

  Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);
  Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);

  // NOTE(review): the trailing (null, false) presumably means "no sort fields, not reversed" -
  // confirm against the GroupBy constructor documentation.
  Pipe splice = new GroupBy("merge", Pipe.pipes(pipeLower, pipeUpper), new Fields("num"), null, false);

  // The returned Flow is intentionally discarded: nothing in this test inspects it.
  getPlatform().getFlowConnector().connect(sources, sink, splice);
}
代码示例来源:origin: cwensel/cascading
/**
 * Asserts, through {@code assertEqualsTrace}, that the trace reported by a {@code GroupBy}
 * matches the "cascading.TraceTest.testPipeGroupBy(TraceTest.java" expectation.
 *
 * <p>NOTE(review): the trace presumably records where the pipe was constructed, so the
 * constructors are kept inline in this method rather than moved to a helper - confirm.
 */
@Test
public void testPipeGroupBy() {
  Pipe head = new Pipe("foo");
  Pipe identity = new Each(head, new Fields("a"), new Identity());
  Pipe grouped = new GroupBy(identity, new Fields("b"));

  String actualTrace = grouped.getTrace();
  assertEqualsTrace("cascading.TraceTest.testPipeGroupBy(TraceTest.java", actualTrace);
}
代码示例来源:origin: cascading/cascading-platform
public FirstAssembly( Pipe previous )
{
Pipe pipe = new Pipe( "first", previous );
pipe = new Each( pipe, new Identity() );
pipe = new GroupBy( pipe, Fields.ALL );
pipe = new Every( pipe, new First(), Fields.RESULTS );
setTails( pipe );
}
}
代码示例来源:origin: cascading/cascading-platform
/**
 * Plans a flow in which two branches, "lower" and "upper", are both fed into a single
 * {@code GroupBy} named "merge" that groups on the "num" field.
 *
 * <p>Each branch splits the "line" field on a single space into "num" and "char". The test only
 * connects the flow; it makes no explicit assertion and never runs the flow, so it passes when
 * {@code connect} returns without throwing.
 */
@Test
public void testBuildMerge() {
  Tap sourceLower = getPlatform().getTextFile("file1");
  Tap sourceUpper = getPlatform().getTextFile("file2");

  // Parameterized (not a raw Map) so the compiler checks the name-to-tap bindings.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("lower", sourceLower);
  sources.put("upper", sourceUpper);

  Function splitter = new RegexSplitter(new Fields("num", "char"), " ");
  Tap sink = getPlatform().getTextFile("outpath", SinkMode.REPLACE);

  Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);
  Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);

  // NOTE(review): the trailing (null, false) presumably means "no sort fields, not reversed" -
  // confirm against the GroupBy constructor documentation.
  Pipe splice = new GroupBy("merge", Pipe.pipes(pipeLower, pipeUpper), new Fields("num"), null, false);

  // The returned Flow is intentionally discarded: nothing in this test inspects it.
  getPlatform().getFlowConnector().connect(sources, sink, splice);
}
代码示例来源:origin: cwensel/cascading
/**
 * Builds the single tail for this resolver.
 *
 * <p>The head pipe is named after the first source name of the context's flow. The "line" field
 * is parsed with the regex {@code ^[^ ]*} into "ip", the stream is grouped on "ip", and a
 * {@code Count} aggregator is applied with the ("ip", "count") output selector.
 *
 * @param context supplies the flow whose first source name is used as the head pipe name
 * @return a one-element list holding the counting {@code Every}
 */
@Override
public List<Pipe> resolveTails(Context context) {
  String headName = (String) context.getFlow().getSourceNames().get(0);

  Pipe head = new Pipe(headName);
  Pipe parsed = new Each(head, new Fields("line"), new RegexParser(new Fields("ip"), "^[^ ]*"), new Fields("ip"));
  Pipe groupedByIp = new GroupBy(parsed, new Fields("ip"));
  // Declared as Pipe so Arrays.asList yields a List<Pipe>.
  Pipe counted = new Every(groupedByIp, new Count(), new Fields("ip", "count"));

  return Arrays.asList(counted);
}
};
代码示例来源:origin: cwensel/cascading
public FirstAssembly( Pipe previous )
{
Pipe pipe = new Pipe( "first", previous );
pipe = new Each( pipe, new Identity() );
pipe = new GroupBy( pipe, Fields.ALL );
pipe = new Every( pipe, new First(), Fields.RESULTS );
setTails( pipe );
}
}
代码示例来源:origin: cwensel/cascading
/**
 * Runs a flow that uses {@code Fields.SWAP} twice: first to replace "line" with the parsed "ip",
 * and later to replace "ip" with "ipaddress" after counting per "ip".
 *
 * <p>The sink selects ("count", "ipaddress"), and the output is validated against a
 * "number, number, dotted-quad" line pattern.
 *
 * @throws Exception if planning, running, or validating the flow fails
 */
@Test
public void testSwap() throws Exception {
  Tap source = getPlatform().getTextFile(new Fields("offset", "line"), inputFileApache);
  Tap sink = getPlatform().getTextFile(new Fields("offset", "line"), new Fields("count", "ipaddress"), getOutputPath("swap"), SinkMode.REPLACE);

  Function parser = new RegexParser(new Fields("ip"), "^[^ ]*");

  Pipe head = new Pipe("test");
  Pipe parsed = new Each(head, new Fields("line"), parser, Fields.SWAP);
  Pipe groupedByIp = new GroupBy(parsed, new Fields("ip"));
  Pipe counted = new Every(groupedByIp, new Fields("ip"), new Count(new Fields("count")));
  Pipe renamed = new Each(counted, new Fields("ip"), new Identity(new Fields("ipaddress")), Fields.SWAP);

  Flow flow = getPlatform().getFlowConnector().connect(source, sink, renamed);
  flow.complete();

  // NOTE(review): 8 and 2 are presumably the expected tuple count and field count - confirm
  // against validateLength.
  Pattern expectedLine = Pattern.compile("^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$");
  validateLength(flow, 8, 2, expectedLine);
}
代码示例来源:origin: cascading/cascading-platform
/**
 * Builds the single tail for this resolver.
 *
 * <p>The head pipe is named after the first source name of the context's flow. The "line" field
 * is parsed with the regex {@code ^[^ ]*} into "ip", the stream is grouped on "ip", and a
 * {@code Count} aggregator is applied with the ("ip", "count") output selector.
 *
 * @param context supplies the flow whose first source name is used as the head pipe name
 * @return a one-element list holding the counting {@code Every}
 */
@Override
public List<Pipe> resolveTails(Context context) {
  String headName = (String) context.getFlow().getSourceNames().get(0);

  Pipe head = new Pipe(headName);
  Pipe parsed = new Each(head, new Fields("line"), new RegexParser(new Fields("ip"), "^[^ ]*"), new Fields("ip"));
  Pipe groupedByIp = new GroupBy(parsed, new Fields("ip"));
  // Declared as Pipe so Arrays.asList yields a List<Pipe>.
  Pipe counted = new Every(groupedByIp, new Count(), new Fields("ip", "count"));

  return Arrays.asList(counted);
}
};
代码示例来源:origin: cascading/cascading-platform
/**
 * Runs a flow that uses {@code Fields.SWAP} twice: first to replace "line" with the parsed "ip",
 * and later to replace "ip" with "ipaddress" after counting per "ip".
 *
 * <p>The sink selects ("count", "ipaddress"), and the output is validated against a
 * "number, number, dotted-quad" line pattern.
 *
 * @throws Exception if planning, running, or validating the flow fails
 */
@Test
public void testSwap() throws Exception {
  Tap source = getPlatform().getTextFile(new Fields("offset", "line"), inputFileApache);
  Tap sink = getPlatform().getTextFile(new Fields("offset", "line"), new Fields("count", "ipaddress"), getOutputPath("swap"), SinkMode.REPLACE);

  Function parser = new RegexParser(new Fields("ip"), "^[^ ]*");

  Pipe head = new Pipe("test");
  Pipe parsed = new Each(head, new Fields("line"), parser, Fields.SWAP);
  Pipe groupedByIp = new GroupBy(parsed, new Fields("ip"));
  Pipe counted = new Every(groupedByIp, new Fields("ip"), new Count(new Fields("count")));
  Pipe renamed = new Each(counted, new Fields("ip"), new Identity(new Fields("ipaddress")), Fields.SWAP);

  Flow flow = getPlatform().getFlowConnector().connect(source, sink, renamed);
  flow.complete();

  // NOTE(review): 8 and 2 are presumably the expected tuple count and field count - confirm
  // against validateLength.
  Pattern expectedLine = Pattern.compile("^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$");
  validateLength(flow, 8, 2, expectedLine);
}
代码示例来源:origin: cwensel/cascading
/**
 * Hands {@code verify} a {@code GroupBy} that groups on "first" and sorts on "third", fed by a
 * source declaring only ("first", "second") and writing to a sink declaring ("third", "fourth").
 *
 * <p>NOTE(review): "third" is not among the source fields, so {@code verify} presumably expects
 * field resolution to fail - confirm against the helper.
 *
 * @throws Exception propagated from {@code verify}
 */
@Test
public void testGroupBySortResolver() throws Exception {
  Fields declaredBySource = new Fields("first", "second");
  Tap source = getPlatform().getTabDelimitedFile(declaredBySource, "input/path", SinkMode.KEEP);

  Fields declaredBySink = new Fields("third", "fourth");
  Tap sink = getPlatform().getTabDelimitedFile(declaredBySink, "output/path", SinkMode.REPLACE);

  Pipe head = new Pipe("test");
  Pipe groupedAndSorted = new GroupBy(head, new Fields("first"), new Fields("third"));

  verify(source, sink, groupedAndSorted);
}
代码示例来源:origin: cwensel/cascading
/**
 * Runs a flow that mixes {@code Fields.NONE}-related selectors with a group-and-count on "ip".
 *
 * <p>"line" is parsed into "ip" (keeping all fields), then a {@code NoOp} is applied to "line"
 * with {@code Fields.SWAP}. After counting per "ip", an {@code Insert} with a
 * {@code Fields.NONE} argument selector adds "ipaddress" = "1.2.3.4". The sink selects
 * ("count", "ip").
 *
 * @throws Exception if planning, running, or validating the flow fails
 */
@Test
public void testNone() throws Exception {
  Tap source = getPlatform().getTextFile(new Fields("offset", "line"), inputFileApache);
  Tap sink = getPlatform().getTextFile(new Fields("offset", "line"), new Fields("count", "ip"), getOutputPath("none"), SinkMode.REPLACE);

  Function parser = new RegexParser(new Fields("ip"), "^[^ ]*");

  Pipe head = new Pipe("test");
  Pipe parsed = new Each(head, new Fields("line"), parser, Fields.ALL);
  Pipe lineDropped = new Each(parsed, new Fields("line"), new NoOp(), Fields.SWAP); // declares Fields.NONE
  Pipe groupedByIp = new GroupBy(lineDropped, new Fields("ip"));
  Pipe counted = new Every(groupedByIp, new Fields("ip"), new Count(new Fields("count")));
  Pipe withConstant = new Each(counted, Fields.NONE, new Insert(new Fields("ipaddress"), "1.2.3.4"), Fields.ALL);

  Flow flow = getPlatform().getFlowConnector().connect(source, sink, withConstant);
  flow.complete();

  // NOTE(review): 8 and 2 are presumably the expected tuple count and field count - confirm
  // against validateLength.
  Pattern expectedLine = Pattern.compile("^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$");
  validateLength(flow, 8, 2, expectedLine);
}
代码示例来源:origin: cascading/cascading-platform
/**
 * Hands {@code verify} a {@code GroupBy} that groups on "first" and sorts on "third", fed by a
 * source declaring only ("first", "second") and writing to a sink declaring ("third", "fourth").
 *
 * <p>NOTE(review): "third" is not among the source fields, so {@code verify} presumably expects
 * field resolution to fail - confirm against the helper.
 *
 * @throws Exception propagated from {@code verify}
 */
@Test
public void testGroupBySortResolver() throws Exception {
  Fields declaredBySource = new Fields("first", "second");
  Tap source = getPlatform().getTabDelimitedFile(declaredBySource, "input/path", SinkMode.KEEP);

  Fields declaredBySink = new Fields("third", "fourth");
  Tap sink = getPlatform().getTabDelimitedFile(declaredBySink, "output/path", SinkMode.REPLACE);

  Pipe head = new Pipe("test");
  Pipe groupedAndSorted = new GroupBy(head, new Fields("first"), new Fields("third"));

  verify(source, sink, groupedAndSorted);
}
代码示例来源:origin: cwensel/cascading
/**
 * Connects a flow named "trap test" whose trap is registered under the head pipe name "map".
 *
 * <p>The head pipe "map" is parsed into "ip" and passed through a {@code TestFunction}; a
 * {@code GroupBy} named "reduce" then groups on "ip" and counts. The source is bound to "map",
 * the sink to "reduce", and the trap to "map". The test makes no explicit assertion, so it
 * passes when {@code connect} returns without throwing.
 *
 * @throws Exception if connecting the flow fails
 */
@Test
public void testTrapNamesPass() throws Exception {
  Tap source = getPlatform().getTextFile("foosource");

  Pipe head = new Pipe("map");
  Pipe parsed = new Each(head, new Fields("line"), new RegexParser(new Fields("ip"), "^[^ ]*"), new Fields("ip"));
  // always fail
  Pipe failing = new Each(parsed, new Fields("ip"), new TestFunction(new Fields("test"), null), Fields.ALL);
  Pipe groupedByIp = new GroupBy("reduce", failing, new Fields("ip"));
  Pipe counted = new Every(groupedByIp, new Count(), new Fields("ip", "count"));

  Tap sink = getPlatform().getTextFile("foosink");
  Tap trap = getPlatform().getTextFile("footrap");

  // Each map is filled right after it is created; the three maps are independent.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("map", source);

  Map<String, Tap> sinks = new HashMap<String, Tap>();
  sinks.put("reduce", sink);

  Map<String, Tap> traps = new HashMap<String, Tap>();
  traps.put("map", trap);

  getPlatform().getFlowConnector().connect("trap test", sources, sinks, traps, counted);
}
代码示例来源:origin: cwensel/cascading
/**
 * Hands {@code verify} a {@code GroupBy} on "third", fed by a source declaring only
 * ("first", "second") and writing to a sink declaring ("third", "fourth").
 *
 * <p>NOTE(review): "third" is not among the source fields, so {@code verify} presumably expects
 * field resolution to fail - confirm against the helper.
 *
 * @throws Exception propagated from {@code verify}
 */
@Test
public void testGroupByResolver() throws Exception {
  Fields declaredBySource = new Fields("first", "second");
  Tap source = getPlatform().getTabDelimitedFile(declaredBySource, "input/path", SinkMode.KEEP);

  Fields declaredBySink = new Fields("third", "fourth");
  Tap sink = getPlatform().getTabDelimitedFile(declaredBySink, "output/path", SinkMode.REPLACE);

  Pipe head = new Pipe("test");
  Pipe grouped = new GroupBy(head, new Fields("third"));

  verify(source, sink, grouped);
}
代码示例来源:origin: cascading/cascading-platform
/**
 * Runs a flow that mixes {@code Fields.NONE}-related selectors with a group-and-count on "ip".
 *
 * <p>"line" is parsed into "ip" (keeping all fields), then a {@code NoOp} is applied to "line"
 * with {@code Fields.SWAP}. After counting per "ip", an {@code Insert} with a
 * {@code Fields.NONE} argument selector adds "ipaddress" = "1.2.3.4". The sink selects
 * ("count", "ip").
 *
 * @throws Exception if planning, running, or validating the flow fails
 */
@Test
public void testNone() throws Exception {
  Tap source = getPlatform().getTextFile(new Fields("offset", "line"), inputFileApache);
  Tap sink = getPlatform().getTextFile(new Fields("offset", "line"), new Fields("count", "ip"), getOutputPath("none"), SinkMode.REPLACE);

  Function parser = new RegexParser(new Fields("ip"), "^[^ ]*");

  Pipe head = new Pipe("test");
  Pipe parsed = new Each(head, new Fields("line"), parser, Fields.ALL);
  Pipe lineDropped = new Each(parsed, new Fields("line"), new NoOp(), Fields.SWAP); // declares Fields.NONE
  Pipe groupedByIp = new GroupBy(lineDropped, new Fields("ip"));
  Pipe counted = new Every(groupedByIp, new Fields("ip"), new Count(new Fields("count")));
  Pipe withConstant = new Each(counted, Fields.NONE, new Insert(new Fields("ipaddress"), "1.2.3.4"), Fields.ALL);

  Flow flow = getPlatform().getFlowConnector().connect(source, sink, withConstant);
  flow.complete();

  // NOTE(review): 8 and 2 are presumably the expected tuple count and field count - confirm
  // against validateLength.
  Pattern expectedLine = Pattern.compile("^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$");
  validateLength(flow, 8, 2, expectedLine);
}
代码示例来源:origin: cascading/cascading-platform
/**
 * Hands {@code verify} a {@code GroupBy} on "third", fed by a source declaring only
 * ("first", "second") and writing to a sink declaring ("third", "fourth").
 *
 * <p>NOTE(review): "third" is not among the source fields, so {@code verify} presumably expects
 * field resolution to fail - confirm against the helper.
 *
 * @throws Exception propagated from {@code verify}
 */
@Test
public void testGroupByResolver() throws Exception {
  Fields declaredBySource = new Fields("first", "second");
  Tap source = getPlatform().getTabDelimitedFile(declaredBySource, "input/path", SinkMode.KEEP);

  Fields declaredBySink = new Fields("third", "fourth");
  Tap sink = getPlatform().getTabDelimitedFile(declaredBySink, "output/path", SinkMode.REPLACE);

  Pipe head = new Pipe("test");
  Pipe grouped = new GroupBy(head, new Fields("third"));

  verify(source, sink, grouped);
}
代码示例来源:origin: cwensel/cascading
/**
 * Connects a flow named "trap test" whose trap is registered under an intermediate pipe name,
 * "middle", rather than the head pipe name.
 *
 * <p>The head pipe "map" is parsed into "ip", renamed to "middle", and passed through a
 * {@code TestFunction}; a {@code GroupBy} named "reduce" then groups on "ip" and counts. The
 * source is bound to "map", the sink to "reduce", and the trap to "middle". The test makes no
 * explicit assertion, so it passes when {@code connect} returns without throwing.
 *
 * @throws Exception if connecting the flow fails
 */
@Test
public void testTrapNamesPass2() throws Exception {
  Tap source = getPlatform().getTextFile("foosource");

  Pipe head = new Pipe("map");
  Pipe parsed = new Each(head, new Fields("line"), new RegexParser(new Fields("ip"), "^[^ ]*"), new Fields("ip"));
  Pipe middle = new Pipe("middle", parsed);
  Pipe tested = new Each(middle, new Fields("ip"), new TestFunction(new Fields("test"), null), Fields.ALL);
  Pipe groupedByIp = new GroupBy("reduce", tested, new Fields("ip"));
  Pipe counted = new Every(groupedByIp, new Count(), new Fields("ip", "count"));

  Tap sink = getPlatform().getTextFile("foosink");
  Tap trap = getPlatform().getTextFile("footrap");

  // Each map is filled right after it is created; the three maps are independent.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("map", source);

  Map<String, Tap> sinks = new HashMap<String, Tap>();
  sinks.put("reduce", sink);

  Map<String, Tap> traps = new HashMap<String, Tap>();
  traps.put("middle", trap);

  getPlatform().getFlowConnector().connect("trap test", sources, sinks, traps, counted);
}
代码示例来源:origin: cwensel/cascading
/**
 * Hands {@code verify} a group-on-"first" pipe followed by an {@code Every} that applies
 * {@code Count} to "second" with the output selector "third". The source declares
 * ("first", "second") and the sink declares ("third", "fourth").
 *
 * <p>NOTE(review): {@code verify} presumably expects field resolution to fail for this
 * selector combination - confirm against the helper.
 *
 * @throws Exception propagated from {@code verify}
 */
@Test
public void testEveryOutResolver() throws Exception {
  Fields declaredBySource = new Fields("first", "second");
  Tap source = getPlatform().getTabDelimitedFile(declaredBySource, "input/path", SinkMode.KEEP);

  Fields declaredBySink = new Fields("third", "fourth");
  Tap sink = getPlatform().getTabDelimitedFile(declaredBySink, "output/path", SinkMode.REPLACE);

  Pipe head = new Pipe("test");
  Pipe groupedByFirst = new GroupBy(head, new Fields("first"));
  Pipe counted = new Every(groupedByFirst, new Fields("second"), new Count(), new Fields("third"));

  verify(source, sink, counted);
}
代码示例来源:origin: cascading/cascading-platform
/**
 * Connects a flow named "trap test" whose trap is registered under an intermediate pipe name,
 * "middle", rather than the head pipe name.
 *
 * <p>The head pipe "map" is parsed into "ip", renamed to "middle", and passed through a
 * {@code TestFunction}; a {@code GroupBy} named "reduce" then groups on "ip" and counts. The
 * source is bound to "map", the sink to "reduce", and the trap to "middle". The test makes no
 * explicit assertion, so it passes when {@code connect} returns without throwing.
 *
 * @throws Exception if connecting the flow fails
 */
@Test
public void testTrapNamesPass2() throws Exception {
  Tap source = getPlatform().getTextFile("foosource");

  Pipe head = new Pipe("map");
  Pipe parsed = new Each(head, new Fields("line"), new RegexParser(new Fields("ip"), "^[^ ]*"), new Fields("ip"));
  Pipe middle = new Pipe("middle", parsed);
  Pipe tested = new Each(middle, new Fields("ip"), new TestFunction(new Fields("test"), null), Fields.ALL);
  Pipe groupedByIp = new GroupBy("reduce", tested, new Fields("ip"));
  Pipe counted = new Every(groupedByIp, new Count(), new Fields("ip", "count"));

  Tap sink = getPlatform().getTextFile("foosink");
  Tap trap = getPlatform().getTextFile("footrap");

  // Each map is filled right after it is created; the three maps are independent.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("map", source);

  Map<String, Tap> sinks = new HashMap<String, Tap>();
  sinks.put("reduce", sink);

  Map<String, Tap> traps = new HashMap<String, Tap>();
  traps.put("middle", trap);

  getPlatform().getFlowConnector().connect("trap test", sources, sinks, traps, counted);
}
内容来源于网络,如有侵权,请联系作者删除!