hive regexserde多行日志匹配在一行之后显示空值

aelbi1ox  于 2021-06-26  发布在  Hive
关注(0)|答案(0)|浏览(211)

尝试为单行加载多行apachetomcat日志,但它只加载单行,并在到达下一条记录之前显示其余行的空值。
我在之前的文章中尝试过regex,但它们不起作用,相同的正则表达式在测试工具中起作用这里是其中一个的链接:http://rubular.com/r/nvrwhuwg1c
它正确地识别行并将它们按正确的组分开,但是当我尝试在Hive中使用相同的代码时,它不起作用。
样品记录:

12-Dec-2013 12:03:26.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/jsp-api-2.3.jar] from classloader hierarchy
 java.io.FileNotFoundException: /usr/share/java/jsp-api-2.3.jar (No such file or directory)
    at java.util.zip.ZipFile.open(Native Method)
    at java.util.zip.ZipFile.<init>(ZipFile.java:225)
    at java.util.zip.ZipFile.<init>(ZipFile.java:155)
    at java.util.jar.JarFile.<init>(JarFile.java:166)
    at java.util.jar.JarFile.<init>(JarFile.java:130)
    at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
    at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
    at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
    at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
    at org.apache.catalina.startup.ContextConfig.processJarsForWebFragments(ContextConfig.java:1898)
    at org.apache.catalina.startup.ContextConfig.webConfig(ContextConfig.java:1126)
    at org.apache.catalina.startup.ContextConfig.configureStart(ContextConfig.java:775)
    at org.apache.catalina.startup.ContextConfig.lifecycleEvent(ContextConfig.java:299)
    at org.apache.catalina.util.LifecycleBase.fireLifecycleEvent(LifecycleBase.java:94)
    at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5105)
    at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
    at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
    at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
    at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
    at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
    at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

13-Dec-2016 12:03:24.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/el-api-3.0.jar] from classloader hierarchy
 java.io.FileNotFoundException: /usr/share/java/el-api-3.0.jar (No such file or directory)
    at java.util.zip.ZipFile.open(Native Method)
    at java.util.zip.ZipFile.<init>(ZipFile.java:225)
    at java.util.zip.ZipFile.<init>(ZipFile.java:155)
    at java.util.jar.JarFile.<init>(JarFile.java:166)
    at java.util.jar.JarFile.<init>(JarFile.java:130)
    at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
    at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
    at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
    at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
    at org.apache.jasper.servlet.TldScanner.scanJars(TldScanner.java:262)
    at org.apache.jasper.servlet.TldScanner.scan(TldScanner.java:104)
    at org.apache.jasper.servlet.JasperInitializer.onStartup(JasperInitializer.java:101)
    at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5196)
    at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
    at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
    at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
    at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
    at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
    at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Create Table Query which i am trying 

CREATE EXTERNAL TABLE apache_cat_log_test 
(
    logdatetime STRING, 
    logtype STRING, 
    requestid STRING, 
    verbosedata STRING,
    msg string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  "input.regex" = "^(\\d{2}-\\w{3}-\\d{4})\\s+(.*?)\\s+(\\w+)\\s*(.*?)\\s*$((?:\\s*(?!\\d{2}).*?$)*)"
 )
STORED AS TEXTFILE
location '/catlogs';

暂无答案!

目前还没有任何答案,快来回答吧!

相关问题