merge语句从teradata到mysql的迁移

50few1ms  于 2021-08-09  发布在  Java
关注(0)|答案(2)|浏览(314)

tables:- schema.infa_task_run_stg schema.infa\u任务\u运行
schema.infa\u task\u run\u stg的主索引:-主题区域schema.infa\u task\u run的主索引:-主题id、工作流id、工作流运行id、工作集运行id、示例id、任务id、开始时间
合并语句teradata:-

MERGE INTO schema.INFA_TASK_RUN USING schema.INFA_TASK_RUN_STG src
ON
        INFA_TASK_RUN_RAW.SUBJECT_ID = src.SUBJECT_ID
AND     INFA_TASK_RUN_RAW.WORKFLOW_ID = src.WORKFLOW_ID
AND     INFA_TASK_RUN_RAW.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND     INFA_TASK_RUN_RAW.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND     INFA_TASK_RUN_RAW.INSTANCE_ID = src.INSTANCE_ID
AND     INFA_TASK_RUN_RAW.TASK_ID = src.TASK_ID
AND     INFA_TASK_RUN_RAW.START_TIME = src.START_TIME
WHEN MATCHED THEN UPDATE SET
        END_TIME = src.END_TIME
,       RUN_ERR_CODE = src.RUN_ERR_CODE
,       RUN_ERR_MSG = src.RUN_ERR_MSG
,       RUN_STATUS_CODE = src.RUN_STATUS_CODE
WHEN NOT MATCHED THEN INSERT(
                SUBJECT_AREA
        ,       WORKFLOW_NAME
        ,       VERSION_NUMBER
        ,       SUBJECT_ID
        ,       WORKFLOW_ID
        ,       WORKFLOW_RUN_ID
        ,       WORKLET_RUN_ID
        ,       CHILD_RUN_ID
        ,       INSTANCE_ID
        ,       INSTANCE_NAME
        ,       TASK_ID
        ,       TASK_TYPE_NAME
        ,       TASK_TYPE
        ,       START_TIME
        ,       END_TIME
        ,       RUN_ERR_CODE
        ,       RUN_ERR_MSG
        ,       RUN_STATUS_CODE
        ,       TASK_NAME
        ,       TASK_VERSION_NUMBER
        ,       SERVER_ID
        ,       SERVER_NAME
        )VALUES(
                src.SUBJECT_AREA
        ,       src.WORKFLOW_NAME
        ,       src.VERSION_NUMBER
        ,       src.SUBJECT_ID
        ,       src.WORKFLOW_ID
        ,       src.WORKFLOW_RUN_ID
        ,       src.WORKLET_RUN_ID
        ,       src.CHILD_RUN_ID
        ,       src.INSTANCE_ID
        ,       src.INSTANCE_NAME
        ,       src.TASK_ID
        ,       src.TASK_TYPE_NAME
        ,       src.TASK_TYPE
        ,       src.START_TIME
        ,       src.END_TIME
        ,       src.RUN_ERR_CODE
        ,       src.RUN_ERR_MSG
        ,       src.RUN_STATUS_CODE
        ,       src.TASK_NAME
        ,       src.TASK_VERSION_NUMBER
        ,       src.SERVER_ID
        ,       src.SERVER_NAME
        );

我知道mysql数据库不支持merge语句。我正在尝试更新和插入语句。但这似乎不对。

UPDATE schema.INFA_TASK_RUN tgt INNER JOIN schema.INFA_TASK_RUN_STG src
ON
       tgt.SUBJECT_ID = src.SUBJECT_ID
AND     tgt.WORKFLOW_ID = src.WORKFLOW_ID
AND     tgt.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND     tgt.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND     tgt.INSTANCE_ID = src.INSTANCE_ID
AND     tgt.TASK_ID = src.TASK_ID
AND     tgt.START_TIME = src.START_TIME
 SET
        tgt.END_TIME = src.END_TIME
,       tgt.RUN_ERR_CODE = src.RUN_ERR_CODE
,       tgt.RUN_ERR_MSG = src.RUN_ERR_MSG
,       tgt.RUN_STATUS_CODE = src.RUN_STATUS_CODE;

insert into schema.INFA_TASK_RUN (SUBJECT_AREA         ,       WORKFLOW_NAME         ,       VERSION_NUMBER         ,       SUBJECT_ID         ,       WORKFLOW_ID         ,       WORKFLOW_RUN_ID         ,       WORKLET_RUN_ID         ,       CHILD_RUN_ID         ,       INSTANCE_ID         ,       INSTANCE_NAME         ,       TASK_ID         ,       TASK_TYPE_NAME         ,       TASK_TYPE         ,       START_TIME         ,       END_TIME         ,       RUN_ERR_CODE         ,       RUN_ERR_MSG         ,       RUN_STATUS_CODE         ,       TASK_NAME         ,       TASK_VERSION_NUMBER         ,       SERVER_ID         ,       SERVER_NAME)
    select src.SUBJECT_AREA         ,       src.WORKFLOW_NAME         ,       src.VERSION_NUMBER         ,       src.SUBJECT_ID         ,       src.WORKFLOW_ID         ,       src.WORKFLOW_RUN_ID         ,       src.WORKLET_RUN_ID         ,       src.CHILD_RUN_ID         ,       src.INSTANCE_ID         ,       src.INSTANCE_NAME         ,       src.TASK_ID         ,       src.TASK_TYPE_NAME         ,       src.TASK_TYPE         ,       src.START_TIME         ,       src.END_TIME         ,       src.RUN_ERR_CODE         ,       src.RUN_ERR_MSG         ,       src.RUN_STATUS_CODE         ,       src.TASK_NAME         ,       src.TASK_VERSION_NUMBER         ,       src.SERVER_ID         ,       src.SERVER_NAME
    from schema.INFA_TASK_RUN_STG as src
        left outer join schema.INFA_TASK_RUN as tgt  ON
       tgt.SUBJECT_ID != src.SUBJECT_ID
AND     tgt.WORKFLOW_ID != src.WORKFLOW_ID
AND     tgt.WORKFLOW_RUN_ID != src.WORKFLOW_RUN_ID
AND     tgt.WORKLET_RUN_ID != src.WORKLET_RUN_ID
AND     tgt.INSTANCE_ID != src.INSTANCE_ID
AND     tgt.TASK_ID != src.TASK_ID
AND     tgt.START_TIME != src.START_TIME
k3bvogb1

k3bvogb11#

相信你正在寻找的是这样的东西(没有经过测试,只有在主键设置正确时才能工作):

INSERT INTO schema.INFA_TASK_RUN (
     SUBJECT_AREA
    ,WORKFLOW_NAME
    ,VERSION_NUMBER
    ,SUBJECT_ID
    ,WORKFLOW_ID
    ,WORKFLOW_RUN_ID
    ,WORKLET_RUN_ID
    ,CHILD_RUN_ID
    ,INSTANCE_ID
    ,INSTANCE_NAME
    ,TASK_ID
    ,TASK_TYPE_NAME
    ,TASK_TYPE
    ,START_TIME
    ,END_TIME
    ,RUN_ERR_CODE
    ,RUN_ERR_MSG
    ,RUN_STATUS_CODE
    ,TASK_NAME
    ,TASK_VERSION_NUMBER
    ,SERVER_ID
    ,SERVER_NAME
    )
SELECT
     SUBJECT_AREA
    ,WORKFLOW_NAME
    ,VERSION_NUMBER
    ,SUBJECT_ID
    ,WORKFLOW_ID
    ,WORKFLOW_RUN_ID
    ,WORKLET_RUN_ID
    ,CHILD_RUN_ID
    ,INSTANCE_ID
    ,INSTANCE_NAME
    ,TASK_ID
    ,TASK_TYPE_NAME
    ,TASK_TYPE
    ,START_TIME
    ,END_TIME
    ,RUN_ERR_CODE
    ,RUN_ERR_MSG
    ,RUN_STATUS_CODE
    ,TASK_NAME
    ,TASK_VERSION_NUMBER
    ,SERVER_ID
    ,SERVER_NAME
FROM schema.INFA_TASK_RUN_STG src
ON DUPLICATE KEY UPDATE
     END_TIME = src.END_TIME
    ,RUN_ERR_CODE = src.RUN_ERR_CODE
    ,RUN_ERR_MSG = src.RUN_ERR_MSG
    ,RUN_STATUS_CODE = src.RUN_STATUS_CODE;

在2020-05-21上编辑以显示单独的更新和基于注解的插入语句:
插入。。。在重复键语句上可能会更快。
从评论中我测试了原始语句,从问题中做了插入和更新。
注意,update语句工作正常。唯一的问题是,即使没有任何更改,每一行都会被更新。
可以向联接添加条件,如tgt.end\u time!=src.end\u时间,以确保只更新更改的记录。
问题中的原始更新查询:

UPDATE schema.INFA_TASK_RUN tgt INNER JOIN schema.INFA_TASK_RUN_STG src
ON
       tgt.SUBJECT_ID = src.SUBJECT_ID
AND     tgt.WORKFLOW_ID = src.WORKFLOW_ID
AND     tgt.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND     tgt.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND     tgt.INSTANCE_ID = src.INSTANCE_ID
AND     tgt.TASK_ID = src.TASK_ID
AND     tgt.START_TIME = src.START_TIME
 SET
        tgt.END_TIME = src.END_TIME
,       tgt.RUN_ERR_CODE = src.RUN_ERR_CODE
,       tgt.RUN_ERR_MSG = src.RUN_ERR_MSG
,       tgt.RUN_STATUS_CODE = src.RUN_STATUS_CODE;

更新的插入:
必须更改insert语句,请查看join是在列相等的位置,我们只选择目标表中没有匹配值的位置,并检查目标表列是否为null:

INSERT INTO schema.INFA_TASK_RUN (
     SUBJECT_AREA
    ,WORKFLOW_NAME
    ,VERSION_NUMBER
    ,SUBJECT_ID
    ,WORKFLOW_ID
    ,WORKFLOW_RUN_ID
    ,WORKLET_RUN_ID
    ,CHILD_RUN_ID
    ,INSTANCE_ID
    ,INSTANCE_NAME
    ,TASK_ID
    ,TASK_TYPE_NAME
    ,TASK_TYPE
    ,START_TIME
    ,END_TIME
    ,RUN_ERR_CODE
    ,RUN_ERR_MSG
    ,RUN_STATUS_CODE
    ,TASK_NAME
    ,TASK_VERSION_NUMBER
    ,SERVER_ID
    ,SERVER_NAME
    )
    select src.SUBJECT_AREA
    ,src.WORKFLOW_NAME
    ,src.VERSION_NUMBER
    ,src.SUBJECT_ID
    ,src.WORKFLOW_ID
    ,src.WORKFLOW_RUN_ID
    ,src.WORKLET_RUN_ID
    ,src.CHILD_RUN_ID
    ,src.INSTANCE_ID
    ,src.INSTANCE_NAME
    ,src.TASK_ID
    ,src.TASK_TYPE_NAME
    ,src.TASK_TYPE
    ,src.START_TIME
    ,src.END_TIME
    ,src.RUN_ERR_CODE
    ,src.RUN_ERR_MSG
    ,src.RUN_STATUS_CODE
    ,src.TASK_NAME
    ,src.TASK_VERSION_NUMBER
    ,src.SERVER_ID
    ,src.SERVER_NAME
    FROM schema.INFA_TASK_RUN as tgt
        RIGHT JOIN schema.INFA_TASK_RUN_STG as src  ON
       tgt.SUBJECT_ID = src.SUBJECT_ID
AND     tgt.WORKFLOW_ID = src.WORKFLOW_ID
AND     tgt.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND     tgt.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND     tgt.INSTANCE_ID = src.INSTANCE_ID
AND     tgt.TASK_ID = src.TASK_ID
AND     tgt.START_TIME = src.START_TIME
WHERE tgt.SUBJECT_ID IS NULL;
cunj1qz1

cunj1qz12#

正确回答以免混淆

INSERT INTO schema.INFA_TASK_RUN (
         SUBJECT_AREA
        ,WORKFLOW_NAME
        ,VERSION_NUMBER
        ,SUBJECT_ID
        ,WORKFLOW_ID
        ,WORKFLOW_RUN_ID
        ,WORKLET_RUN_ID
        ,CHILD_RUN_ID
        ,INSTANCE_ID
        ,INSTANCE_NAME
        ,TASK_ID
        ,TASK_TYPE_NAME
        ,TASK_TYPE
        ,START_TIME
        ,END_TIME
        ,RUN_ERR_CODE
        ,RUN_ERR_MSG
        ,RUN_STATUS_CODE
        ,TASK_NAME
        ,TASK_VERSION_NUMBER
        ,SERVER_ID
        ,SERVER_NAME
        )
    SELECT
         SUBJECT_AREA
        ,WORKFLOW_NAME
        ,VERSION_NUMBER
        ,SUBJECT_ID
        ,WORKFLOW_ID
        ,WORKFLOW_RUN_ID
        ,WORKLET_RUN_ID
        ,CHILD_RUN_ID
        ,INSTANCE_ID
        ,INSTANCE_NAME
        ,TASK_ID
        ,TASK_TYPE_NAME
        ,TASK_TYPE
        ,START_TIME
        ,END_TIME
        ,RUN_ERR_CODE
        ,RUN_ERR_MSG
        ,RUN_STATUS_CODE
        ,TASK_NAME
        ,TASK_VERSION_NUMBER
        ,SERVER_ID
        ,SERVER_NAME
    FROM schema.INFA_TASK_RUN_STG src
    ON DUPLICATE KEY UPDATE
         END_TIME = src.END_TIME
        ,RUN_ERR_CODE = src.RUN_ERR_CODE
        ,RUN_ERR_MSG = src.RUN_ERR_MSG
        ,RUN_STATUS_CODE = src.RUN_STATUS_CODE;

相关问题