R语言 桑基图可视化

zbq4xfa0  于 8个月前  发布在  其他
关注(0)|答案(1)|浏览(78)

我正试图通过一个桑基图来可视化我的数据。
我有以下dataframe:

# Example data set: one row per contact of a participant with the GP
# (50 rows, see row.names below).
#   pat_id        - participant identifier
#   contactnummer - number of the contact (visit) for that participant
#   Combo2        - code of the symptom combination assessed at that contact
#   treatment     - code of the treatment given at that contact
# NOTE(review): the value 99 occurs in both contactnummer and treatment; its
# meaning is not stated here - presumably a "missing/other" code, confirm.
# The object is given the classes "data.table" and "data.frame".
sankey1 <- structure(list(pat_id = c(10037, 10264, 10302, 10302, 10302, 
10344, 10482, 10482, 10482, 10613, 10613, 10613, 10628, 10851, 
11052, 11203, 11214, 11214, 11566, 11684, 11821, 11945, 11945, 
11952, 11952, 12122, 12183, 12774, 13391, 13573, 13643, 14298, 
14556, 14556, 14648, 14862, 14935, 14935, 14999, 15514, 15811, 
16045, 16045, 16190, 16190, 16190, 16220, 16220, 16220, 16220
), contactnummer = c(1, 1, 1, 2, 3, 1, 1, 2, 3, 1, 2, 3, 1, 1, 
1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 
1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 3, 1, 2, 3, 99), Combo2 = c(1, 
1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 
2, 4, 4, 1, 5, 1, 1, 1, 1, 3, 3, 1, 5, 1, 1, 3, 1, 1, 1, 1, 1, 
3, 6, 3, 1, 1, 1, 1), treatment = c(99, 0, 0, 1, 1, 0, 99, 99, 
99, 99, 99, 1, 1, 0, 1, 99, 99, 99, 0, 99, 99, 0, 0, 0, 1, 99, 
99, 0, 0, 0, 0, 0, 1, 1, 1, 99, 99, 1, 0, 0, 1, 0, 0, 0, 1, 1, 
99, 99, 99, 99)), row.names = c(NA, 50L), class = c("data.table", 
"data.frame"))

# A tibble: 50 x 4
   pat_id contactnummer Combo2 treatment
    <dbl>         <dbl>  <dbl>     <dbl>
 1  10037             1      1        99
 2  10264             1      1         0
 3  10302             1      1         0
 4  10302             2      1         1
 5  10302             3      2         1
 6  10344             1      1         0
 7  10482             1      2        99
 8  10482             2      1        99
 9  10482             3      1        99
10  10613             1      1        99

该数据框包含到全科医生(GP)处就诊的参与者(“pat_id”)的信息。在每一次就诊或接触(“contactnummer”)中,GP 会评估症状的组合(“Combo2”)并给予治疗(“treatment”)。一些参与者(不是全部)会与 GP 进行第二次(甚至第三次)接触。对于每一次接触,全科医生都会评估症状并给予治疗。
目的是说明这些参与者的路径:哪些症状导致哪种治疗,以及发生在何时(第几次接触)。我希望用一个桑基图来做到这一点(https://r-graph-gallery.com/321-introduction-to-interactive-sankey-diagram-2.html)。
我的目标是这样想象它:

  • 用特定的颜色将症状的组合可视化
  • 以特定颜色显示每个治疗选项(节点)

理想情况下,所需的输出看起来像这样:[图片缺失] 或者像这样:[图片缺失]

我想把症状组合(“Combo2”)显示为箭头(连线),每个不同的组合使用不同的颜色,然后这些箭头应指向相应的治疗。但我希望箭头能继续延伸:在第一次接触之后,如果某个参与者(pat_id)有第二次接触,箭头应再次显示治疗之后出现了哪种组合,以及它在第二次接触中导致了哪种治疗。

编辑后

在用户s__的帮助下,我使用了以下脚本

# Reshape the contact data into a chain of Sankey nodes per patient.
#
# Each non-missing Combo2 / treatment value becomes a node label of the form
# "<contactnummer>_<variable>_<value>" (e.g. "1_Combo2_1", "1_treatment_99").
# Within a patient the labels are ordered by contact and, inside a contact,
# Combo2 before treatment; every label is then linked to the next one.
df <-
  sankey1 %>%
  # wide format: one column per contact / variable, e.g. "1_Combo2"
  pivot_wider(
    id_cols = pat_id,
    names_from = contactnummer,
    values_from = c(Combo2, treatment),
    names_glue = "{contactnummer}_{.value}",
    names_sort = TRUE
  ) %>%
  # back to long format: one row per patient / contact / variable
  pivot_longer(!pat_id, names_to = "variable", values_to = "value") %>%
  # contacts a patient never had show up as NA after the round trip
  filter(!is.na(value)) %>%
  mutate(
    # numeric contact number, so that contact 10 sorts after contact 2
    # (sorting the label strings alone would put "10_..." before "2_...")
    contact = as.numeric(sub("_.*$", "", variable)),
    source = paste(substr(variable, 1, 15), value, sep = "_")
  ) %>%
  arrange(pat_id, contact, source) %>%
  group_by(pat_id) %>%
  # the target of a node is the next node of the same patient; lead() yields
  # NA for the last node and is also safe for a patient with a single row
  mutate(target = lead(source)) %>%
  ungroup() %>%
  select(pat_id, source, target)

# define source and target; the last node of each patient has no target
links <- data.frame(
  source = df$source,
  target = df$target,
  stringsAsFactors = FALSE
) %>%
  filter(!is.na(target))

# unique node labels, in order of first appearance
nodes <- data.frame(
  name = unique(c(links$source, links$target)),
  stringsAsFactors = FALSE
)

# replace the labels by their position in `nodes`, shifted to start at 0
links$source <- match(links$source, nodes$name) - 1
links$target <- match(links$target, nodes$name) - 1

# number of patients following each source -> target step; count() returns
# an ungrouped data frame with the tally in column `n`
links <- count(links, source, target)

# plot it!
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "n",
  NodeID = "name",
  fontSize = 15
)

这非常接近,但还不是期望的输出。我试过编辑源,目标和节点如下,但这肯定不是所需的输出。

# Build combo -> treatment links within each contact.
#
# Every input row is one contact of one patient, so each row yields exactly
# one link from the symptom combination assessed at that contact to the
# treatment given at the same contact. Node labels have the form
# "<contactnummer>_Combo2_<code>" and "<contactnummer>_treatment_<code>".
# Deriving the links row by row avoids numbering reshaped columns by hand,
# which only works while no patient has more than three contacts.
#
# NOTE(review): `sankey2` is not defined in the visible text; it is assumed
# to have the same columns as `sankey1` (pat_id, contactnummer, Combo2,
# treatment) - confirm.
df <- sankey2 %>%
  # a link needs both ends: skip contacts missing the combo or the treatment
  filter(!is.na(Combo2), !is.na(treatment)) %>%
  # numeric ordering of the contacts within each patient
  arrange(pat_id, contactnummer) %>%
  mutate(
    source = paste(contactnummer, "Combo2", Combo2, sep = "_"),
    target = paste(contactnummer, "treatment", treatment, sep = "_")
  ) %>%
  select(pat_id, source, target)

# define source and target
links <- data.frame(
  source = df$source,
  target = df$target,
  stringsAsFactors = FALSE
)

# unique node labels, in order of first appearance
nodes <- data.frame(
  name = unique(c(links$source, links$target)),
  stringsAsFactors = FALSE
)

# replace the labels by their position in `nodes`, shifted to start at 0
links$source <- match(links$source, nodes$name) - 1
links$target <- match(links$target, nodes$name) - 1

# number of contacts behind each source -> target pair; count() returns an
# ungrouped data frame with the tally in column `n`
links <- count(links, source, target)

# plot it!
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "n",
  NodeID = "name",
  fontSize = 15
)

我真的想不通。如有任何帮助,我们将不胜感激!

1u4esq0p

1u4esq0p1#

抱歉,我不太清楚你打算如何处理分组等问题,但我们可以先从下面这样的代码开始:

评论后编辑

# load necessary libraries
library(networkD3)
library(d3Network)
library(dplyr)
library(tidyr)

# Turn the contact data into per-patient chains of Sankey nodes.
#
# Every non-missing Combo2 / treatment value is turned into a node label
# "<contactnummer>_<variable>_<value>" (e.g. "1_Combo2_1", "1_treatment_99").
# For each patient the labels are put in contact order, Combo2 before
# treatment inside a contact, and each label is linked to its successor.
df <-
  sankey1 %>%
  # wide format: one column per contact / variable, e.g. "1_Combo2"
  pivot_wider(
    id_cols = pat_id,
    names_from = contactnummer,
    values_from = c(Combo2, treatment),
    names_glue = "{contactnummer}_{.value}",
    names_sort = TRUE
  ) %>%
  # long format again: one row per patient / contact / variable
  pivot_longer(!pat_id, names_to = "variable", values_to = "value") %>%
  # drop the NA cells created for contacts a patient did not have
  filter(!is.na(value)) %>%
  mutate(
    # sort key: the contact number as a number, because the label strings
    # sort lexically ("10_..." would come before "2_...")
    contact = as.numeric(sub("_.*$", "", variable)),
    source = paste(substr(variable, 1, 15), value, sep = "_")
  ) %>%
  arrange(pat_id, contact, source) %>%
  group_by(pat_id) %>%
  # next node of the same patient; lead() gives NA for the final node and
  # does not break when a patient contributes only one row
  mutate(target = lead(source)) %>%
  ungroup() %>%
  select(pat_id, source, target)

# define source and target; final nodes (no target) are dropped
links <- data.frame(
  source = df$source,
  target = df$target,
  stringsAsFactors = FALSE
) %>%
  filter(!is.na(target))

# getting unique nodes, in order of first appearance
nodes <- data.frame(
  name = unique(c(links$source, links$target)),
  stringsAsFactors = FALSE
)

# matching links and nodes, then shifting the positions to start at 0
links$source <- match(links$source, nodes$name) - 1
links$target <- match(links$target, nodes$name) - 1

# how many patients take each source -> target step; count() returns an
# ungrouped data frame with the tally in column `n`
links <- count(links, source, target)

# plot it!
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "n",
  NodeID = "name",
  fontSize = 15
)

希望能帮上忙!

相关问题