r - Joining multiple tables with mix data -


this question has answer here:

my data:

data1 <- data.frame(from = c(1, 2, 13, 4),                     = c(4, 3, 9, 1),                     values = c(12, 56, 67, 78))   data2 <- data.frame(place = c("ny", "london", "brest", "nantes"),                     id = c(1, 2, 3, 4))  

my results:

from values    1  4     12    2  3     56   13  9     67    4  1     78    place id      ny  1  london  2   brest  3  nantes  4 

what expect using join function dplyr package (in new table)

from           values ny     nantes     12 london  brest     56 london     ny     78 

what tried:

 data3<- inner_join (data1, data2, =c("from" = "id", "to" = "id"))  data3 

some references:
https://stat545-ubc.github.io/bit001_dplyr-cheatsheet.html
https://cran.r-project.org/web/packages/dplyr/vignettes/two-table.html

a bigger example mix data

consider have 50 columns geographical data ("places") , non geographical data (levels, values)
doesnt wish change columns'order of d.f.
want keep columns names

   data1 <- data.frame(levels1 = c("name1", "name2", "name3", "name4"),                        value1 = c(4, 3, 9, 1),                        firstplace = c(1, 2, 13, 4),                          secondplace = c(1, 2, 2, 4),                        value2  = c(78, 3000, 90, 101),                        thirdplace =c(1, 1, 2, 4),                        fourthplace=c(4, 4, 4, 4),                        fifthplace=c(1, 2, 3, 4),                         value3 = c(12, 56, 67, 78))     data2 <- data.frame(place = c("ny", "london", "brest", "nantes"),                     id = c(1, 2, 3, 4))  

a example different names (more complex?)

i doesnt wish change columns'order of d.f.
want keep columns names

   data1 <- data.frame(levels1 = c("name1", "name2", "name3", "name4"),                        value1 = c(4, 3, 9, 1),                        shops= c(1, 2, 13, 4),                          after_sales_service = c(1, 2, 2, 4),                        value2  = c(78, 3000, 90, 101),                        provider =c(1, 1, 2, 4),                        seller=c(4, 4, 4, 4),                        maker=c(1, 2, 3, 4),                         value3 = c(12, 56, 67, 78))     data2 <- data.frame(place = c("ny", "london", "brest", "nantes"),                     id = c(1, 2, 3, 4))  

instead of joining, can use data2 look-up table:

library(dplyr) data1 <- data1 %>%    mutate(from = data2$place[match(from, data2$id)],          = data2$place[match(to, data2$id)]) %>%   filter(complete.cases(.)) 

gives:

> data1         values 1     ny nantes     12 2 london  brest     56 3 nantes     ny     78 

an alternative solution data.table package:

library(data.table) na.omit(setdt(data1)[, `:=` (from = data2$place[match(from, data2$id)],                              = data2$place[match(to, data2$id)])]) 

you double left_join:

data1 %>%    left_join(., data2, = c("from"="id")) %>%   left_join(., data2, = c("to"="id")) %>%   select(-c(1:2)) %>%   filter(complete.cases(.)) 

update 1: if have multiple columns names have matched, better transform dataframe long form first. example bigger dataset:

library(dplyr) library(tidyr) data1 %>%   gather(var, val, -values) %>%   left_join(., data2, = c("val"="id")) %>%   select(-3) %>%   filter(!is.na(place)) %>%    spread(var, place) 

which gives:

  values fifthplace firstplace fourthplace   secondplace thirdplace     1     12         ny         ny      nantes     ny          ny         ny nantes 2     56     london     london      nantes london      london         ny  brest 3     67      brest       <na>      nantes   <na>      london     london   <na> 4     78     nantes     nantes      nantes nantes      nantes     nantes     ny 

with data.table do:

library(data.table) dcast(melt(setdt(data1),            id.vars = "values")[data2, on = c(value="id")],       values ~ variable, value.var = "place") 

giving same result.


update 2: in response second update of question, can approach follows dplyr / tidyr:

data1 %>%   gather(var, val, c(firstplace,secondplace,thirdplace,fourthplace,fifthplace)) %>%   left_join(., data2, = c("val"="id")) %>%   select(-val) %>%   spread(var, place) 

which gives:

  levels1 value1 value2 value3 fifthplace firstplace fourthplace secondplace thirdplace 1   name1      4     78     12         ny         ny      nantes          ny         ny 2   name2      3   3000     56     london     london      nantes      london         ny 3   name3      9     90     67      brest       <na>      nantes      london     london 4   name4      1    101     78     nantes     nantes      nantes      nantes     nantes 

or data.table:

mvars <- c("firstplace","secondplace","thirdplace","fourthplace","fifthplace") dcast(melt(setdt(data1),            measure.vars = mvars)[data2, on = c(value="id")],       levels1 + value1 + value2 + value3 ~ variable, value.var = "place") 

which gives same result:

   levels1 value1 value2 value3 firstplace secondplace thirdplace fourthplace fifthplace 1:   name1      4     78     12         ny          ny         ny      nantes         ny 2:   name2      3   3000     56     london      london         ny      nantes     london 3:   name3      9     90     67         na      london     london      nantes      brest 4:   name4      1    101     78     nantes      nantes     nantes      nantes     nantes 

update 3: if want work index numbers, can do:

# dplyr / tidyr data1 %>%   gather(var, val, c(3,4,6:8)) %>%   left_join(., data2, = c("val"="id")) %>%   select(-val) %>%   spread(var, place)  # data.table dcast(melt(setdt(data1),            measure.vars = c(3,4,6:8))[data2, on = c(value="id")],       levels1 + value1 + value2 + value3 ~ variable, value.var = "place") 

which gives (data.table output):

   levels1 value1 value2 value3  shops after_sales_service provider seller  maker 1:   name1      4     78     12     ny                  ny       ny nantes     ny 2:   name2      3   3000     56 london              london       ny nantes london 3:   name3      9     90     67     na              london   london nantes  brest 4:   name4      1    101     78 nantes              nantes   nantes nantes nantes 

Comments

Popular posts from this blog

routing - AngularJS State management ->load multiple states in one page -

python - GRASS parser() error -

Swift game error message -