Introduction

Links
Overview
vlookup & xlookup
More examples
Variable selection in the expressions
Joins

Overview

Package provides pipe-style interface for data.table package. It preserves all data.table features without significant impact on performance. let and take functions are simplified interfaces for most common data manipulation tasks.

To select rows from data: rows(mtcars, am==0)
To select columns from data: columns(mtcars, mpg, vs:carb)
To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)
To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
To get total summary skip by argument: take_all(mtcars, mean)
Use magrittr pipe %>% to chain several operations:

     mtcars %>%
        let(mpg_hp = mpg/hp) %>%
        take(mean(mpg_hp), by = am)

To modify variables or add new variables:

      mtcars %>%
         let(new_var = 42,
             new_var2 = new_var*hp) %>%
         head()

To drop variable assign NULL: let(mtcars, am = NULL) %>% head()
To modify all non-grouping variables:

    iris %>%
      let_all(
          scaled = (.x - mean(.x))/sd(.x),
          by = Species) %>%
       head()

To aggregate all variables conditionally on name:

    iris %>%
      take_all(
          mean = if(startsWith(.name, "Sepal")) mean(.x),
          median = if(startsWith(.name, "Petal")) median(.x),
          by = Species
      )

For parametric assignment use :=:

    new_var = "my_var"
    old_var = "mpg"
    mtcars %>%
        let((new_var) := get(old_var)*2) %>%
        head()
     
    # or,  
    expr = quote(mean(cyl))
    mtcars %>% 
        let((new_var) := eval(expr)) %>% 
        head()
    
    # the same with `take` 
    by_var = "vs,am"
    take(mtcars, (new_var) := eval(expr), by = by_var)

query_if function translates its arguments one-to-one to [.data.table method. Additionally there are some conveniences such as automatic data.frame conversion to data.table.

vlookup & xlookup

Let’s make datasets for lookups:

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

# xlookup
workers = let(workers,
  position = xlookup(name, positions$name, positions$position)
)

# vlookup
# by default we search in the first column and return values from second column
workers = let(workers,
  position = vlookup(name, positions, no_match = "Not found")
)

# the same 
workers = let(workers,
  position = vlookup(name, positions, 
                     result_column = "position", 
                     no_match = "Not found") # or, result_column = 2 
)

head(workers)

##       name company  position
##     <char>  <char>    <char>
## 1:    Nick    Acme Not found
## 2:    John    Ajax  designer
## 3: Daniela    Ajax  engineer

More examples

We will use for demonstartion well-known mtcars dataset and some examples from dplyr package.

library(maditr)
data(mtcars)

# Newly created variables are available immediately
mtcars %>%
    let(
        cyl2 = cyl * 2,
        cyl4 = cyl2 * 2
    ) %>% head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4    12
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4    12
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1     8
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1    12
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2    16
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1    12
##     cyl4
##    <num>
## 1:    24
## 2:    24
## 3:    16
## 4:    24
## 5:    32
## 6:    24

# You can also use let() to remove variables and
# modify existing variables
mtcars %>%
    let(
        mpg = NULL,
        disp = disp * 0.0163871 # convert to litres
    ) %>% head()

##      cyl     disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num>    <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:     6 2.621936   110  3.90 2.620 16.46     0     1     4     4
## 2:     6 2.621936   110  3.90 2.875 17.02     0     1     4     4
## 3:     4 1.769807    93  3.85 2.320 18.61     1     1     4     1
## 4:     6 4.227872   110  3.08 3.215 19.44     1     0     3     1
## 5:     8 5.899356   175  3.15 3.440 17.02     0     0     3     2
## 6:     6 3.687098   105  2.76 3.460 20.22     1     0     3     1

# window functions are useful for grouped computations
mtcars %>%
    let(rank = rank(-mpg, ties.method = "min"),
        by = cyl) %>%
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  rank
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <int>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4     2
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4     2
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1     8
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1     1
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2     2
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1     6

# You can drop variables by setting them to NULL
mtcars %>%
    let(cyl = NULL) %>%
    head()

##      mpg  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.0   160   110  3.90 2.620 16.46     0     1     4     4
## 2:  21.0   160   110  3.90 2.875 17.02     0     1     4     4
## 3:  22.8   108    93  3.85 2.320 18.61     1     1     4     1
## 4:  21.4   258   110  3.08 3.215 19.44     1     0     3     1
## 5:  18.7   360   175  3.15 3.440 17.02     0     0     3     2
## 6:  18.1   225   105  2.76 3.460 20.22     1     0     3     1

# keeps all existing variables
mtcars %>%
    let(displ_l = disp / 61.0237) %>%
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  displ_l
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>    <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4 2.621932
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4 2.621932
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1 1.769804
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1 4.227866
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2 5.899347
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1 3.687092

# keeps only the variables you create
mtcars %>%
    take(displ_l = disp / 61.0237) %>% 
    head()

##     displ_l
##       <num>
## 1: 2.621932
## 2: 2.621932
## 3: 1.769804
## 4: 4.227866
## 5: 5.899347
## 6: 3.687092

# can refer to both contextual variables and variable names:
var = 100
mtcars %>%
    let(cyl = cyl * var) %>%
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.0   600   160   110  3.90 2.620 16.46     0     1     4     4
## 2:  21.0   600   160   110  3.90 2.875 17.02     0     1     4     4
## 3:  22.8   400   108    93  3.85 2.320 18.61     1     1     4     1
## 4:  21.4   600   258   110  3.08 3.215 19.44     1     0     3     1
## 5:  18.7   800   360   175  3.15 3.440 17.02     0     0     3     2
## 6:  18.1   600   225   105  2.76 3.460 20.22     1     0     3     1

# select rows
mtcars %>%
    rows(am==0) %>% 
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 2:  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
## 3:  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1
## 4:  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
## 5:  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 6:  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2

# select rows with compound condition
mtcars %>%
    rows(am==0 & mpg>mean(mpg))

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 2:  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 3:  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
## 4:  21.5     4 120.1    97  3.70 2.465 20.01     1     0     3     1

# select columns
mtcars %>% 
    columns(vs:carb, cyl)

##                     vs am gear carb cyl
## Mazda RX4            0  1    4    4   6
## Mazda RX4 Wag        0  1    4    4   6
## Datsun 710           1  1    4    1   4
## Hornet 4 Drive       1  0    3    1   6
## Hornet Sportabout    0  0    3    2   8
## Valiant              1  0    3    1   6
## Duster 360           0  0    3    4   8
## Merc 240D            1  0    4    2   4
## Merc 230             1  0    4    2   4
## Merc 280             1  0    4    4   6
## Merc 280C            1  0    4    4   6
## Merc 450SE           0  0    3    3   8
## Merc 450SL           0  0    3    3   8
## Merc 450SLC          0  0    3    3   8
## Cadillac Fleetwood   0  0    3    4   8
## Lincoln Continental  0  0    3    4   8
## Chrysler Imperial    0  0    3    4   8
## Fiat 128             1  1    4    1   4
## Honda Civic          1  1    4    2   4
## Toyota Corolla       1  1    4    1   4
## Toyota Corona        1  0    3    1   4
## Dodge Challenger     0  0    3    2   8
## AMC Javelin          0  0    3    2   8
## Camaro Z28           0  0    3    4   8
## Pontiac Firebird     0  0    3    2   8
## Fiat X1-9            1  1    4    1   4
## Porsche 914-2        0  1    5    2   4
## Lotus Europa         1  1    5    2   4
## Ford Pantera L       0  1    5    4   8
## Ferrari Dino         0  1    5    6   6
## Maserati Bora        0  1    5    8   8
## Volvo 142E           1  1    4    2   4

mtcars %>% 
    columns(-am, -cyl)

##                      mpg  disp  hp drat    wt  qsec vs gear carb
## Mazda RX4           21.0 160.0 110 3.90 2.620 16.46  0    4    4
## Mazda RX4 Wag       21.0 160.0 110 3.90 2.875 17.02  0    4    4
## Datsun 710          22.8 108.0  93 3.85 2.320 18.61  1    4    1
## Hornet 4 Drive      21.4 258.0 110 3.08 3.215 19.44  1    3    1
## Hornet Sportabout   18.7 360.0 175 3.15 3.440 17.02  0    3    2
## Valiant             18.1 225.0 105 2.76 3.460 20.22  1    3    1
## Duster 360          14.3 360.0 245 3.21 3.570 15.84  0    3    4
## Merc 240D           24.4 146.7  62 3.69 3.190 20.00  1    4    2
## Merc 230            22.8 140.8  95 3.92 3.150 22.90  1    4    2
## Merc 280            19.2 167.6 123 3.92 3.440 18.30  1    4    4
## Merc 280C           17.8 167.6 123 3.92 3.440 18.90  1    4    4
## Merc 450SE          16.4 275.8 180 3.07 4.070 17.40  0    3    3
## Merc 450SL          17.3 275.8 180 3.07 3.730 17.60  0    3    3
## Merc 450SLC         15.2 275.8 180 3.07 3.780 18.00  0    3    3
## Cadillac Fleetwood  10.4 472.0 205 2.93 5.250 17.98  0    3    4
## Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82  0    3    4
## Chrysler Imperial   14.7 440.0 230 3.23 5.345 17.42  0    3    4
## Fiat 128            32.4  78.7  66 4.08 2.200 19.47  1    4    1
## Honda Civic         30.4  75.7  52 4.93 1.615 18.52  1    4    2
## Toyota Corolla      33.9  71.1  65 4.22 1.835 19.90  1    4    1
## Toyota Corona       21.5 120.1  97 3.70 2.465 20.01  1    3    1
## Dodge Challenger    15.5 318.0 150 2.76 3.520 16.87  0    3    2
## AMC Javelin         15.2 304.0 150 3.15 3.435 17.30  0    3    2
## Camaro Z28          13.3 350.0 245 3.73 3.840 15.41  0    3    4
## Pontiac Firebird    19.2 400.0 175 3.08 3.845 17.05  0    3    2
## Fiat X1-9           27.3  79.0  66 4.08 1.935 18.90  1    4    1
## Porsche 914-2       26.0 120.3  91 4.43 2.140 16.70  0    5    2
## Lotus Europa        30.4  95.1 113 3.77 1.513 16.90  1    5    2
## Ford Pantera L      15.8 351.0 264 4.22 3.170 14.50  0    5    4
## Ferrari Dino        19.7 145.0 175 3.62 2.770 15.50  0    5    6
## Maserati Bora       15.0 301.0 335 3.54 3.570 14.60  0    5    8
## Volvo 142E          21.4 121.0 109 4.11 2.780 18.60  1    4    2

# regular expression pattern
columns(iris, "^Petal") %>% head() # variables which start from 'Petal'

##   Petal.Length Petal.Width
## 1          1.4         0.2
## 2          1.4         0.2
## 3          1.3         0.2
## 4          1.5         0.2
## 5          1.4         0.2
## 6          1.7         0.4

columns(iris, "Width$") %>% head() # variables which end with 'Width'

##   Sepal.Width Petal.Width
## 1         3.5         0.2
## 2         3.0         0.2
## 3         3.2         0.2
## 4         3.1         0.2
## 5         3.6         0.2
## 6         3.9         0.4

# move Species variable to the front
# pattern "^." matches all variables
columns(iris, Species, "^.") %>% head()

##   Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  setosa          5.1         3.5          1.4         0.2
## 2  setosa          4.9         3.0          1.4         0.2
## 3  setosa          4.7         3.2          1.3         0.2
## 4  setosa          4.6         3.1          1.5         0.2
## 5  setosa          5.0         3.6          1.4         0.2
## 6  setosa          5.4         3.9          1.7         0.4

# pattern "^.*al" means "contains 'al'"
columns(iris, "^.*al") %>% head()

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4

# numeric indexing - all variables except Species
columns(iris, 1:4) %>% head()

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4

# A 'take' with summary functions applied without 'by' argument returns an aggregated data
mtcars %>%
    take(mean = mean(disp), n = .N)

##        mean     n
##       <num> <int>
## 1: 230.7219    32

# Usually, you'll want to group first
mtcars %>%
    take(mean = mean(disp), n = .N, by = am)

##       am     mean     n
##    <num>    <num> <int>
## 1:     1 143.5308    13
## 2:     0 290.3789    19

# grouping by multiple variables
mtcars %>%
    take(mean = mean(disp), n = .N, by = list(am, vs))

##       am    vs     mean     n
##    <num> <num>    <num> <int>
## 1:     1     0 206.2167     6
## 2:     1     1  89.8000     7
## 3:     0     1 175.1143     7
## 4:     0     0 357.6167    12

# You can group by expressions:
mtcars %>%
    take_all(
        mean,
        by = list(vsam = vs + am)
    )

##     vsam      mpg      cyl     disp        hp     drat       wt     qsec
##    <num>    <num>    <num>    <num>     <num>    <num>    <num>    <num>
## 1:     1 20.28462 5.692308 189.4692 138.46154 3.738462 3.038846 18.04231
## 2:     2 28.37143 4.000000  89.8000  80.57143 4.148571 2.028286 18.70000
## 3:     0 15.05000 8.000000 357.6167 194.16667 3.120833 4.104083 17.14250
##        gear     carb
##       <num>    <num>
## 1: 4.076923 3.307692
## 2: 4.142857 1.428571
## 3: 3.000000 3.083333

# modify all non-grouping variables in-place
mtcars %>%
    let_all((.x - mean(.x))/sd(.x), by = am) %>%
    head()

##           mpg        cyl       disp         hp       drat         wt       qsec
##         <num>      <num>      <num>      <num>      <num>      <num>      <num>
## 1: -0.5501185  0.5945745  0.1888587 -0.2004008 -0.4120299  0.3387459 -0.5021316
## 2: -0.5501185  0.5945745  0.1888587 -0.2004008 -0.4120299  0.7520483 -0.1896942
## 3: -0.2582189 -0.6936702 -0.4074443 -0.4026317 -0.5493732 -0.1474922  0.6974050
## 4:  1.1091990 -0.6133196 -0.2938955 -0.9323843 -0.5259081 -0.7124963  0.7176592
## 5:  0.4049674  0.6814663  0.6319326  0.2733692 -0.3474750 -0.4230701 -0.6641654
## 6:  0.2484716 -0.6133196 -0.5934281 -1.0251346 -1.3416023 -0.3973433  1.1630407
##            vs    am       gear       carb
##         <num> <num>      <num>      <num>
## 1: -1.0377490     1 -0.7595545  0.4944600
## 2: -1.0377490     1 -0.7595545  0.4944600
## 3:  0.8894992     1 -0.7595545 -0.8829642
## 4:  1.2743862     0 -0.5026247 -1.5141438
## 5: -0.7433919     0 -0.5026247 -0.6423641
## 6:  1.2743862     0 -0.5026247 -1.5141438

# modify all non-grouping variables to new variables
mtcars %>%
    let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb mpg_scaled
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>      <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4 -0.5501185
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4 -0.5501185
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1 -0.2582189
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1  1.1091990
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2  0.4049674
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1  0.2484716
##    cyl_scaled disp_scaled  hp_scaled drat_scaled  wt_scaled qsec_scaled
##         <num>       <num>      <num>       <num>      <num>       <num>
## 1:  0.5945745   0.1888587 -0.2004008  -0.4120299  0.3387459  -0.5021316
## 2:  0.5945745   0.1888587 -0.2004008  -0.4120299  0.7520483  -0.1896942
## 3: -0.6936702  -0.4074443 -0.4026317  -0.5493732 -0.1474922   0.6974050
## 4: -0.6133196  -0.2938955 -0.9323843  -0.5259081 -0.7124963   0.7176592
## 5:  0.6814663   0.6319326  0.2733692  -0.3474750 -0.4230701  -0.6641654
## 6: -0.6133196  -0.5934281 -1.0251346  -1.3416023 -0.3973433   1.1630407
##     vs_scaled gear_scaled carb_scaled
##         <num>       <num>       <num>
## 1: -1.0377490  -0.7595545   0.4944600
## 2: -1.0377490  -0.7595545   0.4944600
## 3:  0.8894992  -0.7595545  -0.8829642
## 4:  1.2743862  -0.5026247  -1.5141438
## 5: -0.7433919  -0.5026247  -0.6423641
## 6:  1.2743862  -0.5026247  -1.5141438

# conditionally modify all variables
iris %>%
    let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
##           <num>       <num>        <num>       <num>  <fctr>             <num>
## 1:          5.1         3.5          1.4         0.2  setosa          5.843333
## 2:          4.9         3.0          1.4         0.2  setosa          5.843333
## 3:          4.7         3.2          1.3         0.2  setosa          5.843333
## 4:          4.6         3.1          1.5         0.2  setosa          5.843333
## 5:          5.0         3.6          1.4         0.2  setosa          5.843333
## 6:          5.4         3.9          1.7         0.4  setosa          5.843333
##    Sepal.Width_mean Petal.Length_mean Petal.Width_mean
##               <num>             <num>            <num>
## 1:         3.057333             3.758         1.199333
## 2:         3.057333             3.758         1.199333
## 3:         3.057333             3.758         1.199333
## 4:         3.057333             3.758         1.199333
## 5:         3.057333             3.758         1.199333
## 6:         3.057333             3.758         1.199333

# modify all variables conditionally on name
iris %>%
    let_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    ) %>%
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
##           <num>       <num>        <num>       <num>  <fctr>             <num>
## 1:          5.1         3.5          1.4         0.2  setosa             5.006
## 2:          4.9         3.0          1.4         0.2  setosa             5.006
## 3:          4.7         3.2          1.3         0.2  setosa             5.006
## 4:          4.6         3.1          1.5         0.2  setosa             5.006
## 5:          5.0         3.6          1.4         0.2  setosa             5.006
## 6:          5.4         3.9          1.7         0.4  setosa             5.006
##    Sepal.Width_mean Petal.Length_median Petal.Width_median
##               <num>               <num>              <num>
## 1:            3.428                 1.5                0.2
## 2:            3.428                 1.5                0.2
## 3:            3.428                 1.5                0.2
## 4:            3.428                 1.5                0.2
## 5:            3.428                 1.5                0.2
## 6:            3.428                 1.5                0.2

# aggregation with 'take_all'
mtcars %>%
    take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)

##       am mpg_mean cyl_mean disp_mean  hp_mean drat_mean  wt_mean qsec_mean
##    <num>    <num>    <num>     <num>    <num>     <num>    <num>     <num>
## 1:     1 24.39231 5.076923  143.5308 126.8462  4.050000 2.411000  17.36000
## 2:     0 17.14737 6.947368  290.3789 160.2632  3.286316 3.768895  18.18316
##      vs_mean gear_mean carb_mean   mpg_sd   cyl_sd   disp_sd    hp_sd   drat_sd
##        <num>     <num>     <num>    <num>    <num>     <num>    <num>     <num>
## 1: 0.5384615  4.384615  2.923077 6.166504 1.552500  87.20399 84.06232 0.3640513
## 2: 0.3684211  3.210526  2.736842 3.833966 1.544657 110.17165 53.90820 0.3923039
##        wt_sd  qsec_sd     vs_sd   gear_sd  carb_sd mpg_n cyl_n disp_n  hp_n
##        <num>    <num>     <num>     <num>    <num> <int> <int>  <int> <int>
## 1: 0.6169816 1.792359 0.5188745 0.5063697 2.177978    13    13     13    13
## 2: 0.7774001 1.751308 0.4955946 0.4188539 1.147079    19    19     19    19
##    drat_n  wt_n qsec_n  vs_n gear_n carb_n
##     <int> <int>  <int> <int>  <int>  <int>
## 1:     13    13     13    13     13     13
## 2:     19    19     19    19     19     19

# conditionally aggregate all variables
iris %>%
    take_all(mean = if(is.numeric(.x)) mean(.x))

##    Sepal.Length_mean Sepal.Width_mean Petal.Length_mean Petal.Width_mean
##                <num>            <num>             <num>            <num>
## 1:          5.843333         3.057333             3.758         1.199333

# aggregate all variables conditionally on name
iris %>%
    take_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    )

##       Species Sepal.Length_mean Sepal.Width_mean Petal.Length_median
##        <fctr>             <num>            <num>               <num>
## 1:     setosa             5.006            3.428                1.50
## 2: versicolor             5.936            2.770                4.35
## 3:  virginica             6.588            2.974                5.55
##    Petal.Width_median
##                 <num>
## 1:                0.2
## 2:                1.3
## 3:                2.0

# parametric evaluation:
var = quote(mean(cyl))
mtcars %>% 
    let(mean_cyl = eval(var)) %>% 
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb mean_cyl
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>    <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4   6.1875
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4   6.1875
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1   6.1875
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1   6.1875
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2   6.1875
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1   6.1875

take(mtcars, eval(var))

##    eval(var)
##        <num>
## 1:    6.1875

# all together
new_var = "mean_cyl"
mtcars %>% 
    let((new_var) := eval(var)) %>% 
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb mean_cyl
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>    <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4   6.1875
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4   6.1875
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1   6.1875
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1   6.1875
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2   6.1875
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1   6.1875

take(mtcars, (new_var) := eval(var))

##    mean_cyl
##       <num>
## 1:   6.1875

Variable selection in the expressions

You can use ‘columns’ inside expression in the ‘take’/‘let’. ‘columns’ will be replaced with data.table with selected columns. In ‘let’ in the expressions with ‘:=’, ‘cols’ or ‘%to%’ can be placed in the left part of the expression. It is usefull for multiple assignment. There are four ways of column selection:

Simply by column names
By variable ranges, e. g. vs:carb. Alternatively, you can use ‘%to%’ instead of colon: ‘vs %to% carb’.
With regular expressions. Characters which start with ‘^’ or end with $ considered as Perl-style regular expression patterns. For example, ‘^Petal’ returns all variables started with ‘Petal’. ‘Width$’ returns all variables which end with ‘Width’. Pattern ‘^.’ matches all variables and pattern ’^.*my_str’ is equivalent to contains “my_str”’.
By character variables with interpolated parts. Expression in the curly brackets inside characters will be evaluated in the parent frame with ‘text_expand’ function. For example, a{1:3} will be transformed to the names ‘a1’, ‘a2’, ‘a3’. ‘cols’ is just a shortcut for ‘columns’.

# range selection
iris %>% 
    let(
        avg = rowMeans(Sepal.Length %to% Petal.Width)
    ) %>% 
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species   avg
##           <num>       <num>        <num>       <num>  <fctr> <num>
## 1:          5.1         3.5          1.4         0.2  setosa 2.550
## 2:          4.9         3.0          1.4         0.2  setosa 2.375
## 3:          4.7         3.2          1.3         0.2  setosa 2.350
## 4:          4.6         3.1          1.5         0.2  setosa 2.350
## 5:          5.0         3.6          1.4         0.2  setosa 2.550
## 6:          5.4         3.9          1.7         0.4  setosa 2.850

# multiassignment
iris %>% 
    let(
        # starts with Sepal or Petal
        multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
    ) %>% 
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species multipled1
##           <num>       <num>        <num>       <num>  <fctr>      <num>
## 1:          5.1         3.5          1.4         0.2  setosa       10.2
## 2:          4.9         3.0          1.4         0.2  setosa        9.8
## 3:          4.7         3.2          1.3         0.2  setosa        9.4
## 4:          4.6         3.1          1.5         0.2  setosa        9.2
## 5:          5.0         3.6          1.4         0.2  setosa       10.0
## 6:          5.4         3.9          1.7         0.4  setosa       10.8
##    multipled2 multipled3 multipled4
##         <num>      <num>      <num>
## 1:        7.0        2.8        0.4
## 2:        6.0        2.8        0.4
## 3:        6.4        2.6        0.4
## 4:        6.2        3.0        0.4
## 5:        7.2        2.8        0.4
## 6:        7.8        3.4        0.8

mtcars %>% 
    let(
        # text expansion
        cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
    ) %>% 
    head()

##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb scaled_mpg
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>      <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4  0.1508848
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4  0.1508848
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1  0.4495434
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1  0.2172534
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2 -0.2307345
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1 -0.3302874
##    scaled_cyl scaled_disp  scaled_hp scaled_drat    scaled_wt scaled_qsec
##         <num>       <num>      <num>       <num>        <num>       <num>
## 1: -0.1049878 -0.57061982 -0.5350928   0.5675137 -0.610399567  -0.7771651
## 2: -0.1049878 -0.57061982 -0.5350928   0.5675137 -0.349785269  -0.4637808
## 3: -1.2248578 -0.99018209 -0.7830405   0.4739996 -0.917004624   0.4260068
## 4: -0.1049878  0.22009369 -0.5350928  -0.9661175 -0.002299538   0.8904872
## 5:  1.0148821  1.04308123  0.4129422  -0.8351978  0.227654255  -0.4637808
## 6: -0.1049878 -0.04616698 -0.6080186  -1.5646078  0.248094592   1.3269868
##     scaled_vs  scaled_am scaled_gear scaled_carb
##         <num>      <num>       <num>       <num>
## 1: -0.8680278  1.1899014   0.4235542   0.7352031
## 2: -0.8680278  1.1899014   0.4235542   0.7352031
## 3:  1.1160357  1.1899014   0.4235542  -1.1221521
## 4:  1.1160357 -0.8141431  -0.9318192  -1.1221521
## 5: -0.8680278 -0.8141431  -0.9318192  -0.5030337
## 6:  1.1160357 -0.8141431  -0.9318192  -1.1221521

# range selection in 'by'
# selection of range + additional column
mtcars %>% 
    take(
        res = sum(cols(mpg, disp %to% drat)),
        by = vs %to% gear
    )

##       vs    am  gear     res
##    <num> <num> <num>   <num>
## 1:     0     1     4  589.80
## 2:     1     1     4 1177.97
## 3:     1     0     3  985.64
## 4:     0     0     3 6839.45
## 5:     1     0     4 1125.35
## 6:     0     1     5 1874.61
## 7:     1     1     5  242.27

Joins

Here we use the same datasets as with lookups:

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

workers

##       name company
##     <char>  <char>
## 1:    Nick    Acme
## 2:    John    Ajax
## 3: Daniela    Ajax

positions

##       name position
##     <char>   <char>
## 1:    John designer
## 2: Daniela engineer
## 3:  Cathie  manager

Different kinds of joins:

workers %>% dt_inner_join(positions)

## dt_inner_join: joining, by = "name"

##       name company position
##     <char>  <char>   <char>
## 1:    John    Ajax designer
## 2: Daniela    Ajax engineer

workers %>% dt_left_join(positions)

## dt_left_join: joining, by = "name"

##       name company position
##     <char>  <char>   <char>
## 1:    Nick    Acme     <NA>
## 2:    John    Ajax designer
## 3: Daniela    Ajax engineer

workers %>% dt_right_join(positions)

## dt_right_join: joining, by = "name"

##       name company position
##     <char>  <char>   <char>
## 1:    John    Ajax designer
## 2: Daniela    Ajax engineer
## 3:  Cathie    <NA>  manager

workers %>% dt_full_join(positions)

## dt_full_join: joining, by = "name"

##       name company position
##     <char>  <char>   <char>
## 1:    Nick    Acme     <NA>
## 2:    John    Ajax designer
## 3: Daniela    Ajax engineer
## 4:  Cathie    <NA>  manager

# filtering joins
workers %>% dt_anti_join(positions)

## dt_anti_join: joining, by = "name"

##      name company
##    <char>  <char>
## 1:   Nick    Acme

workers %>% dt_semi_join(positions)

## dt_semi_join: joining, by = "name"

##       name company
##     <char>  <char>
## 1:    John    Ajax
## 2: Daniela    Ajax

To suppress the message, supply by argument:

workers %>% dt_left_join(positions, by = "name")

Use a named by if the join variables have different names:

positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
workers %>% dt_inner_join(positions2, by = c("name" = "worker"))