loading...

Subsetting a data.trame uses a syntax similar to tibble by default. Alternatively, provide formulas for i, j, and possibly by or keyby, to use the data.table syntax instead.

# S3 method for class 'data.trame'
x[i, j, by, keyby, with = TRUE, drop = FALSE, ...]

set_(x, i, j, value, byref = FALSE)

let_(x, i = NULL, j = seq_along(x), value)

Arguments

x

A data.trame object.

i

Selection of rows by indices, negative indices, logical or a formula

j

Selection of columns by indices, negative indices, logical, names or a formula (both i and j must be formulas simultaneously). If := is used in the formula to create one or more new variables by reference, the expression must be placed between {} to avoid operator precedence issues; or, better, := can simply be replaced by ~.

by

Grouping columns (must be a formula, and j must also be provided as a formula)

keyby

Either TRUE/FALSE if by is provided, or a formula (and j must also be provided as a formula)

with

Logical, whether to evaluate j in the data.trame if TRUE or in the calling environment if FALSE (default is TRUE). with = FALSE is similar to tibble subsetting and it is forced when i or j are not formulas.

drop

Coerce to a vector if the returned data.trame only has one column

...

Further arguments passed to the underlying data.table subsetting

value

The value to insert as subassignment in a data.trame object.

byref

Logical, whether to perform the assignment by reference or not (FALSE by default).

Value

A data.trame object, or a vector if drop = TRUE and the result has only one column.

Examples

dtrm <- data.trame(
  a = 1:3,
  b = letters[1:3],
  c = factor(LETTERS[1:3])
)
# Subsetting rows, the tibble-way
dtrm[1:2, ]
#> # A data.trame: [2 × 3]
#>       a b     c    
#>   <int> <chr> <fct>
#> 1     1 a     A    
#> 2     2 b     B    
dtrm[-1, ]
#> # A data.trame: [2 × 3]
#>       a b     c    
#>   <int> <chr> <fct>
#> 1     2 b     B    
#> 2     3 c     C    
dtrm[c(TRUE, FALSE, TRUE), ]
#> # A data.trame: [2 × 3]
#>       a b     c    
#>   <int> <chr> <fct>
#> 1     1 a     A    
#> 2     3 c     C    
# Contrary to data.table, providing only one argument means subsetting
# columns (like for data.frame or tibble)
dtrm[c(TRUE, FALSE, TRUE)]
#> # A data.trame: [3 × 2]
#>       a c    
#>   <int> <fct>
#> 1     1 A    
#> 2     2 B    
#> 3     3 C    
dtrm[dtrm$a > 1, ] # Must fully qualify the column name
#> # A data.trame: [2 × 3]
#>       a b     c    
#>   <int> <chr> <fct>
#> 1     2 b     B    
#> 2     3 c     C    
# Subsetting the data.table way, with formulas: no full qualification needed
dtrm[~a > 1, ]
#> # A data.trame: [2 × 3]
#>       a b     c    
#>   <int> <chr> <fct>
#> 1     2 b     B    
#> 2     3 c     C    

# Subsetting the columns, the tibble way
dtrm[, 1:2]
#> # A data.trame: [3 × 2]
#>       a b    
#>   <int> <chr>
#> 1     1 a    
#> 2     2 b    
#> 3     3 c    
dtrm[, -1]
#> # A data.trame: [3 × 2]
#>   b     c    
#>   <chr> <fct>
#> 1 a     A    
#> 2 b     B    
#> 3 c     C    
dtrm[, c(TRUE, FALSE, TRUE)]
#> # A data.trame: [3 × 2]
#>       a c    
#>   <int> <fct>
#> 1     1 A    
#> 2     2 B    
#> 3     3 C    
dtrm[, c("a", "b")]
#> # A data.trame: [3 × 2]
#>       a b    
#>   <int> <chr>
#> 1     1 a    
#> 2     2 b    
#> 3     3 c    
# You must set drop = TRUE explicitly to return a vector
dtrm[, 2] # Still a data.trame, like tibble, but unlike the data.frame method
#> # A data.trame: [3 × 1]
#>   b    
#>   <chr>
#> 1 a    
#> 2 b    
#> 3 c    
dtrm[, 2, drop = TRUE] # Now a vector
#> [1] "a" "b" "c"
# The selection is referentially transparent, i.e., you can do:
sel <- c("c", "b")
dtrm[, sel]
#> # A data.trame: [3 × 2]
#>   c     b    
#>   <fct> <chr>
#> 1 A     a    
#> 2 B     b    
#> 3 C     c    
# Subsetting the columns, the data.table way, with formulas
dtrm[~1:2, ~.(b)]
#>         b
#>    <char>
#> 1:      a
#> 2:      b
dtrm[~1:2, ~b] # If not enclosed in .(), returns a vector instead
#> [1] "a" "b"
# Precautions are needed here because it is NOT referentially transparent:
dtrm[, ~..sel] # In data.table language, this is how you access `sel`
#> # A data.trame: [3 × 2]
#>   c     b    
#>   <fct> <chr>
#> 1 A     a    
#> 2 B     b    
#> 3 C     c    

# Extended data.table syntax using i, j, by, or keyby with formulas
# Warning: due to precedence of operators, you must use braces here!
dtrm[, ~{d := paste0(b, c)}] # Changed in place (by reference!)
#> # A data.trame: [3 × 4]
#>       a b     c     d    
#>   <int> <chr> <fct> <chr>
#> 1     1 a     A     aA   
#> 2     2 b     B     bB   
#> 3     3 c     C     cC   
# Another form that does not need braces, but is less readable:
dtrm[, ~`:=`(e, paste0(b, a))]
#> # A data.trame: [3 × 5]
#>       a b     c     d     e    
#>   <int> <chr> <fct> <chr> <chr>
#> 1     1 a     A     aA    a1   
#> 2     2 b     B     bB    b2   
#> 3     3 c     C     cC    c3   
# or equivalently:
dtrm[, ~let(e = paste0(b, a))]
#> # A data.trame: [3 × 5]
#>       a b     c     d     e    
#>   <int> <chr> <fct> <chr> <chr>
#> 1     1 a     A     aA    a1   
#> 2     2 b     B     bB    b2   
#> 3     3 c     C     cC    c3   
# In this case, it is much better to just replace `:=` by `~`, but internally
# it uses set(). It is faster, but much more limited and cannot use by or
# keyby:
dtrm[, f ~ paste0(c, a)]
#> # A data.trame: [3 × 6]
#>       a b     c     d     e     f    
#>   <int> <chr> <fct> <chr> <chr> <chr>
#> 1     1 a     A     aA    a1    A1   
#> 2     2 b     B     bB    b2    B2   
#> 3     3 c     C     cC    c3    C3   
# One can also use standard evaluation in that case using with = FALSE
dtrm[, f ~ paste0(dtrm$c, dtrm$a), with = FALSE]
#> # A data.trame: [3 × 6]
#>       a b     c     d     e     f    
#>   <int> <chr> <fct> <chr> <chr> <chr>
#> 1     1 a     A     aA    a1    A1   
#> 2     2 b     B     bB    b2    B2   
#> 3     3 c     C     cC    c3    C3   
#
# Take care when you provide only one argument:
# If it is a formula, the data.table syntax is used (select rows)
# otherwise, the data.frame syntax applies, and columns are selected!
dtrm[1:2] # All rows and the first 2 columns
#> # A data.trame: [3 × 2]
#>       a b    
#>   <int> <chr>
#> 1     1 a    
#> 2     2 b    
#> 3     3 c    
dtrm[~1:2] # All columns and the first 2 rows!
#> # A data.trame: [2 × 6]
#>       a b     c     d     e     f    
#>   <int> <chr> <fct> <chr> <chr> <chr>
#> 1     1 a     A     aA    a1    A1   
#> 2     2 b     B     bB    b2    B2   

# For $, contrary to data.frame/data.table, but like tibble,
# no partial matching is allowed (returns NULL with a warning)
dtrm$count <- dtrm$c
names(dtrm)
#> [1] "a"     "b"     "c"     "d"     "e"     "f"     "count"
dtrm$count #OK
#> [1] A B C
#> Levels: A B C
#dtrm$co # Not OK, no partial match allowed