loading...

Functions for pivoting data between long and wide formats.

These are SciViews::R versions of tidyverse functions with standard evaluation and formula-based non-standard evaluation (ending with underscore _). They work with data.frame, data.table, and tibbles.

Functions:

  • pivot_longer_() - Convert data from wide to long format

  • pivot_wider_() - Convert data from long to wide format

pivot_longer_(
  .data = (.),
  cols,
  ...,
  cols_vary = "fastest",
  names_to = "name",
  names_prefix = NULL,
  values_to = "value",
  values_drop_na = FALSE,
  factor = FALSE
)

pivot_wider_(
  .data = (.),
  ...,
  id_cols = NULL,
  id_expand = FALSE,
  names_from = "",
  names_prefix = "",
  names_vary = "fastest",
  values_from = "",
  values_fill = NULL,
  values_fn = "last",
  drop = TRUE,
  sort = FALSE
)

Arguments

.data

A data frame (data.frame, data.table, or tibble)

cols

For pivot_longer_(): columns to pivot into longer format. Use tidy-select syntax with formulas (e.g., ~col1:col5, ~starts_with("x")), or provide column positions or names as a vector.

...

Additional arguments (currently unused, for compatibility)

cols_vary

Character. Either "fastest" (default) or "slowest". If "fastest", keeps individual rows from cols close together in the output. If "slowest", keeps individual columns from cols close together.

names_to

Character string specifying the name of the column to create from the column names being pivoted. Default is "name".

names_prefix

Character. A regular expression used to remove matching text from the start of each variable name before creating the names column.

values_to

Character string specifying the name of the column to create from the cell values. Default is "value".

values_drop_na

Logical. If TRUE, rows containing only NA values in the values_to column are dropped from the result. Default is FALSE.

factor

Logical. If TRUE, convert the names and values columns to factors. If FALSE (default), leave them as character strings.

id_cols

For pivot_wider_(): columns that uniquely identify each observation. Use tidy-select syntax or NULL (default) to use all columns except names_from and values_from.

id_expand

Logical. If TRUE, expand the id_cols to include all possible combinations. Default is FALSE.

names_from

For pivot_wider_(): column(s) containing the names for the new columns. Provide as unquoted names or character vector. Default is name.

names_vary

Character. How column names are constructed when multiple names_from or values_from columns are provided: "fastest" (default), "slowest", "transpose", or "slowtranspose".

values_from

For pivot_wider_(): column(s) containing the values for the new columns. Provide as unquoted names or character vector. Default is value.

values_fill

Optional scalar value to use for missing combinations. Default is NULL (use NA).

values_fn

Function to apply when there are multiple values for a cell. Can be a string naming an internal function ("first", "last" (default), "count", "sum", "mean", "min", "max"), or a formula calling an external function with first argument .x (e.g., ~fmedian(.x, na.rm = TRUE)).

drop

Logical. Should unused factor levels be dropped? Default is TRUE.

sort

Logical. If TRUE, sort the result so the largest groups are shown on top. Default is FALSE.

Value

A data frame of the same type as .data in the pivoted format. pivot_longer_() returns a data frame with more rows and fewer columns. pivot_wider_() returns a data frame with fewer rows and more columns.

Examples

library(svTidy)

# Create sample wide data
wide_data <- data.frame(
  id = 1:3,
  year = c(2020, 2021, 2022),
  q1 = c(100, 110, 120),
  q2 = c(105, 115, 125),
  q3 = c(110, 120, 130),
  q4 = c(115, 125, 135)
)

# Pivot from wide to long format
wide_data |>
  pivot_longer_(~q1:q4, names_to = "quarter", values_to = "sales")
#>    id year quarter sales
#> 1   1 2020      q1   100
#> 4   1 2020      q2   105
#> 7   1 2020      q3   110
#> 10  1 2020      q4   115
#> 2   2 2021      q1   110
#> 5   2 2021      q2   115
#> 8   2 2021      q3   120
#> 11  2 2021      q4   125
#> 3   3 2022      q1   120
#> 6   3 2022      q2   125
#> 9   3 2022      q3   130
#> 12  3 2022      q4   135

# Use tidy-select helpers
wide_data |>
  pivot_longer_(~starts_with("q"), names_to = "quarter", values_to = "sales")
#>    id year quarter sales
#> 1   1 2020      q1   100
#> 4   1 2020      q2   105
#> 7   1 2020      q3   110
#> 10  1 2020      q4   115
#> 2   2 2021      q1   110
#> 5   2 2021      q2   115
#> 8   2 2021      q3   120
#> 11  2 2021      q4   125
#> 3   3 2022      q1   120
#> 6   3 2022      q2   125
#> 9   3 2022      q3   130
#> 12  3 2022      q4   135

# Remove prefix from column names
wide_data |>
  pivot_longer_(
    ~q1:q4,
    names_to = "quarter",
    values_to = "sales",
    names_prefix = "q"
  )
#>    id year quarter sales
#> 1   1 2020       1   100
#> 4   1 2020       2   105
#> 7   1 2020       3   110
#> 10  1 2020       4   115
#> 2   2 2021       1   110
#> 5   2 2021       2   115
#> 8   2 2021       3   120
#> 11  2 2021       4   125
#> 3   3 2022       1   120
#> 6   3 2022       2   125
#> 9   3 2022       3   130
#> 12  3 2022       4   135

# Control row ordering with cols_vary
wide_data |>
  pivot_longer_(~q1:q4, cols_vary = "slowest")
#>    id year name value
#> 1   1 2020   q1   100
#> 2   2 2021   q1   110
#> 3   3 2022   q1   120
#> 4   1 2020   q2   105
#> 5   2 2021   q2   115
#> 6   3 2022   q2   125
#> 7   1 2020   q3   110
#> 8   2 2021   q3   120
#> 9   3 2022   q3   130
#> 10  1 2020   q4   115
#> 11  2 2021   q4   125
#> 12  3 2022   q4   135

# Drop NA values
wide_na <- wide_data
wide_na$q3[2] <- NA
wide_na |>
  pivot_longer_(~q1:q4, values_drop_na = TRUE)
#>    id year name value
#> 1   1 2020   q1   100
#> 2   1 2020   q2   105
#> 3   1 2020   q3   110
#> 4   1 2020   q4   115
#> 5   2 2021   q1   110
#> 6   2 2021   q2   115
#> 7   2 2021   q4   125
#> 8   3 2022   q1   120
#> 9   3 2022   q2   125
#> 10  3 2022   q3   130
#> 11  3 2022   q4   135

# Convert to factors
wide_data |>
  pivot_longer_(~q1:q4, factor = TRUE)
#>    id year name value
#> 1   1 2020   q1   100
#> 4   1 2020   q2   105
#> 7   1 2020   q3   110
#> 10  1 2020   q4   115
#> 2   2 2021   q1   110
#> 5   2 2021   q2   115
#> 8   2 2021   q3   120
#> 11  2 2021   q4   125
#> 3   3 2022   q1   120
#> 6   3 2022   q2   125
#> 9   3 2022   q3   130
#> 12  3 2022   q4   135

# Create sample long data
long_data <- data.frame(
  id = rep(1:3, each = 4),
  year = rep(c(2020, 2021, 2022), each = 4),
  quarter = rep(c("q1", "q2", "q3", "q4"), 3),
  sales = c(100, 105, 110, 115, 110, 115, 120, 125, 120, 125, 130, 135)
)

# Pivot from long to wide format
long_data |>
  pivot_wider_(names_from = "quarter", values_from = "sales")
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Specify id columns explicitly
long_data |>
  pivot_wider_(
    id_cols = ~c(id, year),
    names_from = "quarter",
    values_from = "sales"
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Add prefix to new column names
long_data |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    names_prefix = "sales_"
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Fill missing values
long_data |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    values_fill = 0
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Handle multiple values with aggregation
long_dup <- rbind(long_data, long_data[1:3, ])
long_dup |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    values_fn = "mean"
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Use custom aggregation function
long_dup |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    values_fn = ~sum(.x)
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 200 210 220 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Multiple names_from and values_from
long_multi <- data.frame(
  id = rep(1:2, each = 4),
  metric = rep(c("sales", "profit"), 4),
  quarter = rep(c("q1", "q2"), each = 2, times = 2),
  value = c(100, 20, 105, 22, 110, 24, 115, 26)
)
long_multi |>
  pivot_wider_(
    names_from = c("quarter", "metric"),
    values_from = "value"
  )
#>   id q1_sales q1_profit q2_sales q2_profit
#> 1  1      100        20      105        22
#> 2  2      110        24      115        26

# Round-trip: wide -> long -> wide
wide_data |>
  pivot_longer_(~q1:q4, names_to = "quarter", values_to = "sales") |>
  pivot_wider_(names_from = "quarter", values_from = "sales")
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135