Pivoting Functions

Functions for pivoting data between long and wide formats.

These are SciViews::R versions of tidyverse functions with standard evaluation and formula-based non-standard evaluation (ending with underscore _). They work with data.frame, data.table, and tibbles.

Functions:

pivot_longer_() - Convert data from wide to long format
pivot_wider_() - Convert data from long to wide format

pivot_longer_(
  .data = (.),
  cols,
  ...,
  cols_vary = "fastest",
  names_to = "name",
  names_prefix = NULL,
  values_to = "value",
  values_drop_na = FALSE,
  factor = FALSE
)

pivot_wider_(
  .data = (.),
  ...,
  id_cols = NULL,
  id_expand = FALSE,
  names_from = "",
  names_prefix = "",
  names_vary = "fastest",
  values_from = "",
  values_fill = NULL,
  values_fn = "last",
  drop = TRUE,
  sort = FALSE
)

Arguments

.data: A data frame (data.frame, data.table, or tibble)
cols: For pivot_longer_(): columns to pivot into longer format. Use tidy-select syntax with formulas (e.g., ~col1:col5, ~starts_with("x")), or provide column positions or names as a vector.
...: Additional arguments (currently unused, for compatibility)
cols_vary: Character. Either "fastest" (default) or "slowest". If "fastest", keeps individual rows from cols close together in the output. If "slowest", keeps individual columns from cols close together.
names_to: Character string specifying the name of the column to create from the column names being pivoted. Default is "name".
names_prefix: Character. A regular expression used to remove matching text from the start of each variable name before creating the names column.
values_to: Character string specifying the name of the column to create from the cell values. Default is "value".
values_drop_na: Logical. If TRUE, rows containing only NA values in the values_to column are dropped from the result. Default is FALSE.
factor: Logical. If TRUE, convert the names and values columns to factors. If FALSE (default), leave them as character strings.
id_cols: For pivot_wider_(): columns that uniquely identify each observation. Use tidy-select syntax or NULL (default) to use all columns except names_from and values_from.
id_expand: Logical. If TRUE, expand the id_cols to include all possible combinations. Default is FALSE.
names_from: For pivot_wider_(): column(s) containing the names for the new columns. Provide as unquoted names or character vector. Default is name.
names_vary: Character. How column names are constructed when multiple names_from or values_from columns are provided: "fastest" (default), "slowest", "transpose", or "slowtranspose".
values_from: For pivot_wider_(): column(s) containing the values for the new columns. Provide as unquoted names or character vector. Default is value.
values_fill: Optional scalar value to use for missing combinations. Default is NULL (use NA).
values_fn: Function to apply when there are multiple values for a cell. Can be a string naming an internal function ("first", "last" (default), "count", "sum", "mean", "min", "max"), or a formula calling an external function with first argument .x (e.g., ~fmedian(.x, na.rm = TRUE)).
drop: Logical. Should unused factor levels be dropped? Default is TRUE.
sort: Logical. If TRUE, sort the result so the largest groups are shown on top. Default is FALSE.

Value

A data frame of the same type as .data in the pivoted format. pivot_longer_() returns a data frame with more rows and fewer columns. pivot_wider_() returns a data frame with fewer rows and more columns.

Examples

library(svTidy)

# Create sample wide data
wide_data <- data.frame(
  id = 1:3,
  year = c(2020, 2021, 2022),
  q1 = c(100, 110, 120),
  q2 = c(105, 115, 125),
  q3 = c(110, 120, 130),
  q4 = c(115, 125, 135)
)

# Pivot from wide to long format
wide_data |>
  pivot_longer_(~q1:q4, names_to = "quarter", values_to = "sales")
#>    id year quarter sales
#> 1   1 2020      q1   100
#> 4   1 2020      q2   105
#> 7   1 2020      q3   110
#> 10  1 2020      q4   115
#> 2   2 2021      q1   110
#> 5   2 2021      q2   115
#> 8   2 2021      q3   120
#> 11  2 2021      q4   125
#> 3   3 2022      q1   120
#> 6   3 2022      q2   125
#> 9   3 2022      q3   130
#> 12  3 2022      q4   135

# Use tidy-select helpers
wide_data |>
  pivot_longer_(~starts_with("q"), names_to = "quarter", values_to = "sales")
#>    id year quarter sales
#> 1   1 2020      q1   100
#> 4   1 2020      q2   105
#> 7   1 2020      q3   110
#> 10  1 2020      q4   115
#> 2   2 2021      q1   110
#> 5   2 2021      q2   115
#> 8   2 2021      q3   120
#> 11  2 2021      q4   125
#> 3   3 2022      q1   120
#> 6   3 2022      q2   125
#> 9   3 2022      q3   130
#> 12  3 2022      q4   135

# Remove prefix from column names
wide_data |>
  pivot_longer_(
    ~q1:q4,
    names_to = "quarter",
    values_to = "sales",
    names_prefix = "q"
  )
#>    id year quarter sales
#> 1   1 2020       1   100
#> 4   1 2020       2   105
#> 7   1 2020       3   110
#> 10  1 2020       4   115
#> 2   2 2021       1   110
#> 5   2 2021       2   115
#> 8   2 2021       3   120
#> 11  2 2021       4   125
#> 3   3 2022       1   120
#> 6   3 2022       2   125
#> 9   3 2022       3   130
#> 12  3 2022       4   135

# Control row ordering with cols_vary
wide_data |>
  pivot_longer_(~q1:q4, cols_vary = "slowest")
#>    id year name value
#> 1   1 2020   q1   100
#> 2   2 2021   q1   110
#> 3   3 2022   q1   120
#> 4   1 2020   q2   105
#> 5   2 2021   q2   115
#> 6   3 2022   q2   125
#> 7   1 2020   q3   110
#> 8   2 2021   q3   120
#> 9   3 2022   q3   130
#> 10  1 2020   q4   115
#> 11  2 2021   q4   125
#> 12  3 2022   q4   135

# Drop NA values
wide_na <- wide_data
wide_na$q3[2] <- NA
wide_na |>
  pivot_longer_(~q1:q4, values_drop_na = TRUE)
#>    id year name value
#> 1   1 2020   q1   100
#> 2   1 2020   q2   105
#> 3   1 2020   q3   110
#> 4   1 2020   q4   115
#> 5   2 2021   q1   110
#> 6   2 2021   q2   115
#> 7   2 2021   q4   125
#> 8   3 2022   q1   120
#> 9   3 2022   q2   125
#> 10  3 2022   q3   130
#> 11  3 2022   q4   135

# Convert to factors
wide_data |>
  pivot_longer_(~q1:q4, factor = TRUE)
#>    id year name value
#> 1   1 2020   q1   100
#> 4   1 2020   q2   105
#> 7   1 2020   q3   110
#> 10  1 2020   q4   115
#> 2   2 2021   q1   110
#> 5   2 2021   q2   115
#> 8   2 2021   q3   120
#> 11  2 2021   q4   125
#> 3   3 2022   q1   120
#> 6   3 2022   q2   125
#> 9   3 2022   q3   130
#> 12  3 2022   q4   135

# Create sample long data
long_data <- data.frame(
  id = rep(1:3, each = 4),
  year = rep(c(2020, 2021, 2022), each = 4),
  quarter = rep(c("q1", "q2", "q3", "q4"), 3),
  sales = c(100, 105, 110, 115, 110, 115, 120, 125, 120, 125, 130, 135)
)

# Pivot from long to wide format
long_data |>
  pivot_wider_(names_from = "quarter", values_from = "sales")
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Specify id columns explicitly
long_data |>
  pivot_wider_(
    id_cols = ~c(id, year),
    names_from = "quarter",
    values_from = "sales"
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Add prefix to new column names
long_data |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    names_prefix = "sales_"
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Fill missing values
long_data |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    values_fill = 0
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Handle multiple values with aggregation
long_dup <- rbind(long_data, long_data[1:3, ])
long_dup |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    values_fn = "mean"
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Use custom aggregation function
long_dup |>
  pivot_wider_(
    names_from = "quarter",
    values_from = "sales",
    values_fn = ~sum(.x)
  )
#>   id year  q1  q2  q3  q4
#> 1  1 2020 200 210 220 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

# Multiple names_from and values_from
long_multi <- data.frame(
  id = rep(1:2, each = 4),
  metric = rep(c("sales", "profit"), 4),
  quarter = rep(c("q1", "q2"), each = 2, times = 2),
  value = c(100, 20, 105, 22, 110, 24, 115, 26)
)
long_multi |>
  pivot_wider_(
    names_from = c("quarter", "metric"),
    values_from = "value"
  )
#>   id q1_sales q1_profit q2_sales q2_profit
#> 1  1      100        20      105        22
#> 2  2      110        24      115        26

# Round-trip: wide -> long -> wide
wide_data |>
  pivot_longer_(~q1:q4, names_to = "quarter", values_to = "sales") |>
  pivot_wider_(names_from = "quarter", values_from = "sales")
#>   id year  q1  q2  q3  q4
#> 1  1 2020 100 105 110 115
#> 2  2 2021 110 115 120 125
#> 3  3 2022 120 125 130 135

Arguments

Value

See also

Examples