
Functions for pivoting data between long and wide formats.
These are SciViews::R versions of tidyverse functions with standard
evaluation and formula-based non-standard evaluation (ending with underscore
_). They work with data.frame, data.table, and tibbles.
Functions:
pivot_longer_() - Convert data from wide to long format
pivot_wider_() - Convert data from long to wide format
pivot_longer_(
.data = (.),
cols,
...,
cols_vary = "fastest",
names_to = "name",
names_prefix = NULL,
values_to = "value",
values_drop_na = FALSE,
factor = FALSE
)
pivot_wider_(
.data = (.),
...,
id_cols = NULL,
id_expand = FALSE,
names_from = "",
names_prefix = "",
names_vary = "fastest",
values_from = "",
values_fill = NULL,
values_fn = "last",
drop = TRUE,
sort = FALSE
)A data frame (data.frame, data.table, or tibble)
For pivot_longer_(): columns to pivot into longer format. Use
tidy-select syntax with formulas (e.g., ~col1:col5, ~starts_with("x")),
or provide column positions or names as a vector.
Additional arguments (currently unused, for compatibility)
Character. Either "fastest" (default) or "slowest". If
"fastest", keeps individual rows from cols close together in the output.
If "slowest", keeps individual columns from cols close together.
Character string specifying the name of the column to create
from the column names being pivoted. Default is "name".
Character. A regular expression used to remove matching text from the start of each variable name before creating the names column.
Character string specifying the name of the column to create
from the cell values. Default is "value".
Logical. If TRUE, rows containing only NA values in
the values_to column are dropped from the result. Default is FALSE.
Logical. If TRUE, convert the names and values columns to
factors. If FALSE (default), leave them as character strings.
For pivot_wider_(): columns that uniquely identify each
observation. Use tidy-select syntax or NULL (default) to use all columns
except names_from and values_from.
Logical. If TRUE, expand the id_cols to include all
possible combinations. Default is FALSE.
For pivot_wider_(): column(s) containing the names for
the new columns. Provide as unquoted names or character vector. Default is
name.
Character. How column names are constructed when multiple
names_from or values_from columns are provided: "fastest" (default),
"slowest", "transpose", or "slowtranspose".
For pivot_wider_(): column(s) containing the values for
the new columns. Provide as unquoted names or character vector. Default is
value.
Optional scalar value to use for missing combinations.
Default is NULL (use NA).
Function to apply when there are multiple values for a cell.
Can be a string naming an internal function ("first", "last" (default),
"count", "sum", "mean", "min", "max"), or a formula calling an
external function with first argument .x (e.g., ~fmedian(.x, na.rm = TRUE)).
Logical. Should unused factor levels be dropped? Default is TRUE.
Logical. If TRUE, sort the result so the largest groups are shown
on top. Default is FALSE.
A data frame of the same type as .data in the pivoted format.
pivot_longer_() returns a data frame with more rows and fewer columns.
pivot_wider_() returns a data frame with fewer rows and more columns.
library(svTidy)
# Create sample wide data
wide_data <- data.frame(
id = 1:3,
year = c(2020, 2021, 2022),
q1 = c(100, 110, 120),
q2 = c(105, 115, 125),
q3 = c(110, 120, 130),
q4 = c(115, 125, 135)
)
# Pivot from wide to long format
wide_data |>
pivot_longer_(~q1:q4, names_to = "quarter", values_to = "sales")
#> id year quarter sales
#> 1 1 2020 q1 100
#> 4 1 2020 q2 105
#> 7 1 2020 q3 110
#> 10 1 2020 q4 115
#> 2 2 2021 q1 110
#> 5 2 2021 q2 115
#> 8 2 2021 q3 120
#> 11 2 2021 q4 125
#> 3 3 2022 q1 120
#> 6 3 2022 q2 125
#> 9 3 2022 q3 130
#> 12 3 2022 q4 135
# Use tidy-select helpers
wide_data |>
pivot_longer_(~starts_with("q"), names_to = "quarter", values_to = "sales")
#> id year quarter sales
#> 1 1 2020 q1 100
#> 4 1 2020 q2 105
#> 7 1 2020 q3 110
#> 10 1 2020 q4 115
#> 2 2 2021 q1 110
#> 5 2 2021 q2 115
#> 8 2 2021 q3 120
#> 11 2 2021 q4 125
#> 3 3 2022 q1 120
#> 6 3 2022 q2 125
#> 9 3 2022 q3 130
#> 12 3 2022 q4 135
# Remove prefix from column names
wide_data |>
pivot_longer_(
~q1:q4,
names_to = "quarter",
values_to = "sales",
names_prefix = "q"
)
#> id year quarter sales
#> 1 1 2020 1 100
#> 4 1 2020 2 105
#> 7 1 2020 3 110
#> 10 1 2020 4 115
#> 2 2 2021 1 110
#> 5 2 2021 2 115
#> 8 2 2021 3 120
#> 11 2 2021 4 125
#> 3 3 2022 1 120
#> 6 3 2022 2 125
#> 9 3 2022 3 130
#> 12 3 2022 4 135
# Control row ordering with cols_vary
wide_data |>
pivot_longer_(~q1:q4, cols_vary = "slowest")
#> id year name value
#> 1 1 2020 q1 100
#> 2 2 2021 q1 110
#> 3 3 2022 q1 120
#> 4 1 2020 q2 105
#> 5 2 2021 q2 115
#> 6 3 2022 q2 125
#> 7 1 2020 q3 110
#> 8 2 2021 q3 120
#> 9 3 2022 q3 130
#> 10 1 2020 q4 115
#> 11 2 2021 q4 125
#> 12 3 2022 q4 135
# Drop NA values
wide_na <- wide_data
wide_na$q3[2] <- NA
wide_na |>
pivot_longer_(~q1:q4, values_drop_na = TRUE)
#> id year name value
#> 1 1 2020 q1 100
#> 2 1 2020 q2 105
#> 3 1 2020 q3 110
#> 4 1 2020 q4 115
#> 5 2 2021 q1 110
#> 6 2 2021 q2 115
#> 7 2 2021 q4 125
#> 8 3 2022 q1 120
#> 9 3 2022 q2 125
#> 10 3 2022 q3 130
#> 11 3 2022 q4 135
# Convert to factors
wide_data |>
pivot_longer_(~q1:q4, factor = TRUE)
#> id year name value
#> 1 1 2020 q1 100
#> 4 1 2020 q2 105
#> 7 1 2020 q3 110
#> 10 1 2020 q4 115
#> 2 2 2021 q1 110
#> 5 2 2021 q2 115
#> 8 2 2021 q3 120
#> 11 2 2021 q4 125
#> 3 3 2022 q1 120
#> 6 3 2022 q2 125
#> 9 3 2022 q3 130
#> 12 3 2022 q4 135
# Create sample long data
long_data <- data.frame(
id = rep(1:3, each = 4),
year = rep(c(2020, 2021, 2022), each = 4),
quarter = rep(c("q1", "q2", "q3", "q4"), 3),
sales = c(100, 105, 110, 115, 110, 115, 120, 125, 120, 125, 130, 135)
)
# Pivot from long to wide format
long_data |>
pivot_wider_(names_from = "quarter", values_from = "sales")
#> id year q1 q2 q3 q4
#> 1 1 2020 100 105 110 115
#> 2 2 2021 110 115 120 125
#> 3 3 2022 120 125 130 135
# Specify id columns explicitly
long_data |>
pivot_wider_(
id_cols = ~c(id, year),
names_from = "quarter",
values_from = "sales"
)
#> id year q1 q2 q3 q4
#> 1 1 2020 100 105 110 115
#> 2 2 2021 110 115 120 125
#> 3 3 2022 120 125 130 135
# Add prefix to new column names
long_data |>
pivot_wider_(
names_from = "quarter",
values_from = "sales",
names_prefix = "sales_"
)
#> id year q1 q2 q3 q4
#> 1 1 2020 100 105 110 115
#> 2 2 2021 110 115 120 125
#> 3 3 2022 120 125 130 135
# Fill missing values
long_data |>
pivot_wider_(
names_from = "quarter",
values_from = "sales",
values_fill = 0
)
#> id year q1 q2 q3 q4
#> 1 1 2020 100 105 110 115
#> 2 2 2021 110 115 120 125
#> 3 3 2022 120 125 130 135
# Handle multiple values with aggregation
long_dup <- rbind(long_data, long_data[1:3, ])
long_dup |>
pivot_wider_(
names_from = "quarter",
values_from = "sales",
values_fn = "mean"
)
#> id year q1 q2 q3 q4
#> 1 1 2020 100 105 110 115
#> 2 2 2021 110 115 120 125
#> 3 3 2022 120 125 130 135
# Use custom aggregation function
long_dup |>
pivot_wider_(
names_from = "quarter",
values_from = "sales",
values_fn = ~sum(.x)
)
#> id year q1 q2 q3 q4
#> 1 1 2020 200 210 220 115
#> 2 2 2021 110 115 120 125
#> 3 3 2022 120 125 130 135
# Multiple names_from and values_from
long_multi <- data.frame(
id = rep(1:2, each = 4),
metric = rep(c("sales", "profit"), 4),
quarter = rep(c("q1", "q2"), each = 2, times = 2),
value = c(100, 20, 105, 22, 110, 24, 115, 26)
)
long_multi |>
pivot_wider_(
names_from = c("quarter", "metric"),
values_from = "value"
)
#> id q1_sales q1_profit q2_sales q2_profit
#> 1 1 100 20 105 22
#> 2 2 110 24 115 26
# Round-trip: wide -> long -> wide
wide_data |>
pivot_longer_(~q1:q4, names_to = "quarter", values_to = "sales") |>
pivot_wider_(names_from = "quarter", values_from = "sales")
#> id year q1 q2 q3 q4
#> 1 1 2020 100 105 110 115
#> 2 2 2021 110 115 120 125
#> 3 3 2022 120 125 130 135