R/sciviews_functions.R
sciviews_functions.Rd
A SciViews::R version of the tidyverse functions in {dplyr}
and {tidyr} with standard evaluation, and non-standard evaluation through
formulas. These functions end with an underscore _. Avoid mixing tidy,
speedy and SciViews functions in the same pipeline.
list_sciviews_functions()
all_of(x)
as.grouped_df(x, ...)
as_grouped_df(x, ...)
# Default S3 method
as.grouped_df(x, ...)
# S3 method for class 'grouped_df'
as.grouped_df(x, ...)
# S3 method for class 'GRP_df'
as.grouped_df(x, ...)
# S3 method for class 'grouped_df'
print(x, ...)
group_vars_(.data = (.), return = "names")
group_rows_(.data = (.))
group_data_(.data = (.))
group_indices_(.data = (.), ...)
group_keys_(.data = (.), ...)
groups_(.data = (.))
group_size_(.data = (.))
n_groups_(.data = (.))
group_by_(
.data = (.),
...,
.add = FALSE,
.drop = NULL,
.sort = get_collapse("sort"),
.decreasing = FALSE,
.na.last = TRUE,
.return.groups = TRUE,
.return.order = .sort,
.method = "auto"
)
ungroup_(.data = (.), ..., .na.last = TRUE, .method = "auto")
rename_(.data = (.), ...)
rename_with_(.data = (.), .fn, .cols = ~everything(), ...)
filter_(.data = (.), ..., .by = NULL, .preserve = FALSE)
select_(.data = (.), ...)
mutate_(
.data = (.),
...,
.by = NULL,
.keep = "all",
.before = NULL,
.after = NULL,
.cols = NULL
)
transmute_(.data, ...)
summarise_(
.data = (.),
...,
.by = NULL,
.groups = "drop_last",
.keep.group_vars = TRUE,
.cols = NULL
)
summarize_(
.data = (.),
...,
.by = NULL,
.groups = "drop_last",
.keep.group_vars = TRUE,
.cols = NULL
)
reframe_(
.data,
...,
.by = NULL,
.groups = "drop",
.keep.group_vars = TRUE,
.cols = NULL
)
arrange_(
.data = (.),
...,
.by_group = FALSE,
.locale = "C",
.decreasing = FALSE
)
pull_(.data = (.), var = -1, name = NULL, ...)
join_(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = c("na", "never"),
multiple = "all",
unmatched = "drop",
relationship = NULL,
sort = FALSE,
verbose = 0,
column = NULL,
attr = NULL,
how = "full"
)
right_join_(
x = (.),
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = c("na", "never"),
multiple = "all",
unmatched = "drop",
relationship = NULL,
sort = FALSE,
verbose = 0,
column = NULL,
attr = NULL
)
full_join_(
x = (.),
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = c("na", "never"),
multiple = "all",
relationship = NULL,
sort = FALSE,
verbose = 0,
column = NULL,
attr = NULL
)
left_join_(
x = (.),
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = c("na", "never"),
multiple = "all",
unmatched = "drop",
relationship = NULL,
sort = FALSE,
verbose = 0,
column = NULL,
attr = NULL
)
inner_join_(
x = (.),
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = c("na", "never"),
multiple = "all",
unmatched = "drop",
relationship = NULL,
sort = FALSE,
verbose = 0,
column = NULL,
attr = NULL
)
semi_join_(
x = (.),
y,
by = NULL,
copy = FALSE,
...,
na_matches = c("na", "never"),
sort = FALSE,
verbose = 0,
column = NULL,
attr = NULL
)
anti_join_(
x = (.),
y,
by = NULL,
copy = FALSE,
...,
na_matches = c("na", "never"),
sort = FALSE,
verbose = 0,
column = NULL,
attr = NULL
)
bind_rows_(..., .id = NULL, .use_names = TRUE, .fill = TRUE)
bind_cols_(
...,
.name_repair = c("unique", "universal", "check_unique", "minimal")
)
slice_(.data = (.), ..., .by = NULL, .preserve = NULL)
slice_head_(.data = (.), ..., n = 1L, prop, by = NULL, sort = TRUE)
slice_tail_(.data = (.), ..., n = 1L, prop, by = NULL, sort = TRUE)
count_(
.data = (.),
...,
wt = NULL,
name = "n",
sort = FALSE,
decreasing = TRUE,
.drop = TRUE,
add = FALSE
)
tally_(.data = (.), wt = NULL, name = "n", sort = FALSE, decreasing = TRUE)
add_count_(
.data = (.),
...,
wt = NULL,
name = "n",
sort = FALSE,
decreasing = TRUE,
.drop = TRUE
)
add_tally_(.data = (.), wt = NULL, name = "n", sort = FALSE, decreasing = TRUE)
distinct_(.data = (.), ..., .keep_all = FALSE, .method = "auto")
drop_na_(.data = (.), ..., .na.attr = FALSE, .prop = 0)
replace_na_(.data = (.), replace, ..., v = NULL)
pivot_longer_(
.data = (.),
cols,
...,
cols_vary = "fastest",
names_to = "name",
names_prefix = NULL,
values_to = "value",
values_drop_na = FALSE,
factor = FALSE
)
pivot_wider_(
.data = (.),
...,
id_cols = NULL,
id_expand = FALSE,
names_from = name,
names_prefix = "",
names_vary = "fastest",
values_from = value,
values_fill = NULL,
values_fn = "last",
drop = TRUE,
sort = FALSE
)
uncount_(.data = (.), weights, ..., .remove = TRUE, .id = NULL)
unite_(.data = (.), col, ..., sep = "_", remove = TRUE, na.rm = FALSE)
fill_(.data = (.), ..., .direction = "down")
separate_(
.data = (.),
col,
into,
sep = "[^[:alnum:]]+",
remove = TRUE,
convert = FALSE,
extra = "warn",
fill = "warn",
fixed = FALSE,
...
)
A data frame (data.frame, data.table or tibble's tbl_df).
Arguments dependent on the context of the function and, most of the time, not evaluated in a standard way (cf. the tidyverse approach).
A data frame (data.frame, data.table or tibble's tbl_df)
What to return: "data"
or 1
, "unique"
or 2
for unique
rows of grouping columns, "names"
or 3
(default) for names of grouping
columns, "indices"
or 4
for integer indices of grouping columns,
"named_indices"
or 5
for named indices, "logical"
or 6
for logical
selection vector of grouping columns, or "named_logical"
or 7
for named
logical.
If TRUE
, the grouping variables are added to the existing ones.
Are levels with no observations dropped (TRUE
by default).
If TRUE
groups are sorted.
Is sorting done in decreasing order (FALSE
by default)?
How to treat missing values in groups? Assign them to the last
group by default (TRUE
).
If TRUE
, the grouping variables are returned in the GRP
object (default).
If TRUE
, the order of the grouping variables is
returned in the object (by default, same value as sort=
).
The algorithm to use for grouping: "radix"
, "hash"
, or
"auto"
(by default). "auto"
chooses "radix"
when sort = TRUE
and
"hash"
otherwise.
A function to use.
The list of columns to which the transformation is applied. For
the moment, only all existing columns, which means .cols = everything()
, is implemented.
A list of names of the columns to use for grouping the data.
When data is grouped, do we preserve grouping or recalculate it according to the new data frame obtained?
Which columns to keep. The default is "all"
, possible values
are "used"
, "unused"
, or "none"
(see mutate()
).
Place new columns before this one.
Place new columns after this one.
How to treat the grouping variables in the result? Possible
values are "drop_last"
(default), "drop"
(no grouping variables),
"keep"
(keep all grouping variables), or "rowwise"
(not implemented
yet).
If TRUE
(by default), the grouping variables are
kept in the result.
Logical. If TRUE
rows are first arranged by the grouping
variables, if any. FALSE
by default.
The locale to sort character vectors in. If NULL
(default),
use "C"
locale.
A variable specified as a name, a positive or a negative integer
(counting from the end). The default is -1
and returns last variable.
The name of the new column in the output (n
by default, and no
existing column must have this name, or an error is generated).
A second data frame.
A list of names of the columns to use for joining the two data
frames. Could also be a join specification created with dplyr::join_by()
,
but in this case, calculation is delegated to dplyr's join methods.
This argument is there for compatibility with the "t" matching functions, but it is not used here.
The suffix to the column names to use to differentiate the
columns that come from the first or the second data frame. By default it is
c(".x", ".y")
.
Should the join keys from both x
and y
be preserved in the
output? If NULL
, the default, joins on equality retain only the keys from
x
, while joins on inequality retain the keys from both inputs. If TRUE
,
all keys from both inputs are retained. If FALSE
, only keys from x
are
retained. For right and full joins, the data in key columns corresponding to
rows that only exist in y
are merged into the key columns from x
. Can't
be used when joining on inequality conditions. If keep = TRUE
, calculation
is delegated to dplyr join methods.
Should two NA
or two NaN
values match? "na"
, the
default, treats two NA
or two NaN
values as equal, like %in%
,
match()
, and merge()
. "never"
treats two NA
or two NaN
values as
different, and will never match them together or to any other values. This is
similar to joins for database sources and to
base::merge(incomparables = NA)
. If "never"
, calculation is delegated to
dplyr join methods.
Handling of rows in x
with multiple matches in y
. For
each row of x
: "all"
, the default, returns every match detected in y
.
This is the same behavior as SQL. "any"
returns one match detected in y
,
with no guarantees on which match will be returned. It is often faster than
"first"
and "last"
in dplyr, but avoid it here. "first"
returns the
first match detected in y
. "last"
returns the last match detected in y
.
For "any"
and "last"
, calculation is delegated to dplyr join methods, and
in the case of right join, also for "first"
.
How should unmatched keys that would result in dropped rows
be handled? "drop"
drops unmatched keys from the result. "error"
throws
an error if unmatched keys are detected. Also, a named list of the form
list(x = 1, y = 0.5, fail = "warning")
can be used when calculation is
not delegated to dplyr. The first two elements are the proportions that
must match, and the third element is "message"
, "warning"
, or "error"
.
Handling of the expected relationship between the keys of
x
and y
. If the expectations chosen from the list below are invalidated,
an error is thrown. NULL
, the default, doesn't expect there to be any
relationship between x
and y
. However, for equality joins it will check
for a many-to-many relationship (which is typically unexpected) and will warn
if one occurs, encouraging you to either take a closer look at your inputs or
make this relationship explicit by specifying "many-to-many". "one-to-one"
expects: Each row in x matches at most 1 row in y. Each row in y matches at
most 1 row in x. "one-to-many"
expects: Each row in y matches at most 1
row in x. "many-to-one" expects: Each row in x matches at most 1 row in y.
"many-to-many"
doesn't perform any relationship checks, but is provided to
allow you to be explicit about this relationship if you know it exists.
relationship
doesn't handle cases where there are zero matches. For that,
see unmatched
.
If TRUE
largest group will be shown on top.
integer. Prints information about the join. One of 0
(off, the default),
1
or 2
(additionally prints the classes of the by
columns).
name for an extra column to generate in the output indicating
which dataset a record came from. TRUE
calls this column ".join"
, or give
another name.
name for attribute providing information about the join performed
(including the output of collapse::fmatch()
) to the result. TRUE
calls
this attribute "join.match"
or give your own name. Note: this also invokes
the count argument to collapse::fmatch()
.
Can be "full" (default), "inner", "left", "right", "semi", or "anti".
The name of the column for the origin id, either names if all other arguments are named, or numbers.
If TRUE
(default), bind by matching names, if FALSE
, bind by
position. If NULL
, warns if all items do not have the same name in the
same order, and then proceeds as if FALSE
(but will be as if TRUE
in
the future).
If TRUE
(default), fills missing columns with NA
or NULL
for missing list columns, if FALSE
, do not fill.
How should the name be "repaired" to avoid duplicate
column names? See dplyr::bind_cols()
for more details.
Number of rows to keep
Proportion of rows to keep, between 0 and 1. Provide either n
,
or prop
but not both simultaneously. If none is provided, n = 1
is used.
Frequency weights. Can be NULL
or a variable. Use data masking.
Is sorting done in decreasing order (TRUE
by default)?
Add counts to the data frame (FALSE
by default).
If TRUE
keep all variables in .data
.
logical. TRUE
adds an attribute containing the removed
cases. For compatibility reasons this is exactly the same format as
na.omit()
, i.e. the attribute is called "na.action" and is of class "omit".
numeric. The proportion of missing values in each case for the case to be considered as missing.
If data
is a vector, a unique value to replace NA
s,
otherwise, a list of values, one per column of the data frame.
a vector where to replace NAs.
A selection of the columns using tidy-select syntax,
see tidyr::pivot_longer()
.
character. Either "fastest" or "slowest". If "fastest"
(default), keep individual rows from cols
close together. If "slowest",
keeps individual columns from cols close together.
A character vector with the name or names of the columns for the names.
character. A regular expression used to remove matching text from the start of each variable name.
A string with the name of the column that receives the values.
logical. If TRUE
, drop rows with only NA
s in the
values_to
column.
logical. If TRUE
, convert the names and labels into factors,
if FALSE
(default) leave them as character strings (but slower for
subsequent filtering).
A set of columns that uniquely identify each observation.
logical. If TRUE
, expand the id_cols
.
The column or columns containing the names (use tidy selection and do not quote the names).
character. How the various column names are made: "fastest" (default), "slowest", "transpose", or "slowtranspose".
Idem for the column or columns that contain the values.
Optionally, a scalar value to use for missing values.
Either the name of an internal function (as a string) :
"first", "last" (default), "count", "sum", "mean", "min", or "max". Could
also be a formula calling an external function with first argument being .x
like ~fmedian(.x, na.rm = TRUE)
.
Drop unused factor levels or not.
A vector of weights to use to "uncount" data
.
If TRUE
, and weights
is the name of a column, that column
is removed from data
.
The name, quoted or not, of the new column with the united variables.
Separator to use between values for united or separated columns.
If TRUE
the initial columns that are separated are also
removed from data
.
If TRUE
, NA
s are eliminated before uniting the values.
Direction in which to fill missing data: "down"
(by
default), "up"
, or "downup"
(first down, then up), "updown"
(the opposite).
Name of the new column to put separated variables. Use NA
for
items to drop.
If TRUE
resulting values are converted into numeric,
integer or logical.
When sep
is a character vector what happens when there are too
many pieces: "warn"
(default) issue a warning and drop extra items,
"drop"
does the same without warning and "merge"
merges the extra items
with the last one.
When sep
is a character vector what happens when there are not
enough pieces: "warn"
(default) issue a warning and fill with NA
s at
right, "right"
does the same without warning, and "left"
fills with NA
s at
left.
logical. If TRUE
, sep
is a fixed string, otherwise it is a
(perl) regular expression.
A data frame, or for replace_na_()
a vector or a data frame.
See corresponding "non-SciViews" function for the full help page with indication of the return values.
The summarise_()
function does not support n()
as does
dplyr::summarise()
. You can use svBase::fn()
instead, but then, you must
give a variable name as argument. The svBase::fn()
alternative can also be
used in dplyr::summarise()
for homogeneous syntax between the two.
From {dplyr}, the slice_min()
, slice_max()
and slice_sample()
functions are not added yet.
From {tidyr} tidyr::expand()
, tidyr::chop()
, tidyr::unchop()
,
tidyr::nest()
, tidyr::unnest()
, tidyr::unnest_longer()
,
tidyr::unnest_wider()
, tidyr::hoist()
, tidyr::pack()
and
tidyr::unpack()
are not implemented yet.
# TODO...