loading...

A SciViews::R version of the tidyverse functions in {dplyr} and {tidyr} with standard evaluation, and non-standard evaluation through formulas. These functions end with an underscore _. Avoid mixing tidy, speedy and SciViews functions in the same pipeline.

list_sciviews_functions()

all_of(x)

as.grouped_df(x, ...)

as_grouped_df(x, ...)

# Default S3 method
as.grouped_df(x, ...)

# S3 method for class 'grouped_df'
as.grouped_df(x, ...)

# S3 method for class 'GRP_df'
as.grouped_df(x, ...)

# S3 method for class 'grouped_df'
print(x, ...)

group_vars_(.data = (.), return = "names")

group_rows_(.data = (.))

group_data_(.data = (.))

group_indices_(.data = (.), ...)

group_keys_(.data = (.), ...)

groups_(.data = (.))

group_size_(.data = (.))

n_groups_(.data = (.))

group_by_(
  .data = (.),
  ...,
  .add = FALSE,
  .drop = NULL,
  .sort = get_collapse("sort"),
  .decreasing = FALSE,
  .na.last = TRUE,
  .return.groups = TRUE,
  .return.order = .sort,
  .method = "auto"
)

ungroup_(.data = (.), ..., .na.last = TRUE, .method = "auto")

rename_(.data = (.), ...)

rename_with_(.data = (.), .fn, .cols = ~everything(), ...)

filter_(.data = (.), ..., .by = NULL, .preserve = FALSE)

select_(.data = (.), ...)

mutate_(
  .data = (.),
  ...,
  .by = NULL,
  .keep = "all",
  .before = NULL,
  .after = NULL,
  .cols = NULL
)

transmute_(.data, ...)

summarise_(
  .data = (.),
  ...,
  .by = NULL,
  .groups = "drop_last",
  .keep.group_vars = TRUE,
  .cols = NULL
)

summarize_(
  .data = (.),
  ...,
  .by = NULL,
  .groups = "drop_last",
  .keep.group_vars = TRUE,
  .cols = NULL
)

reframe_(
  .data,
  ...,
  .by = NULL,
  .groups = "drop",
  .keep.group_vars = TRUE,
  .cols = NULL
)

arrange_(
  .data = (.),
  ...,
  .by_group = FALSE,
  .locale = "C",
  .decreasing = FALSE
)

pull_(.data = (.), var = -1, name = NULL, ...)

join_(
  x,
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = NULL,
  na_matches = c("na", "never"),
  multiple = "all",
  unmatched = "drop",
  relationship = NULL,
  sort = FALSE,
  verbose = 0,
  column = NULL,
  attr = NULL,
  how = "full"
)

right_join_(
  x = (.),
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = NULL,
  na_matches = c("na", "never"),
  multiple = "all",
  unmatched = "drop",
  relationship = NULL,
  sort = FALSE,
  verbose = 0,
  column = NULL,
  attr = NULL
)

full_join_(
  x = (.),
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = NULL,
  na_matches = c("na", "never"),
  multiple = "all",
  relationship = NULL,
  sort = FALSE,
  verbose = 0,
  column = NULL,
  attr = NULL
)

left_join_(
  x = (.),
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = NULL,
  na_matches = c("na", "never"),
  multiple = "all",
  unmatched = "drop",
  relationship = NULL,
  sort = FALSE,
  verbose = 0,
  column = NULL,
  attr = NULL
)

inner_join_(
  x = (.),
  y,
  by = NULL,
  copy = FALSE,
  suffix = c(".x", ".y"),
  ...,
  keep = NULL,
  na_matches = c("na", "never"),
  multiple = "all",
  unmatched = "drop",
  relationship = NULL,
  sort = FALSE,
  verbose = 0,
  column = NULL,
  attr = NULL
)

semi_join_(
  x = (.),
  y,
  by = NULL,
  copy = FALSE,
  ...,
  na_matches = c("na", "never"),
  sort = FALSE,
  verbose = 0,
  column = NULL,
  attr = NULL
)

anti_join_(
  x = (.),
  y,
  by = NULL,
  copy = FALSE,
  ...,
  na_matches = c("na", "never"),
  sort = FALSE,
  verbose = 0,
  column = NULL,
  attr = NULL
)

bind_rows_(..., .id = NULL, .use_names = TRUE, .fill = TRUE)

bind_cols_(
  ...,
  .name_repair = c("unique", "universal", "check_unique", "minimal")
)

slice_(.data = (.), ..., .by = NULL, .preserve = NULL)

slice_head_(.data = (.), ..., n = 1L, prop, by = NULL, sort = TRUE)

slice_tail_(.data = (.), ..., n = 1L, prop, by = NULL, sort = TRUE)

count_(
  .data = (.),
  ...,
  wt = NULL,
  name = "n",
  sort = FALSE,
  decreasing = TRUE,
  .drop = TRUE,
  add = FALSE
)

tally_(.data = (.), wt = NULL, name = "n", sort = FALSE, decreasing = TRUE)

add_count_(
  .data = (.),
  ...,
  wt = NULL,
  name = "n",
  sort = FALSE,
  decreasing = TRUE,
  .drop = TRUE
)

add_tally_(.data = (.), wt = NULL, name = "n", sort = FALSE, decreasing = TRUE)

distinct_(.data = (.), ..., .keep_all = FALSE, .method = "auto")

drop_na_(.data = (.), ..., .na.attr = FALSE, .prop = 0)

replace_na_(.data = (.), replace, ..., v = NULL)

pivot_longer_(
  .data = (.),
  cols,
  ...,
  cols_vary = "fastest",
  names_to = "name",
  names_prefix = NULL,
  values_to = "value",
  values_drop_na = FALSE,
  factor = FALSE
)

pivot_wider_(
  .data = (.),
  ...,
  id_cols = NULL,
  id_expand = FALSE,
  names_from = name,
  names_prefix = "",
  names_vary = "fastest",
  values_from = value,
  values_fill = NULL,
  values_fn = "last",
  drop = TRUE,
  sort = FALSE
)

uncount_(.data = (.), weights, ..., .remove = TRUE, .id = NULL)

unite_(.data = (.), col, ..., sep = "_", remove = TRUE, na.rm = FALSE)

fill_(.data = (.), ..., .direction = "down")

separate_(
  .data = (.),
  col,
  into,
  sep = "[^[:alnum:]]+",
  remove = TRUE,
  convert = FALSE,
  extra = "warn",
  fill = "warn",
  fixed = FALSE,
  ...
)

Arguments

x

A data frame (data.frame, data.table or tibble's tbl_df).

...

Arguments dependent on the context of the function and, most of the time, not evaluated in a standard way (cf. the tidyverse approach).

.data

A data frame (data.frame, data.table or tibble's tbl_df)

return

What to return: "data" or 1, "unique" or 2 for unique rows of grouping columns, "names" or 3 (default) for names of grouping columns, "indices" or 4 for integer indices of grouping columns, "named_indices" or 5 for named indices, "logical" or 6 for logical selection vector of grouping columns, or "named_logical" or 7 for named logical.

.add

If TRUE, the grouping variables are added to the existing ones.

.drop

Are levels with no observations dropped (TRUE by default).

.sort

If TRUE groups are sorted.

.decreasing

Is sorting done in decreasing order (FALSE by default)?

.na.last

How to treat missing values in groups? Assign them to the last group by default (TRUE).

.return.groups

If TRUE, the grouping variables are returned in the GRP object (default).

.return.order

If TRUE, the order of the grouping variables is returned in the object (by default, same value as .sort=).

.method

The algorithm to use for grouping: "radix", "hash", or "auto" (by default). "auto" chooses "radix" when .sort = TRUE and "hash" otherwise.

.fn

A function to use.

.cols

The list of the columns on which to apply the transformation. For the moment, only all existing columns, which means .cols = everything(), is implemented.

.by

A list of names of the columns to use for grouping the data.

.preserve

When data is grouped, do we preserve grouping or recalculate it according to the new data frame obtained?

.keep

Which columns to keep. The default is "all", possible values are "used", "unused", or "none" (see mutate()).

.before

Place new columns before this one.

.after

Place new columns after this one.

.groups

How to treat the grouping variables in the result? Possible values are "drop_last" (default), "drop" (no grouping variables), "keep" (keep all grouping variables), or "rowwise" (not implemented yet).

.keep.group_vars

If TRUE (by default), the grouping variables are kept in the result.

.by_group

Logical. If TRUE, rows are first arranged by the grouping variables, if any. FALSE by default.

.locale

The locale to sort character vectors in. If NULL, use the "C" locale (which is also the default value).

var

A variable specified as a name, a positive or a negative integer (counting from the end). The default is -1 and returns last variable.

name

The name of the new column in the output (n by default, and no existing column must have this name, or an error is generated).

y

A second data frame.

by

A list of names of the columns to use for joining the two data frames. Could also be a join specification created with dplyr::join_by(), but in this case, calculation is delegated to dplyr's join methods.

copy

This argument is there for compatibility with the "t" matching functions, but it is not used here.

suffix

The suffix to the column names to use to differentiate the columns that come from the first or the second data frame. By default it is c(".x", ".y").

keep

Should the join keys from both x and y be preserved in the output? If NULL, the default, joins on equality retain only the keys from x, while joins on inequality retain the keys from both inputs. If TRUE, all keys from both inputs are retained. If FALSE, only keys from x are retained. For right and full joins, the data in key columns corresponding to rows that only exist in y are merged into the key columns from x. Can't be used when joining on inequality conditions. If keep = TRUE, calculation is delegated to dplyr join methods.

na_matches

Should two NA or two NaN values match? "na", the default, treats two NA or two NaN values as equal, like %in%, match(), and merge(). "never" treats two NA or two NaN values as different, and will never match them together or to any other values. This is similar to joins for database sources and to base::merge(incomparables = NA). If "never", calculation is delegated to dplyr join methods.

multiple

Handling of rows in x with multiple matches in y. For each row of x: "all", the default, returns every match detected in y. This is the same behavior as SQL. "any" returns one match detected in y, with no guarantees on which match will be returned. It is often faster than "first" and "last" in dplyr, but avoid it here. "first" returns the first match detected in y. "last" returns the last match detected in y. For "any" and "last", calculation is delegated to dplyr join methods, and in the case of right join, also for "first".

unmatched

How should unmatched keys that would result in dropped rows be handled? "drop" drops unmatched keys from the result. "error" throws an error if unmatched keys are detected. Also, a named list of the form list(x = 1, y = 0.5, fail = "warning") can be used when calculation is not delegated to dplyr. The first two elements are the proportions that must match, and the third element is "message", "warning", or "error".

relationship

Handling of the expected relationship between the keys of x and y. If the expectations chosen from the list below are invalidated, an error is thrown. NULL, the default, doesn't expect there to be any relationship between x and y. However, for equality joins it will check for a many-to-many relationship (which is typically unexpected) and will warn if one occurs, encouraging you to either take a closer look at your inputs or make this relationship explicit by specifying "many-to-many". "one-to-one" expects: Each row in x matches at most 1 row in y. Each row in y matches at most 1 row in x. "one-to-many" expects: Each row in y matches at most 1 row in x. "many-to-one" expects: Each row in x matches at most 1 row in y. "many-to-many" doesn't perform any relationship checks, but is provided to allow you to be explicit about this relationship if you know it exists. relationship doesn't handle cases where there are zero matches. For that, see unmatched.

sort

If TRUE largest group will be shown on top.

verbose

integer. Prints information about the join. One of 0 (off, default), 1 or 2 (additionally prints the classes of the by columns).

column

name for an extra column to generate in the output indicating which dataset a record came from. TRUE calls this column ".join", or give another name.

attr

name for attribute providing information about the join performed (including the output of collapse::fmatch()) to the result. TRUE calls this attribute "join.match" or give your own name. Note: this also invokes the count argument to collapse::fmatch().

how

Can be "full" (default), "inner", "left", "right", "semi", or "anti".

.id

The name of the column for the origin id, either names if all other arguments are named, or numbers.

.use_names

If TRUE (default), bind by matching names, if FALSE, bind by position. If NULL, warns if all items do not have the same name in the same order, and then proceeds as if FALSE (but will be as if TRUE in the future).

.fill

If TRUE (default), fills missing columns with NA or NULL for missing list columns, if FALSE, do not fill.

.name_repair

How should the name be "repaired" to avoid duplicate column names? See dplyr::bind_cols() for more details.

n

Number of rows to keep

prop

Proportion of rows to keep, between 0 and 1. Provide either n, or prop but not both simultaneously. If none is provided, n = 1 is used.

wt

Frequency weights. Can be NULL or a variable. Use data masking.

decreasing

Is sorting done in decreasing order (TRUE by default)?

add

Add counts to the data frame (FALSE by default).

.keep_all

If TRUE keep all variables in .data.

.na.attr

logical. TRUE adds an attribute containing the removed cases. For compatibility reasons this is exactly the same format as na.omit(), i.e. the attribute is called "na.action" and is of class "omit".

.prop

numeric. The proportion of missing values in each case for the case to be considered as missing.

replace

If data is a vector, a unique value to replace NAs, otherwise, a list of values, one per column of the data frame.

v

a vector where to replace NAs.

cols

A selection of the columns using tidy-select syntax, see tidyr::pivot_longer().

cols_vary

character. Either "fastest" or "slowest". If "fastest" (default), keeps individual rows from cols close together. If "slowest", keeps individual columns from cols close together.

names_to

A character vector with the name or names of the columns for the names.

names_prefix

character. A regular expression used to remove matching text from the start of each variable name.

values_to

A string with the name of the column that receives the values.

values_drop_na

logical. If TRUE, drop rows with only NAs in the values_to column.

factor

logical. If TRUE, convert the names and labels into factors, if FALSE (default) leave them as character strings (but slower for subsequent filtering).

id_cols

A set of columns that uniquely identify each observation.

id_expand

logical. If TRUE, expand the id_cols.

names_from

The column or columns containing the names (use tidy selection and do not quote the names).

names_vary

character. How the various column names are made: "fastest" (default), "slowest", "transpose", or "slowtranspose".

values_from

Idem for the column or columns that contain the values.

values_fill

Optionally, a scalar value to use for missing values.

values_fn

Either the name of an internal function (as a string): "first", "last" (default), "count", "sum", "mean", "min", or "max". Could also be a formula calling an external function with first argument being .x like ~fmedian(.x, na.rm = TRUE).

drop

Drop unused factor levels or not.

weights

A vector of weights to use to "uncount" data.

.remove

If TRUE, and weights is the name of a column, that column is removed from data.

col

The name (quoted or not) of the new column with the united variables.

sep

Separator to use between values for united or separated columns.

remove

If TRUE the initial columns that are separated are also removed from data.

na.rm

If TRUE, NAs are eliminated before uniting the values.

.direction

Direction in which to fill missing data: "down" (by default), "up", or "downup" (first down, then up), "updown" (the opposite).

into

Name of the new column to put separated variables. Use NA for items to drop.

convert

If TRUE, resulting values are converted into numeric, integer or logical.

extra

When sep is a character vector what happens when there are too many pieces: "warn" (default) issues a warning and drops extra items, "drop" does the same without warning and "merge" merges the extra items with the last one.

fill

When sep is a character vector what happens when there are not enough pieces: "warn" (default) issues a warning and fills with NAs at right, "right" does the same without warning, and "left" fills with NAs at left.

fixed

logical. If TRUE, sep is a fixed string, otherwise it is a (perl) regular expression.

data

A data frame, or for replace_na() a vector or a data frame.

Value

See corresponding "non-SciViews" function for the full help page with indication of the return values.

Note

The summarise_() function does not support n() as does dplyr::summarise(). You can use svBase::fn() instead, but then, you must give a variable name as argument. The svBase::fn() alternative can also be used in dplyr::summarise() for homogeneous syntax between the two. From {dplyr}, the slice_min(), slice_max() and slice_sample() functions are not added yet. From {tidyr} tidyr::expand(), tidyr::chop(), tidyr::unchop(), tidyr::nest(), tidyr::unnest(), tidyr::unnest_longer(), tidyr::unnest_wider(), tidyr::hoist(), tidyr::pack() and tidyr::unpack() are not implemented yet.

Examples

# TODO...