This function is a small utility to create a dataframe of a specific length
with a set number of groups and a specific mean/sd per group. Note that the total length
of the dataframe will be n * n_grps.
Usage
generate_df(n = 10L, n_grps = 1L, with_seed = NULL, mean = c(10), sd = mean/10)
Arguments
- n
An integer indicating the number of rows per group, defaults to
10
- n_grps
An integer indicating the number of groups, defaults to
1
- with_seed
A seed to make the randomization reproducible
- mean
A numeric vector giving the mean of the randomly generated values for each group; its length must equal
n_grps
- sd
A numeric vector giving the standard deviation of the randomly generated values for each group (defaults to mean/10); its length must equal
n_grps
Examples
generate_df()
#> # A tibble: 10 × 4
#> row_id id grp values
#> <int> <chr> <chr> <dbl>
#> 1 1 Q08 grp-1 8.91
#> 2 2 H06 grp-1 11.0
#> 3 3 R10 grp-1 8.72
#> 4 4 U04 grp-1 8.82
#> 5 5 P07 grp-1 8.71
#> 6 6 Q09 grp-1 9.39
#> 7 7 V03 grp-1 10.1
#> 8 8 D09 grp-1 9.29
#> 9 9 E10 grp-1 9.53
#> 10 10 I04 grp-1 7.54
generate_df(n = 100L, n_grps = 5L, with_seed = NULL, mean = seq(10, 50, length.out = 5))
#> # A tibble: 500 × 4
#> row_id id grp values
#> <int> <chr> <chr> <dbl>
#> 1 1 A062 grp-1 10.4
#> 2 2 E082 grp-1 11.2
#> 3 3 E002 grp-1 10.1
#> 4 4 B095 grp-1 11.4
#> 5 5 L067 grp-1 7.94
#> 6 6 F079 grp-1 8.63
#> 7 7 B057 grp-1 10.2
#> 8 8 T091 grp-1 8.79
#> 9 9 O020 grp-1 11.6
#> 10 10 U042 grp-1 10.6
#> # ℹ 490 more rows
library(dplyr)
#>
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#>
#> filter, lag
#> The following objects are masked from ‘package:base’:
#>
#> intersect, setdiff, setequal, union
generate_df(
100L,
n_grps = 5,
mean = seq(10, 50, length.out = 5)
) |>
group_by(grp) |>
summarise(
mean = mean(values), # mean is approx mean
sd = sd(values), # sd is approx sd
n = n(), # each grp is of length n
# showing that the sd default of mean/10 works
`mean/sd` = round(mean / sd, 1)
)
#> # A tibble: 5 × 5
#> grp mean sd n `mean/sd`
#> <chr> <dbl> <dbl> <int> <dbl>
#> 1 grp-1 9.88 1.21 100 8.2
#> 2 grp-2 19.9 2.28 100 8.7
#> 3 grp-3 29.7 3.21 100 9.2
#> 4 grp-4 40.4 4.19 100 9.6
#> 5 grp-5 50.0 5.22 100 9.6