使用 R 语言中的 dplyr 包和 tidyr 包，步骤如下：先按 group 字段分组并将每组数据嵌套为子数据集；再在每个子数据集内随机打乱行顺序并展开；最后按 0.3 的比例对每个分组进行抽样。
示例代码：
library(dplyr)
library(tidyr)
# Example input: nine rows in three groups (A x 2, B x 3, C x 4), each with
# two columns of independent uniform(0, 1) draws.
group_labels <- rep(c("A", "B", "C"), times = c(2L, 3L, 4L))
df <- data.frame(
  group = group_labels,
  val1 = runif(length(group_labels)),
  val2 = runif(length(group_labels))
)
# Group the rows by `group`, then nest: every non-grouping column of a group
# is collapsed into one data frame stored in a list-column (tidyr names it
# `data` by default), leaving one row per group.
df_group <- nest(group_by(df, group))
# Shuffle the row order inside each group's nested data frame, then expand the
# list-column back into ordinary rows.
#
# `lapply()` performs the per-group iteration: `map()` belongs to purrr, which
# this script never attaches (only dplyr and tidyr are loaded), so calling it
# fails with "could not find function".
df_random <- df_group %>%
  mutate(data = lapply(data, function(group_rows) {
    group_rows %>%
      # `sample.int(k)` is a random permutation of 1..k and, unlike `1:k`,
      # is well-defined (empty) when k is 0.
      mutate(rand = sample.int(n())) %>%
      # Sorting by the random key puts the rows in random order.
      arrange(rand) %>%
      # The key was only a sorting aid; drop it from the result.
      select(-rand)
  })) %>%
  unnest(data)
# Draw a 30% random sample of the rows within each group.
# NOTE(review): `sample_frac()` is superseded by `slice_sample(prop = )`, but
# the two may pick different per-group sizes for small groups (rounding vs.
# truncation) - confirm before switching.
sampled <- df_random %>%
  group_by(group) %>%
  sample_frac(0.3) %>%
  # Drop the grouping so later verbs on `sampled` are not silently applied
  # per group.
  ungroup()
下一篇:按数值大小排序对象数组