import pandas as pd
def count_consecutive_na(frame, group_col='id', value_col='value'):
    """Return the running length of the current NA streak for every row.

    For each row, the result is the number of consecutive NA entries in
    ``value_col`` ending at that row, counted separately for each distinct
    value of ``group_col``. Rows holding a non-NA value get 0.

    Args:
        frame: DataFrame containing ``group_col`` and ``value_col``.
        group_col: Name of the column whose values delimit the groups.
        value_col: Name of the column inspected for NA values.

    Returns:
        An integer Series named ``NA_count`` with the same index as ``frame``.
    """
    keys = frame[group_col]
    is_na = frame[value_col].isna()
    # Every non-NA value closes the current streak, so the per-group running
    # count of non-NA values gives each streak its own label. Computing it per
    # group keeps rows of other groups from splitting a streak.
    run_id = (~is_na).astype(int).groupby(keys).cumsum()
    # Inside one (group, run) bucket the only possible non-NA row is the first
    # one, so a cumulative sum of the NA flag is exactly the streak length.
    counts = is_na.astype(int).groupby([keys, run_id]).cumsum()
    return counts.rename('NA_count')


# Build the sample data.
df = pd.DataFrame({
    'id': [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    'value': [1, 2, None, None, 3, None, None, None, None, None, 4, 5],
})
# Count consecutive NA values within each id; the count restarts at 0 on
# every non-NA value.
df['NA_count'] = count_consecutive_na(df)
结果:
    id  value  NA_count
0    1    1.0         0
1    1    2.0         0
2    1    NaN         1
3    1    NaN         2
4    1    3.0         0
5    1    NaN         1
6    1    NaN         2
7    2    NaN         1
8    2    NaN         2
9    2    NaN         3
10   2    4.0         0
11   2    5.0         0