这是一种尝试（具体解释见代码中的注释）：
# Per-month user statistics, joined back onto every row of `dt`:
#   new          - users whose first row in `dt` falls in that month
#   totNew       - running total of `new` over the months, in the order
#                  summarise() returns them
#   notLastMonth - users seen in the month that were not seen in the
#                  preceding month's row
# NOTE(review): "first appearance" means the user's first row in the current
# row order of `dt`, so this presumably expects `dt` to be sorted by date;
# confirm with the caller.
dt %>%
  group_by(user) %>%
  mutate(Count = row_number()) %>% # Number each user's rows in order
  group_by(mth) %>%
  mutate(new = sum(Count == 1)) %>% # Count first appearances per month
  summarise(
    new = first(new), # One value of `new` per month (for cumsum)
    users = list(unique(user)) # Unique users per month (for notLastMonth)
  ) %>%
  mutate(
    totNew = cumsum(new), # Cumulative sum of first-time users
    # Users of this month that are absent from the previous month's list
    notLastMonth = lengths(Map(setdiff, users, lag(users)))
  ) %>%
  select(-users) %>%
  # Join the monthly statistics back to the real data; the key is spelled
  # out so the join does not depend on (or announce) a guessed column set.
  right_join(dt, by = "mth")

# A tibble: 9 × 6
#       mth   new totNew notLastMonth       date  user
# 1 2010-01     2      2            2 2010-01-10   123
# 2 2010-01     2      2            2 2010-01-11   123
# 3 2010-01     2      2            2 2010-01-12   180
# 4 2010-02     2      4            2 2010-02-13   129
# 5 2010-02     2      4            2 2010-02-14   129
# 6 2010-02     2      4            2 2010-02-14   184
# 7 2010-03     1      5            2 2010-03-22   145
# 8 2010-03     1      5            2 2010-03-23   180
# 9 2010-03     1      5            2 2010-03-24   145