假设我有这样的数据
Id Name Price sales Profit Month Category Mode Supplier 1 A 2 5 8 1 X K John 1 A 2 6 9 2 X K John 1 A 2 5 8 3 X K John 2 B 2 4 6 1 X L Sam 2 B 2 3 4 2 X L Sam 2 B 2 5 7 3 X L Sam 3 C 2 5 11 1 X M John 3 C 2 5 11 2 X L John 3 C 2 5 11 3 X K John 4 D 2 8 10 1 Y M John 4 D 2 8 10 2 Y K John 4 D 2 5 7 3 Y K John 5 E 2 5 9 1 Y M Sam 5 E 2 5 9 2 Y L Sam 5 E 2 5 9 3 Y M Sam 6 F 2 4 7 1 Z M Kyle 6 F 2 5 8 2 Z L Kyle 6 F 2 5 8 3 Z M Kyle
如果我应用table
函数,它只会组合行和结果
K L M X 4 4 1 Y 2 1 3 Z 0 1 2
现在如果我不想要所有行的总和但只需要那些具有Unique的行的总和,Id
那么它看起来如何
K L M X 2 2 1 Y 1 1 2 Z 0 1 1
谢谢
如果df
是你的data.frame:
# Subset original data.frame to keep columns of interest df1 <- df[,c("Id", "Category", "Mode")] # Remove duplicated rows df1 <- df1[!duplicated(df1),] # Create table with(df1, table(Category, Mode)) # Mode # Category K L M # X 2 2 1 # Y 1 1 2 # Z 0 1 1
或者在一行中使用 unique
table(unique(df[c("Id", "Category", "Mode")])[-1])
df <- structure(list(Id = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L), Name = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L), .Label = c("A", "B", "C", "D", "E", "F"), class = "factor"), Price = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L ), sales = c(5L, 6L, 5L, 4L, 3L, 5L, 5L, 5L, 5L, 8L, 8L, 5L, 5L, 5L, 5L, 4L, 5L, 5L), Profit = c(8L, 9L, 8L, 6L, 4L, 7L, 11L, 11L, 11L, 10L, 10L, 7L, 9L, 9L, 9L, 7L, 8L, 8L), Month = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), Category = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("X", "Y", "Z" ), class = "factor"), Mode = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 2L, 1L, 3L, 1L, 1L, 3L, 2L, 3L, 3L, 2L, 3L), .Label = c("K", "L", "M"), class = "factor"), Supplier = structure(c(1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L ), .Label = c("John", "Kyle", "Sam"), class = "factor")), .Names = c("Id", "Name", "Price", "sales", "Profit", "Month", "Category", "Mode", "Supplier"), class = "data.frame", row.names = c(NA, -18L))