import pandas as pd

# Load the user table from the Excel workbook.
users = pd.read_excel('users.xlsx')

# DataFrame.drop removes rows or columns:
#   labels  - names of the rows/columns to remove
#   axis    - 0 removes rows, 1 removes columns
#   inplace - True mutates the frame itself and returns None
# Because an in-place drop returns None, its result must not be assigned;
# the original bound ``data`` to None this way.
users.drop(labels=['age', 'sex'], axis=1, inplace=True)

# Keep the ``data`` name available, now pointing at the trimmed frame.
data = users
print(users)
# 删除空值列 (drop columns that are entirely empty)
# A column that is empty in every row carries no information for analysis,
# so such columns are removed before any further processing.
import pandas as pd

# Load the order-detail table.
detail = pd.read_excel('meal_order_detail.xlsx')

# DataFrame.count() returns the number of non-null cells per column; a count
# of zero therefore marks a column that is empty in every row.
col = detail.count() == 0

# Select the labels of the empty columns with the boolean mask and drop them
# in a single call. This avoids integer-position lookups (``col[i]``) on a
# Series indexed by column name, which newer pandas versions reject.
empty_labels = col[col].index
detail.drop(labels=empty_labels, axis=1, inplace=True)

print("detail 的形状:", detail.shape)
#删除空列
#数据去重
#相似度运算
# Removing data is done with DataFrame.drop(); removing duplicate rows is
# done with DataFrame.drop_duplicates().
import pandas as pd

# Load the order-detail table.
detail = pd.read_excel('meal_order_detail.xlsx')

# Deduplicate on the combination of the 'counts' and 'amounts' columns.
# The call is made without ``inplace=True`` so that it returns the
# deduplicated frame; with ``inplace=True`` it returns None and the
# ``.shape`` access below would raise AttributeError. ``detail`` itself is
# left unmodified.
# To deduplicate a projection instead, select the columns first:
# ``detail[['counts', 'amounts']].drop_duplicates()``.
data1 = detail.drop_duplicates(subset=['counts', 'amounts'])
print(data1.shape)
12345678910111213141516171819202122