机器学习,逻辑回归案例。样例代码:
"""Breast-cancer prediction with logistic regression.

The feature columns (second column through the second-to-last column) are
used to predict the last column, ``Class``: 2 means benign (mild) and
4 means malignant (severe).  By default the minority label 4 is treated as
class 1 and the majority label 2 as class 0.  Logistic regression is used
here because this is a classification problem.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1. Load the data.
data = pd.read_csv('data/breast-cancer-wisconsin.csv')
# Tip: data.info() is handy here to inspect dtypes and missing values.

# 2. Clean the data.
# The file marks missing values with '?'. Turn them into NaN, then drop
# every row that contains a NaN.
data = data.replace(to_replace='?', value=np.nan)
data = data.dropna()

# 3. Feature engineering.
# x: the second column through the second-to-last column (the first column
#    is skipped).
# y: the last column, i.e. the "Class" column.
# A column that contained '?' was read as strings and is still string-typed
# after the cleanup above, so convert the features to float explicitly
# instead of relying on scikit-learn's implicit coercion.
x = data.iloc[:, 1:-1].astype(float)
y = data["Class"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=22
)

# Standardize the features. The scaler is fitted on the training set only
# and then reused on the test set so no test statistics leak into training.
pre = StandardScaler()
x_train = pre.fit_transform(x_train)
x_test = pre.transform(x_test)

# 4. Train the model.
model = LogisticRegression()
model.fit(x_train, y_train)

# 5. Predict on the held-out test set.
y_predict = model.predict(x_test)
print(y_predict)

# 6. Evaluate: fraction of test samples classified correctly.
print(accuracy_score(y_test, y_predict))