机器学习:逻辑回归_案例

机器学习,逻辑回归案例。样例代码:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

"""
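Breast-cancer prediction example: the first 9 feature columns are used to
predict the last column (Class), where 2 means benign (mild) and 4 means
malignant (severe). By default the minority label 4 is treated as class 1
and the majority label 2 as class 0. Logistic regression is used because
this is a classification problem.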
"""

# 1. Load the data
data = pd.read_csv('data/breast-cancer-wisconsin.csv')
# print(data.info())

# 2. Clean the data
# The code treats '?' as the missing-value marker: replace() turns every '?'
# into NaN, then dropna() removes each row that contains a NaN.
data = data.replace(to_replace='?', value=np.nan)
data = data.dropna()

# 3. Feature engineering
# x: every column from the second through the second-to-last.
# y: the "Class" column (the label to predict).
x = data.iloc[:, 1:-1]
# Any column that held '?' was parsed by read_csv as strings and is still
# non-numeric after those rows are dropped. Cast the features to float64
# explicitly so that scaling and training never depend on implicit
# string-to-number coercion; a value that is not numeric fails right here
# with a clear error instead of deep inside the estimator.
x = x.astype(np.float64)
y = data["Class"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=22
)

# Standardize the features. The scaler is fitted on the training set only and
# then reused on the test set, so the test data does not influence the
# statistics used for scaling.
pre = StandardScaler()
x_train = pre.fit_transform(x_train)
x_test = pre.transform(x_test)

# 4. Train the model
model = LogisticRegression()
model.fit(x_train, y_train)

# 5. Predict on the held-out rows
y_predict = model.predict(x_test)
print(y_predict)

# 6. Evaluate: fraction of test rows whose label was predicted correctly
print(accuracy_score(y_test, y_predict))