
Don't Leave Regrets in CSGO

2023-06-30 10:25  Author: I春树I

# Import the required libraries
import pandas as pd
from sklearn import preprocessing
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the training and test data
data_train = pd.read_csv("Train_data.csv")  # load the training data into a DataFrame
data_test = pd.read_csv("Test_data.csv")    # load the test data into a DataFrame

def encoding(df):
    # Label-encode every string (object) column of the DataFrame in place
    for col in df.columns:
        if df[col].dtype == 'object':
            label_encoder = preprocessing.LabelEncoder()
            df[col] = label_encoder.fit_transform(df[col])

# Label-encode the training data
encoding(data_train)

X = data_train.drop(["class"], axis=1)  # features: every column except "class"
y = data_train["class"]                 # target: the "class" column

# Feature selection: keep the 25 features with the highest mutual information with the target
select_best_cols = SelectKBest(mutual_info_classif, k=25)
select_best_cols.fit(X, y)
selected_features = X.columns[select_best_cols.get_support()]  # names of the selected columns
X = X[selected_features]                                       # keep only those columns

# Split into training and test sets (30% test, random seed 42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize: fit the scaler on the training set, then apply it to both sets
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

def classalgo_test(x_train, x_test, y_train, y_test):
    # Train a random forest classifier and return its test accuracy as a formatted string
    rfc = RandomForestClassifier()
    rfc.fit(x_train, y_train)
    y_test_pred = rfc.predict(x_test)
    test_acc = "{:.2f}".format(accuracy_score(y_test, y_test_pred))
    return test_acc

# Run the classifier and report its accuracy
a = classalgo_test(X_train, X_test, y_train, y_test)
print(a)
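
The script above loads Test_data.csv into data_test but never uses it. A minimal sketch of scoring that file with the same pipeline is shown below; it assumes Test_data.csv has the same feature columns as Train_data.csv, and the refit forest (rfc_full) and X_unseen are illustrative names, not part of the original post.

# Sketch only: score the unlabeled test file with the same preprocessing.
# Caveat: encoding() fits fresh LabelEncoders, so the integer codes only match the
# training encoding if each string column has the same set of categories in both files.
encoding(data_test)
X_unseen = sc.transform(data_test[selected_features])  # same 25 features, same scaler
rfc_full = RandomForestClassifier(random_state=42)
rfc_full.fit(X_train, y_train)                         # refit on the scaled training split
print(rfc_full.predict(X_unseen)[:10])                 # predicted classes for the first 10 rows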



import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the data set
data = pd.read_csv("sms_spam.csv", encoding='ISO-8859-1')

# Vocabulary of keywords collected from the training messages
words = set()

# Clean the text column
column = 'text'
data[column] = data[column].str.lower()                                # lower-case the text
data[column] = data[column].str.replace('[^a-zA-Z ]', '', regex=True)  # keep only letters and spaces
data[column] = data[column].str.strip()                                # trim surrounding whitespace

# Split into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(data["text"], data["type"], test_size=0.1, random_state=42)

# Build the vocabulary from the training messages
for doc in X_train:
    words.update(doc.split())

# Build the count matrices: one row per message, one column per vocabulary word
# (str.count counts substring occurrences, the post's simple bag-of-words approximation)
X_train_counts = np.array([[doc.count(word) for word in words] for doc in X_train])
X_test_counts = np.array([[doc.count(word) for word in words] for doc in X_test])

# Train a multinomial naive Bayes classifier
model = MultinomialNB()
model.fit(X_train_counts, Y_train)

# Evaluate the model on the test set
Y_pred = model.predict(X_test_counts)
accuracy = accuracy_score(Y_test, Y_pred)

# Print the accuracy
print(f'{accuracy:.2f}')
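
The imports include precision_score, recall_score and f1_score, but only accuracy is reported. A small addition could print the remaining metrics; it assumes the "type" column uses the usual 'ham'/'spam' labels, with 'spam' treated as the positive class.

# Sketch only: report precision, recall and F1 (pos_label='spam' is an assumption)
precision = precision_score(Y_test, Y_pred, pos_label='spam')
recall = recall_score(Y_test, Y_pred, pos_label='spam')
f1 = f1_score(Y_test, Y_pred, pos_label='spam')
print(f'precision={precision:.2f} recall={recall:.2f} f1={f1:.2f}')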




import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Containers for the string-to-code mapping
str_list = []      # original string values
digital_list = []  # their label-encoded codes
my_dict = None     # mapping from string value to code

# Read the data
crime_data = pd.read_csv('crime.csv')

# Basic cleaning: drop duplicate rows and rows with missing values
# (the results must be assigned back, otherwise these calls have no effect)
crime_data = crime_data.drop_duplicates()
crime_data = crime_data.dropna()

# Collect the unique string values of every object column
for col in crime_data.columns:
    if crime_data[col].dtypes == 'object':
        unique_data = crime_data[col].unique()
        for item in unique_data:
            str_list.append(item)

# Label-encode the string columns and collect the corresponding codes
for col in crime_data.columns:
    if crime_data[col].dtypes == 'object':
        le = LabelEncoder()
        crime_data[col] = le.fit_transform(crime_data[col])
        unique_data = crime_data[col].unique()
        for item in unique_data:
            digital_list.append(item)

# Pair each string value with its code; unique() preserves first-appearance order,
# so the two lists line up, and all string columns share this one dictionary
my_dict = dict(zip(str_list, digital_list))

# Features and label
x = crime_data.loc[:, ['NEIGHBOURHOOD', 'MONTH']]
y = crime_data.loc[:, 'TYPE']

# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=10)

# Train a decision tree classifier
dtc = DecisionTreeClassifier()
dtc.fit(x_train, y_train)

# A new sample: the 'Sunset' neighbourhood in March (MONTH is numeric, so pass 3, not '3')
new_data = {"NEIGHBOURHOOD": my_dict['Sunset'], "MONTH": 3}

# Predict its crime type
prediction = dtc.predict(pd.DataFrame([new_data]))

# Map the predicted code back to the crime type string
for k, v in my_dict.items():
    if v == prediction[0]:
        outcome = k
        break

# Print the result
print('Predicted crime type:', outcome)
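
Here too accuracy_score is imported but never used; a minimal addition could report how well the tree does on the held-out split (y_test_pred is an illustrative name, not from the original post).

# Sketch only: test accuracy of the decision tree on the held-out split
y_test_pred = dtc.predict(x_test)
print('Test accuracy: {:.2f}'.format(accuracy_score(y_test, y_test_pred)))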



import math
import os
import sys
import pickle
import numpy as np
from numpy import *
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
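
The post breaks off after this final set of imports. Judging only from those imports, the missing block presumably tuned an SVC with GridSearchCV; the sketch below is speculative, and param_grid, grid, X_train and y_train are illustrative names that would have to come from the missing code.

# Speculative sketch: tune an SVC with a small illustrative parameter grid
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': ['scale', 'auto']}
grid = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy')
# grid.fit(X_train, y_train)                   # training data would come from the missing code
# print(grid.best_params_, grid.best_score_)   # best hyper-parameters and cross-validated score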

