# 不要把遗憾留在CSGO

import pandas as pd
from sklearn import preprocessing
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# 导入所需的库
# Read the training and test data sets.
data_train=pd.read_csv("Train_data.csv") # training data as a DataFrame
data_test=pd.read_csv("Test_data.csv") # test data as a DataFrame (not used further in this view)
def encoding(df):
    """Label-encode every object (string) column of *df*, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose string-typed columns are replaced by integer codes.

    Note: the body indentation was missing in the original (syntax error);
    restored here without changing any statement.
    """
    for col in df.columns:
        if df[col].dtype == 'object':
            # LabelEncoder maps the sorted unique values to codes 0..n-1.
            label_encoder = preprocessing.LabelEncoder()
            df[col] = label_encoder.fit_transform(df[col])
# Label-encode the training data (modified in place by encoding()).
encoding(data_train)
X = data_train.drop(["class"], axis=1) # features: every column except "class"
y = data_train["class"] # target: the "class" column
# Feature selection: keep the 25 features with the highest mutual information.
select_best_cols = SelectKBest(mutual_info_classif, k=25) # mutual-information scorer, k=25
select_best_cols.fit(X, y) # score each feature against the target
selected_features = X.columns[select_best_cols.get_support()] # names of the selected columns
X = X[selected_features] # keep only the selected columns
# Train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# 30% of the rows held out for testing; random_state=42 for reproducibility
# Standardize features (fit on train only, then apply to test).
sc = StandardScaler() # zero-mean / unit-variance scaler
X_train=sc.fit_transform(X_train) # fit on the training set and transform it
X_test=sc.transform(X_test) # transform the test set with the training statistics
def classalgo_test(x_train, x_test, y_train, y_test):
    """Train a RandomForestClassifier and return its test accuracy.

    Returns
    -------
    str
        Accuracy formatted to two decimals (e.g. '0.87') — kept as a string
        because the caller prints it directly.

    Note: the original body had no indentation (syntax error) and aliased
    ``rfc`` to ``algo`` redundantly; both fixed here.
    """
    algo = RandomForestClassifier()
    algo.fit(x_train, y_train)
    y_test_pred = algo.predict(x_test)
    return "{:.2f}".format(accuracy_score(y_test, y_test_pred))
# Train/evaluate the random-forest classifier on the prepared split.
a = classalgo_test(X_train, X_test, y_train, y_test) # accuracy as a two-decimal string
print(a) # report the test accuracy
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# ---- SMS spam classification: bag-of-words features + multinomial NB ----
from collections import Counter

# Read the data set (Latin-1: the file contains non-UTF-8 bytes).
data = pd.read_csv("sms_spam.csv", encoding='ISO-8859-1')

# Vocabulary, built from the training documents below.
words = set()

# Clean the text column.
column = 'text'
data[column] = data[column].str.lower()  # lowercase everything
# BUG FIX: pandas >= 2.0 defaults str.replace to regex=False, which would
# match the pattern literally; request regex matching explicitly.
data[column] = data[column].str.replace('[^a-zA-Z ]', '', regex=True)  # keep letters and spaces
data[column] = data[column].str.strip()  # trim surrounding whitespace

# Train/test split (10% held out, fixed seed for reproducibility).
X_train, X_test, Y_train, Y_test = train_test_split(
    data["text"], data["type"], test_size=0.1, random_state=42)

# Build the vocabulary from the training documents only.
# NOTE: the original loop body had lost its indentation; restored here.
for doc in X_train:
    words.update(doc.split())

# BUG FIX: the original used str.count(word), which counts *substrings*
# ("go" matches inside "going"); count whole whitespace-separated tokens.
_vocab = list(words)  # freeze one column order for both matrices

def _bow(docs):
    """Token-count feature matrix, shape (len(docs), len(_vocab))."""
    rows = []
    for doc in docs:
        counts = Counter(doc.split())
        rows.append([counts[w] for w in _vocab])
    return np.array(rows)

X_train_counts = _bow(X_train)
X_test_counts = _bow(X_test)

# Fit the multinomial Naive Bayes model on the count features.
model = MultinomialNB()
model.fit(X_train_counts, Y_train)

# Evaluate on the held-out set and report accuracy.
Y_pred = model.predict(X_test_counts)
accuracy = accuracy_score(Y_test, Y_pred)
print(f'{accuracy:.2f}')
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# ---- Crime data: label-encode string columns, remembering the mapping ----
str_list = []      # original string values, in order of appearance per column
digital_list = []  # corresponding integer codes after label encoding
my_dict = None     # string -> code mapping, built below

# Read the data and clean it first.
crime_data = pd.read_csv('crime.csv')
# BUG FIX: drop_duplicates()/dropna() return new frames; the original
# discarded the results, so nothing was actually removed. Cleaning is also
# done BEFORE collecting unique strings, so the str/code pairing below sees
# the same rows in both passes.
crime_data = crime_data.drop_duplicates()
crime_data = crime_data.dropna()

# Pass 1: collect the unique string values of every object column.
# NOTE: loop bodies had lost their indentation in the original; restored.
for col in crime_data.columns:
    if crime_data[col].dtypes == 'object':
        for item in crime_data[col].unique():
            str_list.append(item)

# Pass 2: encode the string columns and collect the resulting codes.
# unique() preserves order of appearance, so codes line up with pass 1.
for col in crime_data.columns:
    if crime_data[col].dtypes == 'object':
        le = LabelEncoder()
        crime_data[col] = le.fit_transform(crime_data[col])
        for item in crime_data[col].unique():
            digital_list.append(item)

# Pair each original string with its integer code.
# NOTE(review): values shared between two string columns collide in this
# dict (later column wins) — same as the original behavior; verify intent.
my_dict = dict(zip(str_list, digital_list))
# Features and label for the decision tree.
x = crime_data.loc[:, ['NEIGHBOURHOOD', 'MONTH']]
y = crime_data.loc[:, 'TYPE']

# Train/test split (30% held out, fixed seed).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=10)

# Train the decision-tree classifier.
dtc = DecisionTreeClassifier()
dtc.fit(x_train, y_train)

# New sample to classify.
# BUG FIX: MONTH was passed as the string '3' although the training column
# is numeric; pass an int so the dtypes match.
new_data = {"NEIGHBOURHOOD": my_dict['Sunset'], "MONTH": 3}

# BUG FIX: predict() returns an array; compare against its scalar element.
# Also initialize `outcome` so a missing code cannot raise NameError.
predicted_code = dtc.predict(pd.DataFrame([new_data]))[0]
outcome = None
# Reverse-lookup the predicted code in the string -> code mapping.
# NOTE: loop body indentation was missing in the original; restored.
for k, v in my_dict.items():
    if v == predicted_code:
        outcome = k
        break

# Report the predicted crime type.
print('根据预测可能的犯罪类型是:', outcome)
import math
import os
import sys
import pickle
import numpy as np
from numpy import *
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV