✓ 已复制到剪贴板
本文件夹代码集合
💡 快捷键提示:Ctrl + Shift + 1/2/3/4 快速复制对应代码 | 双击文件名快速复制
复制 折线图
Ctrl+Shift+1
复制 柱状图
Ctrl+Shift+2
复制 饼图
Ctrl+Shift+3
复制 鸢尾花
Ctrl+Shift+4
折线图.py
快速复制
# Iris dataset overview followed by a line chart of student scores.
#
# Part 1 loads scikit-learn's iris dataset, splits it 80/20 and prints a
# short summary.  Part 2 builds a small score table and draws one line per
# subject.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# The chart title, axis labels and tick labels below are Chinese.  List
# CJK-capable fonts first so the text does not render as empty boxes, and
# disable the Unicode minus sign because those fonts may not provide it.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False

# ---------------------------------------------------------------------------
# Part 1: iris dataset overview
# ---------------------------------------------------------------------------

# Load the dataset and pull out every value the report prints.
iris = load_iris()
sample_count = len(iris.data)          # number of samples (rows)
feature_count = iris.data.shape[1]     # number of features (columns)
class_names = iris.target_names        # class labels
feature_names = iris.feature_names     # feature labels

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42
)
train_size = len(X_train)
test_size = len(X_test)

print("=== 鸢尾花数据集数据概览化 ===")
print("数据集信息:")
print(f"- 样本数量:{sample_count}")
print(f"- 特征数量:{feature_count}")
print(f"- 类别名称:{class_names}")
print(f"- 特征名称:{feature_names}")
# NOTE(review): this heading reads "standardization", but the code above only
# splits the data and never scales it -- confirm the intended wording.
print("\n数据集标准化:")
print(f"训练集大小:{train_size} 个样本")
print(f"测试集大小:{test_size} 个样本")

# ---------------------------------------------------------------------------
# Part 2: student score line chart
# ---------------------------------------------------------------------------

# Column labels (subjects), row labels (students) and a 4x2 score matrix:
# one row per student, one column per subject.
a = np.array(["数学", "英语"])
a1 = np.array(["小米", "张三", "李四", "王五"])
a2 = np.array([[88, 95], [85, 75], [92, 88], [70, 82]])

h = pd.DataFrame(data=a2, columns=a, index=a1)
print(h)

# One line per subject, with students along the x axis.
for subject in h.columns:
    plt.plot(h.index, h[subject], label=subject)

plt.title('学生成绩折线图')
plt.xlabel("学生姓名")
plt.ylabel("成绩")
plt.legend()  # show which line belongs to which subject
plt.show()
柱状图.py
快速复制
# Grouped bar chart of student scores with each value printed above its bar.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prefer CJK-capable fonts so the Chinese labels render, and fall back to an
# ASCII minus sign because those fonts may lack the Unicode one.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False

# Score table: one row per student, one column per subject.
subjects = np.array(["数学", "英语"])
students = np.array(["小米", "张三", "李四", "王五"])
score_matrix = np.array([[88, 95], [85, 75], [92, 88], [70, 82]])

scores = pd.DataFrame(data=score_matrix, columns=subjects, index=students)
print(scores)
print()

plt.figure(figsize=(10, 6))

# Each student occupies one integer slot on the x axis; the two subject bars
# sit side by side, half a bar width to either side of the slot centre.
slots = np.arange(len(students))
bar_width = 0.35
half_width = bar_width / 2

math_bars = plt.bar(
    slots - half_width,
    scores["数学"],
    width=bar_width,
    label='数学',
    color='skyblue',
    alpha=0.8,
)
english_bars = plt.bar(
    slots + half_width,
    scores["英语"],
    width=bar_width,
    label='英语',
    color='lightcoral',
    alpha=0.8,
)


def add_labels(bars):
    """Write each bar's height as bold text centred just above the bar."""
    for rect in bars:
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width() / 2.
        plt.text(
            centre_x,
            top,
            f'{top}',
            ha='center',
            va='bottom',
            fontsize=10,
            fontweight='bold',
        )


for bar_group in (math_bars, english_bars):
    add_labels(bar_group)

plt.title('学生成绩柱状图', fontsize=16, fontweight='bold')
plt.xlabel("学生姓名", fontsize=12)
plt.ylabel("成绩", fontsize=12)
plt.xticks(slots, students)  # label each slot with the student's name
plt.legend(fontsize=12)
plt.grid(True, linestyle='--', alpha=0.3, axis='y')  # horizontal grid lines only

# Leave headroom above the tallest bar for the value labels.
plt.ylim(0, 105)

plt.tight_layout()
plt.show()
饼图.py
快速复制
# Pie chart showing how one student's total score divides between subjects.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prefer CJK-capable fonts so the Chinese labels render, and fall back to an
# ASCII minus sign because those fonts may lack the Unicode one.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False

# Score table: one row per student, one column per subject.
subjects = np.array(["数学", "英语"])
students = np.array(["小米", "张三", "李四", "王五"])
score_matrix = np.array([[88, 95], [85, 75], [92, 88], [70, 82]])

scores = pd.DataFrame(data=score_matrix, columns=subjects, index=students)
print(scores)

# Pick the student to chart; change the name to chart a different row.
student_name = "小米"
student_scores = scores.loc[student_name]

plt.figure(figsize=(10, 8))

# One wedge per subject, labelled with the subject name and its percentage.
wedge_colors = ['#ff9999', '#66b3ff']
plt.pie(
    student_scores,
    labels=student_scores.index,
    autopct='%1.1f%%',
    colors=wedge_colors,
    startangle=90,
    textprops={'fontsize': 12},
)

plt.title(f'{student_name}的成绩分布饼图', fontsize=16)
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.tight_layout()
plt.show()
鸢尾花.py
快速复制
# Iris dataset summary followed by a house-price linear-regression pipeline.
#
# Environment setup (run once, in this order):
#   python -m pip install numpy==1.21.6 -i https://pypi.tuna.tsinghua.edu.cn/simple/
#   # install the prebuilt scipy 1.7.3 wheel
#   python -m pip install scipy==1.7.3 --only-binary=all -i https://pypi.tuna.tsinghua.edu.cn/simple/
#   # install scikit-learn 1.0.2
#   python -m pip install scikit-learn==1.0.2 -i https://pypi.tuna.tsinghua.edu.cn/simple/

import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder


def _report_metrics(heading, actual, predicted):
    """Print MSE and R² for a set of predictions and return both values."""
    # MSE: average squared gap between prediction and truth; lower is better.
    mse_value = mean_squared_error(actual, predicted)
    # R²: share of target variance explained relative to predicting the mean;
    # closer to 1 is better, negative means worse than predicting the mean.
    r2_value = r2_score(actual, predicted)
    print(heading)
    print(f"均方误差 (MSE): {mse_value:.2f}")
    print(f"决定系数 (R²): {r2_value:.2f}")
    return mse_value, r2_value


# ---------------------------------------------------------------------------
# Part 1: iris dataset summary
# ---------------------------------------------------------------------------

iris = load_iris()
n_samples, n_features = iris.data.shape

# Hold out 40% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=42
)

print(f"样本数量: {n_samples}")
print(f"特征数量: {n_features}")
print(f"类别名称: {list(iris.target_names)}")
print(f"特征名称: {list(iris.feature_names)}")
print(f"训练集大小: {len(X_train)}")
print(f"测试集大小: {len(X_test)}")

# ---------------------------------------------------------------------------
# Part 2: house-price regression
# ---------------------------------------------------------------------------

# Simulated listings: area, room count, floor, construction year, district
# (categorical) and the sale price, which is the regression target.
data = {
    'area': [70, 85, 100, 120, 60, 150, 200, 80, 95, 110],
    'rooms': [2, 3, 3, 4, 2, 5, 6, 3, 3, 4],
    'floor': [5, 2, 8, 10, 3, 15, 18, 7, 9, 11],
    'year_built': [2005, 2010, 2012, 2015, 2000, 2018, 2020, 2008, 2011, 2016],
    'location': ['Chaoyang', 'Haidian', 'Chaoyang', 'Dongcheng', 'Fengtai',
                 'Haidian', 'Chaoyang', 'Fengtai', 'Dongcheng', 'Haidian'],
    'price': [5000000, 6000000, 6500000, 7000000, 4500000,
              10000000, 12000000, 5500000, 6200000, 7500000],
}
df = pd.DataFrame(data)

print("数据预览:")
print(df.head())

# Column groups: numeric columns are standardized, the categorical column is
# one-hot encoded.
numeric_features = ['area', 'rooms', 'floor', 'year_built']
categorical_features = ['location']

# Features and target.
X = df[numeric_features + categorical_features]
y = df['price']

# Hold out 20% of the listings for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Sub-pipeline for numeric columns: standardization only.
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

# Sub-pipeline for categorical columns: one-hot encoding that tolerates
# categories appearing in the test set but not seen during fitting.
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Route each column group to its sub-pipeline.  Every entry is a
# (name, transformer, columns) triple.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Full model: preprocessing followed by ordinary linear regression.
model_pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', LinearRegression()),
    ]
)

# Fit on the training split and predict the held-out listings.
model_pipeline.fit(X_train, y_train)
y_pred = model_pipeline.predict(X_test)

print("\n预测结果:")
print(y_pred)

# Evaluate the baseline model.
mse, r2 = _report_metrics("\n模型评估:", y_test, y_pred)

# Grid search over the only hyper-parameter tuned here: whether the linear
# model fits an intercept.
param_grid = {
    'regressor__fit_intercept': [True, False],
}
grid_search = GridSearchCV(
    model_pipeline,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
)
grid_search.fit(X_train, y_train)

print("\n最佳参数:")
print(grid_search.best_params_)

# Re-evaluate with the best estimator found by the search.
best_model = grid_search.best_estimator_
y_pred_optimized = best_model.predict(X_test)

mse_opt, r2_opt = _report_metrics("\n优化后的模型评估:", y_test, y_pred_optimized)