注意
跳转到末尾下载完整示例代码,或通过 JupyterLite 或 Binder 在浏览器中运行此示例。
连接多个特征提取方法#
在许多实际应用中,从数据集中提取特征有多种方法。通常,结合多种方法有助于获得良好的性能。本示例展示了如何使用 FeatureUnion 组合通过 PCA 和单变量选择获得的特征。
使用此转换器组合特征的好处是,它允许在整个过程中进行交叉验证和网格搜索。
本示例中使用的组合在此数据集上并非特别有用,仅用于说明 FeatureUnion 的用法。
Combined space has 3 features
Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5; 1/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 1/5; 1/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 2/5; 1/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 2/5; 1/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 3/5; 1/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 3/5; 1/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.867 total time= 0.0s
[CV 4/5; 1/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 4/5; 1/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 5/5; 1/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 5/5; 1/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 1/5; 2/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=1
[CV 1/5; 2/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=1;, score=0.900 total time= 0.0s
[CV 2/5; 2/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=1
[CV 2/5; 2/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=1;, score=1.000 total time= 0.0s
[CV 3/5; 2/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=1
[CV 3/5; 2/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=1;, score=0.867 total time= 0.0s
[CV 4/5; 2/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=1
[CV 4/5; 2/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=1;, score=0.933 total time= 0.0s
[CV 5/5; 2/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=1
[CV 5/5; 2/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=1;, score=1.000 total time= 0.0s
[CV 1/5; 3/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=10
[CV 1/5; 3/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=10;, score=0.933 total time= 0.0s
[CV 2/5; 3/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=10
[CV 2/5; 3/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=10;, score=1.000 total time= 0.0s
[CV 3/5; 3/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=10
[CV 3/5; 3/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=10;, score=0.900 total time= 0.0s
[CV 4/5; 3/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=10
[CV 4/5; 3/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=10;, score=0.933 total time= 0.0s
[CV 5/5; 3/18] START features__pca__n_components=1, features__univ_select__k=1, svm__C=10
[CV 5/5; 3/18] END features__pca__n_components=1, features__univ_select__k=1, svm__C=10;, score=1.000 total time= 0.0s
[CV 1/5; 4/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1
[CV 1/5; 4/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 2/5; 4/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1
[CV 2/5; 4/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1;, score=0.967 total time= 0.0s
[CV 3/5; 4/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1
[CV 3/5; 4/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 4/5; 4/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1
[CV 4/5; 4/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 5/5; 4/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1
[CV 5/5; 4/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 1/5; 5/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=1
[CV 1/5; 5/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=1;, score=0.933 total time= 0.0s
[CV 2/5; 5/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=1
[CV 2/5; 5/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=1;, score=0.967 total time= 0.0s
[CV 3/5; 5/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=1
[CV 3/5; 5/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=1;, score=0.933 total time= 0.0s
[CV 4/5; 5/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=1
[CV 4/5; 5/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=1;, score=0.933 total time= 0.0s
[CV 5/5; 5/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=1
[CV 5/5; 5/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=1;, score=1.000 total time= 0.0s
[CV 1/5; 6/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=10
[CV 1/5; 6/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=10;, score=0.967 total time= 0.0s
[CV 2/5; 6/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=10
[CV 2/5; 6/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=10;, score=0.967 total time= 0.0s
[CV 3/5; 6/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=10
[CV 3/5; 6/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=10;, score=0.933 total time= 0.0s
[CV 4/5; 6/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=10
[CV 4/5; 6/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=10;, score=0.933 total time= 0.0s
[CV 5/5; 6/18] START features__pca__n_components=1, features__univ_select__k=2, svm__C=10
[CV 5/5; 6/18] END features__pca__n_components=1, features__univ_select__k=2, svm__C=10;, score=1.000 total time= 0.0s
[CV 1/5; 7/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1
[CV 1/5; 7/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 2/5; 7/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1
[CV 2/5; 7/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 3/5; 7/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1
[CV 3/5; 7/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1;, score=0.867 total time= 0.0s
[CV 4/5; 7/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1
[CV 4/5; 7/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 5/5; 7/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1
[CV 5/5; 7/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 1/5; 8/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=1
[CV 1/5; 8/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=1;, score=0.967 total time= 0.0s
[CV 2/5; 8/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=1
[CV 2/5; 8/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=1;, score=1.000 total time= 0.0s
[CV 3/5; 8/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=1
[CV 3/5; 8/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=1;, score=0.933 total time= 0.0s
[CV 4/5; 8/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=1
[CV 4/5; 8/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=1;, score=0.933 total time= 0.0s
[CV 5/5; 8/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=1
[CV 5/5; 8/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=1;, score=1.000 total time= 0.0s
[CV 1/5; 9/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=10
[CV 1/5; 9/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=10;, score=0.967 total time= 0.0s
[CV 2/5; 9/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=10
[CV 2/5; 9/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=10;, score=0.967 total time= 0.0s
[CV 3/5; 9/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=10
[CV 3/5; 9/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=10;, score=0.900 total time= 0.0s
[CV 4/5; 9/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=10
[CV 4/5; 9/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=10;, score=0.933 total time= 0.0s
[CV 5/5; 9/18] START features__pca__n_components=2, features__univ_select__k=1, svm__C=10
[CV 5/5; 9/18] END features__pca__n_components=2, features__univ_select__k=1, svm__C=10;, score=1.000 total time= 0.0s
[CV 1/5; 10/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1
[CV 1/5; 10/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1;, score=0.967 total time= 0.0s
[CV 2/5; 10/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1
[CV 2/5; 10/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 3/5; 10/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1
[CV 3/5; 10/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 4/5; 10/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1
[CV 4/5; 10/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 5/5; 10/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1
[CV 5/5; 10/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 1/5; 11/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=1
[CV 1/5; 11/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=1;, score=0.967 total time= 0.0s
[CV 2/5; 11/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=1
[CV 2/5; 11/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=1;, score=1.000 total time= 0.0s
[CV 3/5; 11/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=1
[CV 3/5; 11/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=1;, score=0.933 total time= 0.0s
[CV 4/5; 11/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=1
[CV 4/5; 11/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=1;, score=0.967 total time= 0.0s
[CV 5/5; 11/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=1
[CV 5/5; 11/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=1;, score=1.000 total time= 0.0s
[CV 1/5; 12/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=10
[CV 1/5; 12/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=10;, score=0.967 total time= 0.0s
[CV 2/5; 12/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=10
[CV 2/5; 12/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=10;, score=1.000 total time= 0.0s
[CV 3/5; 12/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=10
[CV 3/5; 12/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=10;, score=0.900 total time= 0.0s
[CV 4/5; 12/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=10
[CV 4/5; 12/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=10;, score=0.933 total time= 0.0s
[CV 5/5; 12/18] START features__pca__n_components=2, features__univ_select__k=2, svm__C=10
[CV 5/5; 12/18] END features__pca__n_components=2, features__univ_select__k=2, svm__C=10;, score=1.000 total time= 0.0s
[CV 1/5; 13/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1
[CV 1/5; 13/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1;, score=0.967 total time= 0.0s
[CV 2/5; 13/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1
[CV 2/5; 13/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 3/5; 13/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1
[CV 3/5; 13/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 4/5; 13/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1
[CV 4/5; 13/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1;, score=0.967 total time= 0.0s
[CV 5/5; 13/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1
[CV 5/5; 13/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 1/5; 14/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=1
[CV 1/5; 14/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=1;, score=0.967 total time= 0.0s
[CV 2/5; 14/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=1
[CV 2/5; 14/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=1;, score=1.000 total time= 0.0s
[CV 3/5; 14/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=1
[CV 3/5; 14/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=1;, score=0.933 total time= 0.0s
[CV 4/5; 14/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=1
[CV 4/5; 14/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=1;, score=0.967 total time= 0.0s
[CV 5/5; 14/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=1
[CV 5/5; 14/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=1;, score=1.000 total time= 0.0s
[CV 1/5; 15/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=10
[CV 1/5; 15/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=10;, score=1.000 total time= 0.0s
[CV 2/5; 15/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=10
[CV 2/5; 15/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=10;, score=1.000 total time= 0.0s
[CV 3/5; 15/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=10
[CV 3/5; 15/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=10;, score=0.933 total time= 0.0s
[CV 4/5; 15/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=10
[CV 4/5; 15/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=10;, score=0.967 total time= 0.0s
[CV 5/5; 15/18] START features__pca__n_components=3, features__univ_select__k=1, svm__C=10
[CV 5/5; 15/18] END features__pca__n_components=3, features__univ_select__k=1, svm__C=10;, score=1.000 total time= 0.0s
[CV 1/5; 16/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1
[CV 1/5; 16/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1;, score=0.967 total time= 0.0s
[CV 2/5; 16/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1
[CV 2/5; 16/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 3/5; 16/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1
[CV 3/5; 16/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1;, score=0.933 total time= 0.0s
[CV 4/5; 16/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1
[CV 4/5; 16/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1;, score=0.967 total time= 0.0s
[CV 5/5; 16/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1
[CV 5/5; 16/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1;, score=1.000 total time= 0.0s
[CV 1/5; 17/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=1
[CV 1/5; 17/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=1;, score=0.967 total time= 0.0s
[CV 2/5; 17/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=1
[CV 2/5; 17/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=1;, score=1.000 total time= 0.0s
[CV 3/5; 17/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=1
[CV 3/5; 17/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=1;, score=0.967 total time= 0.0s
[CV 4/5; 17/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=1
[CV 4/5; 17/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=1;, score=0.967 total time= 0.0s
[CV 5/5; 17/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=1
[CV 5/5; 17/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=1;, score=1.000 total time= 0.0s
[CV 1/5; 18/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=10
[CV 1/5; 18/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=10;, score=1.000 total time= 0.0s
[CV 2/5; 18/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=10
[CV 2/5; 18/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=10;, score=1.000 total time= 0.0s
[CV 3/5; 18/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=10
[CV 3/5; 18/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=10;, score=0.900 total time= 0.0s
[CV 4/5; 18/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=10
[CV 4/5; 18/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=10;, score=0.967 total time= 0.0s
[CV 5/5; 18/18] START features__pca__n_components=3, features__univ_select__k=2, svm__C=10
[CV 5/5; 18/18] END features__pca__n_components=3, features__univ_select__k=2, svm__C=10;, score=1.000 total time= 0.0s
Pipeline(steps=[('features',
FeatureUnion(transformer_list=[('pca', PCA(n_components=3)),
('univ_select',
SelectKBest(k=1))])),
('svm', SVC(C=10, kernel='linear'))])
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.svm import SVC
# Load the iris samples and their class labels.
iris = load_iris()
X = iris.data
y = iris.target

# First feature source: projection onto two principal components.
pca = PCA(n_components=2)

# Second feature source: keep the single highest-scoring original column.
selection = SelectKBest(k=1)

# Concatenate the outputs of both extractors column-wise.
combined_features = FeatureUnion(
    transformer_list=[("pca", pca), ("univ_select", selection)]
)

# Fit the union on the full data, then push the same data through it to
# report how many columns the combined representation has.
combined_features.fit(X, y)
X_features = combined_features.transform(X)
n_combined = X_features.shape[1]
print("Combined space has", n_combined, "features")

svm = SVC(kernel="linear")

# Chain feature extraction and the classifier so that the grid search
# refits the extractors inside every cross-validation split.
pipeline = Pipeline(steps=[("features", combined_features), ("svm", svm)])

# Search jointly over the PCA size, the number of selected columns and C
# (3 * 2 * 3 = 18 candidate settings).
param_grid = {
    "features__pca__n_components": [1, 2, 3],
    "features__univ_select__k": [1, 2],
    "svm__C": [0.1, 1, 10],
}
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)
脚本总运行时间: (0 分 0.406 秒)
相关示例