DAY 9
1
AI & Data

## 訓練樣本與測試樣本

``````\$samples_20percent = []; // rows drawn for the 20% share
\$labels_20percent = [];  // labels of the rows in the 20% share
\$samples_80percent = []; // rows left over for the 80% share
\$labels_80percent = [];  // labels of the rows in the 80% share

// Draw distinct random row indices for the 20% share.
// The request is clamped to the number of available rows: asking for more
// distinct indices than exist would keep the draw loop below from ever ending.
\$count = min((int) \$count_20percent, count(\$getSample));
\$lastIndex = count(\$getSample) - 1;

// The drawn index is used as the array key, so a repeated draw simply
// overwrites its own entry and the loop keeps going until enough distinct
// indices were collected.
\$drawn = [];
while (count(\$drawn) < \$count) {
    \$drawn[mt_rand(0, \$lastIndex)] = true;
}

\$randValue = array_keys(\$drawn);
sort(\$randValue); // walk the selected rows in ascending order

foreach (\$randValue as \$value) {
    // Copy the selected row and its label into the 20% share ...
    \$samples_20percent[] = \$total_sample[\$value];
    \$labels_20percent[] = \$getTargets[\$value];

    // ... and drop it from the source arrays so that only the 80% share remains.
    unset(\$total_sample[\$value], \$getTargets[\$value]);
}

// unset() leaves gaps in the keys; array_values() re-indexes the remainder from 0.
\$samples_80percent = array_values(\$total_sample);
\$labels_80percent = array_values(\$getTargets);
``````

``````KNearestNeighbors(\$k=3)
``````

``````require_once __DIR__ . '/vendor/autoload.php';

use Phpml\Classification\KNearestNeighbors;
use Phpml\Dataset\CsvDataset;

// Load the data set from 'iris.csv' (a CSV file, not an Excel workbook).
// NOTE(review): the second argument (4) is presumably the number of feature
// columns - the code below reads indices 0-3 of every sample; confirm against
// the CsvDataset constructor.
\$dataset = new CsvDataset('iris.csv',4);

// Fetch the sample rows and their targets (labels) from the data set.
\$getSample = \$dataset->getSamples();
\$getTargets = \$dataset->getTargets();

// Number of feature columns handled per sample. Column order:
// 0 = sepal length, 1 = sepal width, 2 = petal length, 3 = petal width.
\$featureCount = 4;

// Per-feature maximum, minimum and scaled column. The extremes stay 0 and the
// scaled columns stay empty when the data set contains no rows.
\$featureMax = array_fill(0, \$featureCount, 0);
\$featureMin = array_fill(0, \$featureCount, 0);
\$featureScaled = array_fill(0, \$featureCount, []);

if (count(\$getSample) > 0) {
    for (\$feature = 0; \$feature < \$featureCount; \$feature++) {
        // All values of this feature, in the same row order as \$getSample.
        \$column = array_map(
            static function (array \$sample) use (\$feature) {
                return \$sample[\$feature];
            },
            \$getSample
        );

        \$featureMax[\$feature] = max(\$column);
        \$featureMin[\$feature] = min(\$column);
        \$range = \$featureMax[\$feature] - \$featureMin[\$feature];

        // Min-max scaling x' = (x - min) / (max - min), rounded to 3 decimals.
        // A constant column has a range of 0; it is mapped to 0.0 instead of
        // dividing by zero.
        foreach (\$column as \$value) {
            \$featureScaled[\$feature][] = 0.0 === (float) \$range
                ? 0.0
                : round((\$value - \$featureMin[\$feature]) / \$range, 3);
        }
    }
}

// Expose the results under the per-feature variable names the rest of the
// script reads.
[\$sepalLength_max, \$sepalWidth_max, \$petalLength_max, \$petalWidth_max] = \$featureMax;
[\$sepalLength_min, \$sepalWidth_min, \$petalLength_min, \$petalWidth_min] = \$featureMin;
[\$sepalLength_array, \$sepalWidth_array, \$petalLength_array, \$petalWidth_array] = \$featureScaled;

// Sizes of the two shares: 20% of all rows (rounded) and whatever remains.
\$count_total = count(\$getSample);
\$count_20percent = round(\$count_total * 0.2);
\$count_80percent = \$count_total - \$count_20percent;

// Re-assemble the four scaled feature columns into one row per sample:
// [sepal length, sepal width, petal length, petal width].
\$total_sample = [];
foreach (array_keys(\$sepalLength_array) as \$row) {
    \$total_sample[] = [
        \$sepalLength_array[\$row],
        \$sepalWidth_array[\$row],
        \$petalLength_array[\$row],
        \$petalWidth_array[\$row],
    ];
}

\$samples_20percent = []; // rows drawn for the 20% share
\$labels_20percent = [];  // labels of the rows in the 20% share
\$samples_80percent = []; // rows left over for the 80% share
\$labels_80percent = [];  // labels of the rows in the 80% share

// Draw distinct random row indices for the 20% share.
// The request is clamped to the number of available rows: asking for more
// distinct indices than exist would keep the draw loop below from ever ending.
\$count = min((int) \$count_20percent, count(\$getSample));
\$lastIndex = count(\$getSample) - 1;

// The drawn index is used as the array key, so a repeated draw simply
// overwrites its own entry and the loop keeps going until enough distinct
// indices were collected.
\$drawn = [];
while (count(\$drawn) < \$count) {
    \$drawn[mt_rand(0, \$lastIndex)] = true;
}

\$randValue = array_keys(\$drawn);
sort(\$randValue); // walk the selected rows in ascending order

foreach (\$randValue as \$value) {
    // Copy the selected row and its label into the 20% share ...
    \$samples_20percent[] = \$total_sample[\$value];
    \$labels_20percent[] = \$getTargets[\$value];

    // ... and drop it from the source arrays so that only the 80% share remains.
    unset(\$total_sample[\$value], \$getTargets[\$value]);
}

// unset() leaves gaps in the keys; array_values() re-indexes the remainder from 0.
\$samples_80percent = array_values(\$total_sample);
\$labels_80percent = array_values(\$getTargets);

// Train a k-nearest-neighbours classifier with k = 3 on the 80% share.
// The value is passed directly: writing "\$k=3" inside the call is an
// assignment expression, not a named argument, and leaves a stray \$k behind.
\$classifier = new KNearestNeighbors(3);
\$classifier->train(\$samples_80percent, \$labels_80percent);

// Predict a label for every row of the 20% share.
\$predictedLabels = \$classifier->predict(\$samples_20percent);

// Dump the predictions; <pre> keeps var_dump's line breaks when viewed in a browser.
echo '<pre>';
var_dump(\$predictedLabels);
echo '</pre>';
``````