資源簡介
ID3 算法:使用熵最小策略構建決策樹的 MATLAB 實現代碼,對應中科大機器學習課程中的 ID3 算法實現。

代碼片段和文件信息
%% ID3: build a decision tree with the minimum-entropy strategy, then print and plot it
% Author: zzhh@mail.ustc.edu.cn   Created: 2019-03-17
% Tested on: R2018a (9.4.0.813654) win64
function myTree = ID3(dataset, labels)
% ID3 Build a decision tree from a data set and display it.
%
% Inputs:
%   dataset - data set, cell array or string array
%   labels  - attribute labels, cell array or string array
% Output:
%   myTree  - the decision tree produced by createTree (documented there as
%             a containers.Map)
%
% print_tree and tree_plot are helpers defined outside this excerpt; they
% are called exactly as in the original code.
myTree = createTree(dataset, labels);                        % build the tree
[nodeids, nodevalue, branchvalue] = print_tree(myTree);      % flatten the tree for plotting
tree_plot(nodeids, nodevalue, branchvalue);                  % draw the tree
end
%% Build a decision tree using the minimum-entropy strategy
function myTree = createTree(dataset, labels)
% CREATETREE Recursively build an ID3 decision tree.
%
% Inputs:
%   dataset - data set, cell array or string array; the last column is
%             used as the class column
%   labels  - attribute labels, cell array or string array; one label per
%             attribute column (all columns except the last)
% Output:
%   myTree  - containers.Map with a single key (the chosen attribute label)
%             whose value is another containers.Map from attribute value to
%             sub-tree; for a leaf, a char array holding the class name
%
% Raises an error when dataset is empty or when the number of labels does
% not match the number of attribute columns.

% No data at all: nothing to learn from.
if isempty(dataset)
    error('必須提供數據!')
end
size_data = size(dataset);
% The attribute count (columns minus the class column) must match labels.
if (size_data(2) - 1) ~= length(labels)
    error('屬性數量與數據集不一致!')
end
classList = dataset(:, size_data(2));
% All samples share one class: entropy is zero, return that class as a leaf.
if length(unique(classList)) == 1
    myTree = char(classList(1));
    return
end
% No attributes left but the classes still disagree: return the majority
% class (ties resolve to the first class in unique()'s sorted order).
if size_data(2) == 1
    [classNames, ~, classIdx] = unique(classList);
    classCounts = accumarray(classIdx(:), 1);
    [~, majority] = max(classCounts);
    myTree = char(classNames(majority));
    return
end
% Pick the attribute to split on.
bestFeature = chooseFeature(dataset);
bestFeatureLabel = char(labels(bestFeature));
% Node for this attribute and the map holding one branch per value.
myTree = containers.Map;
leaf = containers.Map;
% Distinct values taken by the chosen attribute.
featValues = dataset(:, bestFeature);
uniqueVals = unique(featValues);
% Drop the chosen attribute's label; deleting by index works for both row
% and column label vectors. The remaining labels are passed on as a row.
labels(bestFeature) = [];
subLabels = labels(:)';
% Recurse once per attribute value.
for i = 1:length(uniqueVals)
    value = char(uniqueVals(i));
    % splitDataset is defined outside this excerpt; presumably it returns the
    % rows matching value with the chosen attribute column removed - confirm.
    subdata = splitDataset(dataset, bestFeature, value);
    leaf(value) = createTree(subdata, subLabels);
end
% containers.Map is a handle object, so one assignment after the loop is
% equivalent to re-assigning it on every iteration.
myTree(bestFeatureLabel) = leaf;
end
%% Compute the Shannon entropy of the class column
function shannonEnt = calShannonEnt(dataset)
% CALSHANNONENT Shannon entropy (in bits) of the last column of dataset.
%
% Input:
%   dataset    - cell array or string array; the last column is treated as
%                the class label of each row
% Output:
%   shannonEnt - -sum(p .* log2(p)) over the distinct labels, where p is the
%                fraction of rows carrying each label; 0 when there are no rows
data_size = size(dataset);
labels = dataset(:, data_size(2));
numEntries = data_size(1);
% Count how many rows carry each label.
labelCounts = containers.Map;
for i = 1:length(labels)
    label = char(labels(i));
    if isKey(labelCounts, label)
        labelCounts(label) = labelCounts(label) + 1;
    else
        labelCounts(label) = 1;
    end
end
% Accumulate the entropy contribution of each label.
shannonEnt = 0.0;
for key = keys(labelCounts)
    % key is a 1x1 cell here; convert it to char for the Map lookup.
    prob = labelCounts(char(key)) / numEntries;
    shannonEnt = shannonEnt - prob * log2(prob);
end
end
%% Select the attribute with the smallest conditional entropy
function bestFeature = chooseFeature(dataset, ~)
% CHOOSEFEATURE Index of the attribute whose split yields the lowest
% weighted entropy.
%
% Inputs:
%   dataset - cell array or string array; every column except the last is
%             treated as an attribute
%   (second argument is accepted and ignored)
% Output:
%   bestFeature - column index of the selected attribute; 0 when dataset
%                 has no attribute columns
%
% Minimising the weighted entropy after the split selects the same
% attribute as maximising information gain, because the entropy before the
% split is the same for every candidate.
data_size = size(dataset);
numFeatures = data_size(2) - 1;
% Start from inf so the first attribute always becomes a candidate; a fixed
% bound such as 2.0 fails when entropies reach 2 bits (more than 4 classes).
minEntropy = inf;
bestFeature = 0;
for i = 1:numFeatures
    uniqueVals = unique(dataset(:, i));
    newEntropy = 0.0;
    for j = 1:length(uniqueVals)
        value = uniqueVals(j);
        % splitDataset is defined outside this excerpt; presumably it
        % returns the rows whose attribute i equals value - confirm.
        subDataset = splitDataset(dataset, i, value);
        size_sub = size(subDataset);
        % Weight each subset's entropy by its share of the rows.
        prob = size_sub(1) / data_size(1);
        newEntropy = newEntropy + prob * calShannonEnt(subDataset);
    end
    % Strict comparison: on ties the earliest attribute wins.
    if newEntropy < minEntropy
        minEntropy = newEntropy;
        bestFeature = i;
    end
end
end
%% 分割數據集,取出該特征
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件      53649  2019-03-17 14:25  ID3_shared\car_data.csv
     文件        425  2019-03-17 13:44  ID3_shared\golf.csv
     文件       5594  2019-04-21 21:58  ID3_shared\ID3.m
     文件       1396  2019-05-22 19:05  ID3_shared\ID3_run.m
     文件        464  2019-03-17 14:17  ID3_shared\sale.csv
     文件        614  2019-03-11 16:17  ID3_shared\watermelon.csv
     目錄          0  2019-06-05 16:20  ID3_shared
----------- ---------  ---------- -----  ----
                62142                    7
- 上一篇:基于顏色的matlab代碼
- 下一篇:基于simulink的儲能逆變器VF控制仿真
評論
共有 條評論