#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>

using namespace std;

/** A frequent item that consists of a single word. */
typedef struct Item
{
    string sItem;
    int iSupport;
} ITEM;

typedef vector<string> VEC_STR;
typedef vector<VEC_STR> VEC_VEC_STR;

/** A higher-level frequent item (a group of words held in vsItem). */
typedef struct MultiItem
{
    VEC_STR vsItem;
    int iSupport;
} MULTIITEM;

typedef vector<ITEM> VEC_ITEM; // collection of single-word frequent items
typedef?vector?VEC_MULTIITEM;//高層的頻繁項(xiàng)集合 typedef?map?MAP_STR_INT;//存儲(chǔ)詞語(yǔ)及其出現(xiàn)頻率 void?readFile(ifstream?&?const?string?&?VEC_STR?&); void?countWord(VEC_STR?*?MAP_STR_INT?&?const?char?separator=‘\\‘); void?generateLevel1Set(MAP_STR_INT?*?VEC_ITEM?&); void?generateLevel2(VEC_ITEM?*?VEC_MULTIITEM?&); void?cycGenerator(VEC_MULTIITEM?*?VEC_STR?&?ofstream?&); void?generateHighLevelSet(VEC_MULTIITEM?*?VEC_MULTIITEM?&?VEC_STR?&); void?generateInitialHigh(VEC_MULTIITEM?*?VEC_VEC_STR?&); void?pruning(VEC_VEC_STR?*?VEC_MULTIITEM?*?VEC_MULTIITEM?&); bool?find(VEC_MULTIITEM?*?VEC_STR?*); void?countSupport(VEC_STR?*?VEC_MULTIITEM?&); void?generateFrequentSet(VEC_MULTIITEM?*?VEC_MULTIITEM?&); void?printFrequentSet(VEC_ITEM?*?ostream?&os=cout); void?printFrequentSet(VEC_MULTIITEM?*?ostream?&os=cout); const?int?MINSUPPORT?=?2;//最小支持度 int?main() { //從源文件讀取數(shù)據(jù) ifstream?infile; VEC_STR?vs_word; readFile(infile“in.txt“vs_word); infile.close(); //計(jì)算所有詞語(yǔ)的出現(xiàn)頻率 MAP_STR_INT?word_count; countWord(&vs_word?word_count); //生成單個(gè)詞語(yǔ)的頻繁項(xiàng)集合 VEC_ITEM?level1Set; generateLevel1Set(&word_count?level1Set); //生成具有兩個(gè)詞語(yǔ)的頻繁項(xiàng)集合 VEC_MULTIITEM?level2?level2Set; generateLevel2(&level1Set?level2); countSupport(&vs_word?level2); generateFrequentSet(&level2?level2Set); //生成具有三個(gè)詞語(yǔ)的頻繁項(xiàng)集合 VEC_MULTIITEM?level3Set; generateHighLevelSet(&level2Set?level3Set?vs_word); //輸出單個(gè)詞的頻繁項(xiàng)到文件 ofstream?outfile; outfile.open(“out.txt“); if(!outfile) cout<<“Unable?to?open?this?file!“< printFrequentSet(&level1Set?outfile); //循環(huán)產(chǎn)生高層的頻繁項(xiàng)集合并輸出到文件 cycGenerator(&level2Set?vs_word?outfile); cout<<“OK!“< return?0; } /**從源文件讀取詞語(yǔ) *每一行作為一個(gè)字符串存入向量中 */ void?readFile(ifstream?&infile?const?string?&filename?VEC_STR?&vs_word) { infile.close(); infile.clear(); infile.open(filename.c_str());? 
if(!infile) cout<<“Unable?to?open?this?file!“< string?word; while(getline(infile?word)) vs_word.push_back(word); } /**計(jì)算每個(gè)詞語(yǔ)的支持度 *從字符串中提取出所有詞語(yǔ),與其支持度一道存入map中 */ void?countWord(VEC_STR?*vs_word?MAP_STR_INT?&word_count?const?char?separator) { string?sentenceword; for(unsigned?int?i=0;?isize();?++i) { sentence?=?(*vs_word)[i]; while(sentence.find(separator)!=-1) { word?=?sentence.substr(0sentence.find(separator)); ++word_count[word]; sentence?=?sentence.substr(sentence.find(separator)+1?sentence.size()-1); } ++word_count[sentence]; } } /**找出頻繁1項(xiàng)集的集合 */ void?generateLevel1Set(MAP_STR_INT?*pWord_Co
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       8080  2008-11-06 23:13  Apriori.cpp
     文件        295  2008-11-05 16:19  in.txt
----------- ---------  ---------- -----  ----
                 8375                    2