xxxx18一60岁hd中国/日韩女同互慰一区二区/西西人体扒开双腿无遮挡/日韩欧美黄色一级片 - 色护士精品影院www

資源簡介

基于深度學習的文本相似度計算模型和代碼,親自跑過可以直接使用,對nlp領域的學習非常有借鑒意義,在智能問答系統上經常會用到。

資源截圖

代碼片段和文件信息

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@Author:yanqiang
@File: build_input.py
@Time: 2018/11/30 17:41
@Software: PyCharm
@Description: Build the model inputs
"""
import os
from collections import Counter

import numpy as np
from gensim.models import Word2Vec
from keras.preprocessing.sequence import pad_sequences

#?train?=?load_atec()


#?train?dev?test=load_ccks()


def select_best_length(train, limit_ratio=0.95):
    """Choose a maximum sequence length that covers most sentences.

    Collects the length of every entry in ``train['q1']`` and
    ``train['q2']`` and returns the smallest observed length ``L`` such that
    at least ``limit_ratio`` of all sentences have length ``<= L``.  The
    average length and the chosen length are also printed.

    :param train: object indexable by ``'q1'`` and ``'q2'``, each yielding an
        iterable of sized items (e.g. a DataFrame or a dict of lists).
    :param limit_ratio: fraction of sentences that must fit inside the
        returned length; defaults to 0.95.
    :return: the selected length, or 0 when there are no sentences or when
        ``limit_ratio`` can never be reached (e.g. a value above 1).
    """
    len_list = []
    for q1, q2 in zip(train['q1'], train['q2']):
        len_list.append(len(q1))
        len_list.append(len(q2))

    all_sent = len(len_list)
    # Guard the division so an empty data set reports 0 instead of raising.
    average_length = sum(len_list) / all_sent if all_sent else 0.0

    length_counts = Counter(len_list)
    max_length = 0
    covered = 0
    # Walk lengths from shortest to longest: coverage of "sentences no longer
    # than L" is only meaningful when accumulated in length order, not in
    # frequency order.
    for length in sorted(length_counts):
        covered += length_counts[length]
        # Compare integer counts to avoid accumulating float rounding error.
        if covered >= limit_ratio * all_sent:
            max_length = length
            break

    print('average_length:', average_length)
    print('max_length:', max_length)
    return max_length


#?select_best_length()

#返回train_xy
def build_data(train):
    """Build character-level samples and the vocabulary from a data set.

    Each entry of ``train.q1`` / ``train.q2`` is split into a list of
    characters.  Every character seen, plus the ``'UNK'`` token, is given an
    integer id, and the vocabulary is written to ``model/vocab.txt`` (one
    token per line, in id order).

    :param train: object exposing ``q1``, ``q2`` (iterables of strings) and
        ``label`` (supporting ``.tolist()``), e.g. a pandas DataFrame.
    :return: ``(datas, word_dict)`` where ``datas`` is
        ``[[left_samples, right_samples], labels]`` and ``word_dict`` maps
        each token to its id.
    """
    # Character lists for the q1 and q2 side of every sample.
    sample_x_left = [[char for char in text if char] for text in train.q1]
    sample_x_right = [[char for char in text if char] for text in train.q2]

    # Collect the vocabulary.
    vocabs = {'UNK'}
    for x_left, x_right in zip(sample_x_left, sample_x_right):
        vocabs.update(x_left, x_right)

    sample_x = [sample_x_left, sample_x_right]
    sample_y = train.label.tolist()
    print(len(sample_x_left), len(sample_x_right))
    datas = [sample_x, sample_y]

    # Fix the order once: set iteration order varies between runs, so sorting
    # keeps the ids and the saved vocabulary file reproducible and in sync.
    vocab_list = sorted(vocabs)
    # NOTE(review): ids start at 0; convert_data pads with pad_sequences'
    # default value -- confirm the model distinguishes padding from id 0.
    word_dict = {wd: index for index, wd in enumerate(vocab_list)}

    vocab_path = 'model/vocab.txt'
    # Create the output directory so a fresh checkout does not fail here.
    os.makedirs(os.path.dirname(vocab_path), exist_ok=True)
    with open(vocab_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(vocab_list))
    return datas, word_dict


def convert_data(datas, word_dict, MAX_LENGTH):
    """Convert character samples into padded id arrays for Keras.

    :param datas: ``[[left_samples, right_samples], labels]`` where each
        sample is a sequence of tokens (the structure returned by
        ``build_data``).
    :param word_dict: mapping from token to integer id.
    :param MAX_LENGTH: length passed to ``pad_sequences`` as ``maxlen``.
    :return: ``(left_x_train, right_x_train, y_train)``; the two inputs are
        the outputs of ``pad_sequences`` with ``padding='pre'`` and
        ``y_train`` is an array of int labels with a trailing axis of size 1.
    :raises KeyError: if a token is missing from ``word_dict`` and the
        mapping has no ``'UNK'`` entry to fall back on.
    """
    sample_x = datas[0]
    sample_y = datas[1]
    sample_x_left = sample_x[0]
    sample_x_right = sample_x[1]

    unk_id = word_dict.get('UNK')

    def to_ids(sentence):
        # Fall back to the UNK id for unseen tokens when one is available;
        # otherwise keep the strict lookup so missing tokens are not hidden.
        if unk_id is None:
            return [word_dict[char] for char in sentence]
        return [word_dict.get(char, unk_id) for char in sentence]

    left_x_train = [to_ids(data) for data in sample_x_left]
    right_x_train = [to_ids(data) for data in sample_x_right]
    y_train = [int(i) for i in sample_y]

    left_x_train = pad_sequences(left_x_train, maxlen=MAX_LENGTH, padding='pre')
    right_x_train = pad_sequences(right_x_train, maxlen=MAX_LENGTH, padding='pre')
    # The labels are 1-D, so the only valid new trailing axis is 1; axis=2 is
    # out of range and raises on current NumPy releases.
    y_train = np.expand_dims(np.asarray(y_train), axis=1)
    return left_x_train, right_x_train, y_train


def?train_w2v(datas):
????“““
????訓(xùn)練詞向量
????:return:
????“““
????sents?=?datas[0][0]?+?datas[0][1]
????#print(sents)
????model?=?Word2Vec(sentences=sents?size=300?min_

?屬性????????????大小?????日期????時(shí)間???名稱(chēng)
-----------?---------??----------?-----??----

?????文件???????6148??2020-02-26?18:01??sentence-similarity-project\.DS_Store

?????文件????????141??2018-12-05?17:08??sentence-similarity-project\.gitignore

?????文件????????128??2020-03-28?21:48??sentence-similarity-project\.idea\libraries\R_User_Library.xml

?????文件????????315??2020-03-28?21:46??sentence-similarity-project\.idea\misc.xml

?????文件????????313??2020-03-28?21:46??sentence-similarity-project\.idea\modules.xml

?????文件????????611??2020-03-28?21:48??sentence-similarity-project\.idea\sentence-similarity-project.iml

?????文件??????16934??2020-03-29?15:07??sentence-similarity-project\.idea\workspace.xml

?????文件???????4702??2020-03-28?23:25??sentence-similarity-project\build_input.py

?????文件???????1753??2020-03-05?21:25??sentence-similarity-project\data_loader.py

?????文件???????1780??2020-03-05?21:40??sentence-similarity-project\evalute.py

?????文件????3485318??2020-03-05?21:23??sentence-similarity-project\input\atec\atec_nlp_sim_train.csv

?????文件????5625804??2018-12-05?17:08??sentence-similarity-project\input\atec\atec_nlp_sim_train_add.csv

?????文件????????946??2018-12-05?17:08??sentence-similarity-project\input\atec\readme.txt

?????文件????????609??2018-12-05?17:08??sentence-similarity-project\input\ccks\Readme

?????文件?????760958??2018-12-05?17:08??sentence-similarity-project\input\ccks\task3_dev.txt

?????文件????7355965??2018-12-05?17:08??sentence-similarity-project\input\ccks\task3_train.txt

?????文件????8555401??2018-12-05?17:08??sentence-similarity-project\input\ccks\test_with_id.txt

?????文件??????23854??2020-03-29?12:06??sentence-similarity-project\model\model.png

?????文件??????29593??2020-03-29?12:27??sentence-similarity-project\model\result_atec.png

?????文件??????25260??2018-12-05?17:08??sentence-similarity-project\model\result_ccks.png

?????文件????8809848??2020-03-29?12:27??sentence-similarity-project\model\tokenvec_bilstm2_siamese_model.h5

?????文件????7847540??2020-03-29?12:06??sentence-similarity-project\model\token_vec_300.bin

?????文件??????10735??2020-03-29?12:06??sentence-similarity-project\model\vocab.txt

?????文件???????4329??2020-03-29?12:06??sentence-similarity-project\train_siamese_network.py

?????文件???????5003??2020-03-28?23:25??sentence-similarity-project\__pycache__\build_input.cpython-36.pyc

?????文件???????1481??2020-03-28?21:48??sentence-similarity-project\__pycache__\data_loader.cpython-36.pyc

?????文件?????175767??2020-02-26?18:00??sentence-similarity-project\文本相似度建模.pdf

?????目錄??????????0??2020-03-28?21:48??sentence-similarity-project\.idea\inspectionProfiles

?????目錄??????????0??2020-03-28?21:48??sentence-similarity-project\.idea\libraries

?????目錄??????????0??2020-03-05?21:23??sentence-similarity-project\input\atec

............此處省略9個文件信息

評論

共有 條評論

相關資源