資源簡介
手寫字訓練
代碼片段和文件信息
import os
import struct

import numpy as np
import PIL.Image
import scipy.ndimage
# Directories that hold the HWDB1.1 training / test ``.gnt`` files.
train_data_dir = "HWDB1.1trn_gnt"
test_data_dir = "HWDB1.1tst_gnt"


# Read the images and their corresponding Chinese characters.
def read_from_gnt_dir(gnt_dir=train_data_dir):
    """Yield ``(image, tagcode)`` pairs from every ``.gnt`` file in *gnt_dir*.

    Each record in a ``.gnt`` file is a 10-byte header followed by the pixels:

    * bytes 0-3: total record size (header + pixels), little-endian
    * bytes 4-5: character tag code, stored high byte first
    * bytes 6-7: image width, little-endian
    * bytes 8-9: image height, little-endian
    * ``width * height`` bytes of 8-bit grey-scale pixels, row by row

    Args:
        gnt_dir: Directory to scan; only names ending in ``.gnt`` are read.

    Yields:
        ``(image, tagcode)`` where *image* is a ``uint8`` array of shape
        ``(height, width)`` and *tagcode* is an ``int`` such that
        ``struct.pack(">H", tagcode)`` reproduces the two tag bytes.

    Reading of a file stops quietly at end of file, at a truncated record,
    or at a record whose declared size does not match its dimensions.
    """
    header_size = 10

    def one_file(f):
        while True:
            header = f.read(header_size)
            # An empty read is a clean EOF; a short read is a truncated file.
            if len(header) < header_size:
                break
            # Decode with struct rather than shifting uint8 scalars: under
            # NumPy 2 promotion rules ``np.uint8(x) << 8`` stays uint8 and
            # silently overflows.
            (sample_size,) = struct.unpack_from("<I", header, 0)
            (tagcode,) = struct.unpack_from(">H", header, 4)
            width, height = struct.unpack_from("<HH", header, 6)
            if header_size + width * height != sample_size:
                break
            pixels = f.read(width * height)
            if len(pixels) < width * height:
                break
            # frombuffer gives a read-only view; copy so callers get a
            # writable array.
            image = np.frombuffer(pixels, dtype=np.uint8)
            image = image.reshape((height, width)).copy()
            yield image, tagcode

    # Sort so the sample order does not depend on the file system.
    for file_name in sorted(os.listdir(gnt_dir)):
        if file_name.endswith(".gnt"):
            file_path = os.path.join(gnt_dir, file_name)
            with open(file_path, "rb") as f:
                for image, tagcode in one_file(f):
                    yield image, tagcode
import scipy.misc
from sklearn.utils import shuffle
import tensorflow as tf
# Use the 140 most common Chinese characters for this experiment.
# NOTE: these must be Simplified Chinese characters. The sample labels are
# decoded from GB2312, which has no Traditional forms, so a Traditional
# character in this string could never match a sample and would leave a
# class with no training data.
char_set = "的一是了我不人在他有这个上们来到时大地为子中你说生国年着就那和要她出也得里后自以会家可下而过天去能对小多然于心学么之都好看起发当没成只如事把还用第样道想作种开美总从无情己面最女但现前些所同日手又行意动方期它头经长儿回位分爱老因很给名法间斯知世什两次使身者被高已亲其进此话常与活正感"
def resize_and_normalize_image(img):
    """Turn a grey-scale character image into a flat, normalised 64x64 vector.

    The image is padded with white (255) to a square, scaled to 56x56,
    surrounded by a 4-pixel white border and flattened. Pixel values are
    then mapped from 0..255 to the range -1..1 via ``(p - 128) / 128``.

    Args:
        img: Non-empty 2-D array of 8-bit grey-scale pixels, shape
            ``(height, width)``.

    Returns:
        1-D ``float64`` array of length ``64 * 64``.

    Raises:
        ValueError: If *img* is not 2-D or has no pixels.
    """
    img = np.asarray(img)
    if img.ndim != 2 or img.size == 0:
        raise ValueError(
            "expected a non-empty 2-D image, got shape %r" % (img.shape,)
        )

    # Pad to a square. The remainder of an odd difference goes on the
    # second side so the result is exactly square.
    height, width = img.shape
    side = max(height, width)
    extra_rows = side - height
    extra_cols = side - width
    pad_dims = (
        (extra_rows // 2, extra_rows - extra_rows // 2),
        (extra_cols // 2, extra_cols - extra_cols // 2),
    )
    img = np.pad(img, pad_dims, mode="constant", constant_values=255)

    # Scale. scipy.misc.imresize no longer exists in SciPy, so use a
    # bilinear zoom instead.
    border = 4
    inner = 64 - border * 2
    img = scipy.ndimage.zoom(img, float(inner) / side, order=1, mode="nearest")
    img = np.pad(img, border, mode="constant", constant_values=255)
    assert img.shape == (64, 64)

    # Map pixel values to -1..1. Convert to float first: subtracting 128
    # from a uint8 array wraps around instead of going negative.
    img = img.flatten().astype(np.float64)
    return (img - 128) / 128
# One-hot encoding.
def convert_to_one_hot(char):
    """Return the one-hot label vector for *char*.

    Args:
        char: A character contained in the module-level ``char_set``.

    Returns:
        1-D float array of length ``len(char_set)`` that is 1 at the
        position of *char* in ``char_set`` and 0 everywhere else.

    Raises:
        ValueError: If *char* does not occur in ``char_set``.
    """
    vector = np.zeros(len(char_set))
    vector[char_set.index(char)] = 1
    return vector
# The data set is small, so it is loaded into RAM in one go.
def _load_samples(gnt_dir):
    """Return ``(features, labels)`` lists for the usable samples in *gnt_dir*.

    A sample is kept only when its tag code decodes (as GB2312) to a
    character in ``char_set``. Features come from
    ``resize_and_normalize_image`` and labels from ``convert_to_one_hot``.
    """
    features = []
    labels = []
    for image, tagcode in read_from_gnt_dir(gnt_dir=gnt_dir):
        try:
            tagcode_unicode = struct.pack(">H", tagcode).decode("gb2312")
        except UnicodeDecodeError:
            # Not a GB2312 character, so it cannot be in char_set; skip it
            # rather than abort the whole load.
            continue
        if tagcode_unicode in char_set:
            features.append(resize_and_normalize_image(image))
            labels.append(convert_to_one_hot(tagcode_unicode))
    return features, labels


train_data_x, train_data_y = _load_samples(train_data_dir)

# Shuffle the samples (fixed seed so runs are repeatable).
train_data_x, train_data_y = shuffle(train_data_x, train_data_y, random_state=0)

batch_size = 128
num_batch = len(train_data_x) // batch_size

# NOTE(review): "text_" looks like a typo for "test_", but the names are kept
# because code outside this view may refer to them.
text_data_x, text_data_y = _load_samples(test_data_dir)
- 上一篇:猜拳小游戲python代碼
- 下一篇:Python-turtle玫瑰花繪制
評論
共有 條評論