資源簡介
使用 Python 爬取 360 搜索引擎圖片,用于物體識別和樣本訓練。自定義函數 def get360Imag(category, length, path) 用于采集 360 圖片,三個參數分別為搜索的“關鍵詞”category、采集的圖片數量 length、保存圖片的路徑 path。程序會自動檢索當前下載目錄下已有的圖片數量,并在此基礎上再次采集 length 張圖片。需要導入的庫文件有 import requests、import json、import urllib、import pypinyin、import os。
代碼片段和文件信息
import?requests
import?json
import?urllib
import?pypinyin
import?os
def pinyin(word):
    """Return the toneless pinyin transliteration of *word* as one string.

    The text is converted with ``pypinyin.pinyin`` using
    ``style=pypinyin.NORMAL`` (no tone marks).  Every item the converter
    yields is itself joined, and the pieces are concatenated in order with
    no separator.

    Args:
        word: Text to transliterate.

    Returns:
        The concatenated pinyin string; an empty string if the converter
        yields nothing.
    """
    return ''.join(
        ''.join(readings)
        for readings in pypinyin.pinyin(word, style=pypinyin.NORMAL)
    )
# Module-level collection settings read by the functions in this file.
Collect_Picture_category = '蘋果'  # search keyword
Collect_Picture_length = 100  # number of images to collect
# Save directory; CurrentPicture_list drops the final character, so keep
# the trailing '/'.
Collect_Picture_SavePath = 'D:/test/1/'
Collect_Picture_Source_Index = 2  # NOTE(review): not referenced in the visible code
# Pinyin tag of the image source; used as the leading part of file names.
Collect_Picture_Source = pinyin('360圖片')
def CurrentPicture_list():
    """Return the name suffixes of images already saved for this search.

    Lists the save directory and, for every ``.jpg`` file whose stem starts
    with ``<Collect_Picture_Source>_<pinyin(Collect_Picture_category)>_``,
    collects whatever follows that prefix.

    Returns:
        A list of suffix strings in ``os.listdir`` order; empty when no
        file matches.  The items are strings, so convert them to ``int``
        before comparing them numerically (as strings, ``'99' > '100'``).

    Raises:
        OSError: Propagated from ``os.listdir`` if the directory cannot
            be listed.
    """
    # The configured path ends with a separator; drop that last character.
    save_dir = Collect_Picture_SavePath[:-1]
    # Build the expected prefix once instead of re-running pinyin() per file.
    prefix = (Collect_Picture_Source + '_'
              + pinyin(Collect_Picture_category) + '_')

    suffixes = []
    for entry in os.listdir(save_dir):
        if not entry.endswith(".jpg"):
            continue
        stem, _ = os.path.splitext(entry)
        if stem.startswith(prefix):
            suffixes.append(stem[len(prefix):])
    return suffixes
# Fetch images from 360 image search results, equivalent to searching directly in the search box
def?get360Imag(Collect_Picture_categoryCollect_Picture_lengthCollect_Picture_SavePath):
????if?not?os.path.exists(Collect_Picture_SavePath):
????????os.mkdir(Collect_Picture_SavePath)
????try:
????????#最后面的參數pn代表從pn開始抓取,rn為抓取的圖片數量
????????if?len(CurrentPicture_list())?!=?0:
????????????start_Collect_Index?=?int(max(CurrentPicture_list()))?+?1
????????else:
????????????start_Collect_Index?=?0
????????print(‘start_Collect_Index:‘?+?str(start_Collect_Index))
????????Current_Collect_Length?=?Collect_Picture_length
????????n?=?0
????????Each_start_Index?=?start_Collect_Index
????????for?x?in?range(10000):
????????????print(‘****************‘)
????????????print(‘x:‘?+?str(x))
????????????print(‘Each_start_Index:‘+str(Each_start_Index))
?????
評論
共有 條評論