-
大小: 715KB  文件類型: .rar  金幣: 2  下載: 0 次  發布日期: 2024-02-05
- 語言: C#
- 標簽: 數據庫  抓取數據
資源簡(jiǎn)介
包含省市區街道4級數據、SQL數據表,還有一個用C#寫的抓取國家統計局最新數據的程序,可以擴展成5級!

代碼片段和文件信息
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
/// <summary>
///     Helper class for scraping (collecting data from) web pages.
/// </summary>
public?static?class?Collectionhelper
{
????///?
????///?????取得字符里的Dom元素?不包含元素屬性
????///?
????///?
????///?
????///?
????public?static?List?GetDomElem(string?source?string?domElem)
????{
????????var?matchList?=?new?List();
????????string?regStr?=?string.Format(“<{0}[^>]*?>[\\s\\S]+?<\\/{0}>“?domElem);
????????try
????????{
????????????var?regex?=?new?Regex(regStr?RegexOptions.Compiled?|?RegexOptions.IgnoreCase);
????????????MatchCollection?matches?=?regex.Matches(source);
????????????foreach?(Match?match?in?matches)
????????????{
????????????????matchList.Add(match.Value);
????????????}
????????}
????????catch?(Exception?ex)
????????{
????????????matchList.Add(ex.Message);
????????}
????????return?matchList;
????}
????///?
????///?????取得字符里的Dom元素?包含元素屬性?如:class=“aa“
????///?
????///?
????///?
????///?
????///?
????public?static?List?GetDomElemByAttr(string?source?string?tagName?string?tagValue)
????{
????????var?matchList?=?new?List();
????????string?regStr?=
????????????string.Format(
????????????????@“<(?[\w]+)[^>]*\s{0}[\s]*?=[\s]*?(?[““‘]?){1}(?(Quote)\k)[““‘]?[^>]*>((?<\k[^>]*>)|\k>(?<-Nested>)|[\s\S]*?)*\k>“
????????????????tagName.ToLower()?tagValue);
????????try
????????{
????????????var?regex?=?new?Regex(regStr?RegexOptions.Compiled?|?RegexOptions.IgnoreCase);
????????????var?matches?=?regex.Matches(source);
????????????foreach?(Match?match?in?matches)
????????????{
????????????????matchList.Add(match.Value);
????????????}
????????}
????????catch?(Exception?ex)
????????{
????????????matchList.Add(ex.Message);
????????}
????????return?matchList;
????}
????///?
????///?????取得字符里的A元素鍵值對(duì)??[name,url]
????///?
????///?
????///?
????public?static?Dictionary?GetDomElem_A(string?source)
????{
????????var?matchList?=?new?Dictionary();
????????const?string?pattern?=?“]*??href=[\“‘](?[^\“‘]*?)[\“‘][^>]*?>(?[\\w\\W]*?) “;
????????try
????????{
????????????var?regex?=?new?Regex(pattern?RegexOptions.Compiled?|?RegexOptions.IgnoreCase);
????????????MatchCollection?matches?=?regex.Matches(source);
????????????foreach?(Match?match?in?matches)
????????????{
????????????????string?key?=?RemoveHtml(match.Value);
????????????????if?(!matchList.ContainsKey(key))
????????????????{
????????????????????matchList.Add(key?GetUrlArray(matc
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       9891  2016-06-29 17:44  省市區街道\Collection
     文件        460  2018-11-06 16:50  省市區街道\data\Default.aspx
     文件      10026  2018-11-08 14:54  省市區街道\data\Default.aspx.cs
     文件        535  2018-11-08 10:16  省市區街道\Whir_Cmn_Area.cs
     文件   32440950  2018-11-08 14:57  省市區街道\截止2017年10月31日.sql
     目錄          0  2018-11-08 15:05  省市區街道\data
     目錄          0  2018-11-08 15:05  省市區街道
----------- ---------  ---------- -----  ----
             32461862                    7
評論
共有 條評論