Python爬虫实现全国失信被执行人名单查询功能示例


本文摘自php中文网,作者不言,侵删。

这篇文章主要介绍了Python爬虫实现全国失信被执行人名单查询功能,涉及Python爬虫相关网络接口调用及json数据转换等相关操作技巧,需要的朋友可以参考下。

本文实例讲述了Python爬虫实现全国失信被执行人名单查询功能。分享给大家供大家参考,具体如下:

一、需求说明

利用百度的接口,实现一个全国失信被执行人名单查询功能。输入姓名,查询是否在全国失信被执行人名单中。

二、Python实现

版本1:

# -*- coding:utf-8*-

import sys

# Python 2 only: reload() is a builtin there, and re-importing sys makes
# setdefaultencoding() callable again so implicit str<->unicode conversions
# below use UTF-8.
reload(sys)

sys.setdefaultencoding('utf-8')

import time

import requests

# Start timestamp; the __main__ section subtracts it from the end time to
# report the total run time.
time1=time.time()

import pandas as pd

import json

# Module-level accumulators filled by person_executed(): names found ...
iname=[]

# ... and the matching ID-card numbers (same order as iname).
icard=[]

def person_executed(name):
    """Look up *name* in Baidu's dishonest-judgment-debtor list and record the hits.

    Requests 30 result pages (``pn`` = 0, 10, ..., 290 with ``rn=10``). For every
    record in a page's ``data[*]['result']`` list, the name and ID-card number
    are printed and appended to the module-level lists ``iname`` and ``icard``.

    A page that cannot be fetched or decoded is reported on stderr and skipped;
    the remaining pages are still requested.

    :param name: value placed in the ``iname`` query parameter (inserted as-is,
        it is not percent-encoded here).
    :return: None. Results accumulate in ``iname`` and ``icard``.
    """
    # Everything except the page offset is identical for all 30 requests.
    url_head = ("https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?resource_id=6899"
                "&query=%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95"
                "&cardNum=&"
                "iname=" + str(name) +
                "&areaName="
                "&pn=")
    url_tail = "&rn=10&ie=utf-8&oe=utf-8&format=json"

    for page in range(30):
        url = url_head + str(page * 10) + url_tail
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            body = requests.get(url, timeout=10).content
            for group in json.loads(body)['data']:
                for record in group['result']:
                    # Read both fields before appending so the two lists can
                    # never end up with different lengths.
                    person, card = record['iname'], record['cardNum']
                    print("%s %s" % (person, card))
                    iname.append(person)
                    icard.append(card)
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # Best effort: network errors, invalid JSON (ValueError) and an
            # unexpected response shape skip this page only. Unlike a bare
            # ``except``, Ctrl-C and programming errors still propagate.
            sys.stderr.write("person_executed: page %d skipped (%r)\n" % (page, err))

if __name__ == '__main__':
    # Script entry point: query one (masked) name, de-duplicate the hits,
    # export them to Excel and report the elapsed time.
    name = "郭**"
    person_executed(name)

    # Number of raw records collected (duplicates included).
    print(len(iname))

    # Build a two-column table from the collected lists and drop repeated rows.
    data1 = pd.DataFrame({"name": iname, "IDCard": icard}).drop_duplicates()
    print(data1)
    print(len(data1))

    # Write the de-duplicated table to an Excel workbook, without the index.
    data1.to_excel(
        "F:\\iname_icard_query.xlsx",
        header=True,
        encoding='gbk',
        index=False,
    )

    # time1 was captured at module start-up.
    elapsed = time.time() - time1
    print(u'ok,爬虫结束!')
    print(u'总共耗时:' + str(elapsed) + 's')

三、效果展示

"D:\Program Files\Python27\python.exe" D:/PycharmProjects/learn2017/全国失信被执行人查询.py
郭** 34122319790****5119
郭** 32032119881****2419
郭** 32032119881****2419
3
IDCard name
0 34122319790****5119 郭**
1 32032119881****2419 郭**
2
ok,爬虫结束!
总共耗时:7.72000002861s
Process finished with exit code 0

版本2:

# -*- coding:utf-8*-

import sys

# Python 2 only: reload() is a builtin there, and re-importing sys makes
# setdefaultencoding() callable again so implicit str<->unicode conversions
# below use UTF-8.
reload(sys)

sys.setdefaultencoding('utf-8')

import time

import requests

# Start timestamp; the __main__ section subtracts it from the end time to
# report the total run time.
time1=time.time()

import pandas as pd

import json

# Module-level accumulators filled by person_executed(). Each list receives
# one value per record, so index i across all lists describes one record.
# iname / icard hold the record's 'iname' and 'cardNum' fields; the remaining
# lists are named after the response field they store.
iname=[]

icard=[]

courtName=[]

areaName=[]

caseCode=[]

duty=[]

performance=[]

disruptTypeName=[]

publishDate=[]

def person_executed(name):
    """Look up *name* in Baidu's dishonest-judgment-debtor list and record full details.

    Requests 30 result pages (``pn`` = 0, 10, ..., 290 with ``rn=10``). For every
    record in a page's ``data[*]['result']`` list, nine fields are printed on
    one line and appended to the matching module-level lists (``iname``,
    ``icard``, ``courtName``, ``areaName``, ``caseCode``, ``duty``,
    ``performance``, ``disruptTypeName``, ``publishDate``).

    A page that cannot be fetched or decoded is reported on stderr and skipped;
    the remaining pages are still requested.

    :param name: value placed in the ``iname`` query parameter (inserted as-is,
        it is not percent-encoded here).
    :return: None. Results accumulate in the module-level lists.
    """
    # (response key, list that collects it), in the order the fields are printed.
    columns = (
        ('iname', iname),
        ('cardNum', icard),
        ('courtName', courtName),
        ('areaName', areaName),
        ('caseCode', caseCode),
        ('duty', duty),
        ('performance', performance),
        ('disruptTypeName', disruptTypeName),
        ('publishDate', publishDate),
    )

    # Everything except the page offset is identical for all 30 requests.
    url_head = ("https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?resource_id=6899"
                "&query=%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95"
                "&cardNum=&"
                "iname=" + str(name) +
                "&areaName="
                "&pn=")
    url_tail = "&rn=10&ie=utf-8&oe=utf-8&format=json"

    for page in range(30):
        url = url_head + str(page * 10) + url_tail
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            body = requests.get(url, timeout=10).content
            for group in json.loads(body)['data']:
                for record in group['result']:
                    # Read every field before appending anything, so a record
                    # with a missing key cannot leave the lists misaligned.
                    values = [record[key] for key, _ in columns]
                    print(" ".join("%s" % value for value in values))
                    for (_, bucket), value in zip(columns, values):
                        bucket.append(value)
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # Best effort: network errors, invalid JSON (ValueError) and an
            # unexpected response shape skip this page only. Unlike a bare
            # ``except``, Ctrl-C and programming errors still propagate.
            sys.stderr.write("person_executed: page %d skipped (%r)\n" % (page, err))

if __name__ == '__main__':
    # Script entry point: query one (masked) name, de-duplicate the detailed
    # records, export them to Excel and report the elapsed time.
    name = "郭**"
    person_executed(name)

    # Number of raw records collected (duplicates included).
    print(len(iname))

    # One column per collected field; rows line up by position in the lists.
    detail_data = pd.DataFrame({
        "name": iname,
        "IDCard": icard,
        "courtName": courtName,
        "areaName": areaName,
        "caseCode": caseCode,
        "duty": duty,
        "performance": performance,
        "disruptTypeName": disruptTypeName,
        "publishDate": publishDate,
    })

    # Drop rows that are exact repeats of an earlier row.
    detail_data1 = detail_data.drop_duplicates()

    # Write the de-duplicated table to an Excel workbook, without the index.
    detail_data1.to_excel(
        "F:\\iname_icard_query.xlsx",
        header=True,
        encoding='gbk',
        index=False,
    )

    # time1 was captured at module start-up.
    elapsed = time.time() - time1
    print(u'ok,爬虫结束!')
    print(u'总共耗时:' + str(elapsed) + 's')

相关推荐:

Python爬虫实现取名字的代码实例

python爬虫实现教程转换成 PDF 电子书

以上就是Python爬虫实现全国失信被执行人名单查询功能示例的详细内容,更多文章请关注木庄网络博客!!

相关阅读 >>

Python需要英语基础吗

Python中整型的基本介绍(代码示例)

Python中import 与__import__() 之间的区别比较

黑马云课堂8天深入理解Python视频资料

Python for循环实例来解析什么是Python循环语句?

Python2.7和3.7的区别

实例详解Python生成器协程运算

Python get函数有什么作用?示例解析

Python中logging的详细介绍(附示例)

spyder和Python有什么关系

更多相关阅读请进入《Python》频道 >>




打赏

取消

感谢您的支持,我会继续努力的!

扫码支持
扫码打赏,您说多少就多少

打开支付宝扫一扫,即可进行扫码打赏哦

分享从这里开始,精彩与您同在

评论

管理员已关闭评论功能...