核心数据抽取 郝伟,2021/01/05
[TOC]
1. poc(字典文件)数据提取
数据最终输出至 file_out 指定的 CSV 文件(即 d:\data\pocs.csv)。
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 5 16:20:00 2021
@author: Administrator
"""
import json
import pandas as pd
def read_lines(filepath):
    """Return every line of a UTF-8 text file as a list of strings.

    Args:
        filepath: Path of the file to read.

    Returns:
        A list with one string per line, in file order. Each string keeps
        its trailing newline, exactly as produced by ``readlines()``.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.readlines()
# Input: the POC dictionary file (JSON). Output: the flattened CSV table.
file_poc = r'd:\data\pocs.json'
file_out = r'd:\data\pocs.csv'

# The whole file is parsed as a single JSON document, so the lines are
# joined back together before decoding.
pocs = json.loads(''.join(read_lines(file_poc)))

# One row per top-level entry: the entry's key followed by four of its fields.
# A missing field raises KeyError rather than silently producing a gap.
cols = ['key', 'port', 'rule', 'path', 'create']
items = []
for key, data in pocs.items():
    items.append([key, data['port'], data['rule'], data['path'], data['create']])

# create a data frame
df = pd.DataFrame(items, columns=cols)

# save to a csv file (index=True writes the row number as the first column)
df.to_csv(file_out, index=True, sep=',')
执行输出
key ... create
0 Apache Solr_Unauthorized ... system
1 CouchDB_Unauthorized ... system
2 Docker_Unauthorized ... system
3 Domino_Unauthorized ... system
4 Elastic_Unauthorized ... system
5 Esccms_Unauthorized ... system
.. ... ... ...
813 Setelsa-Conacwin-3.7.1.2_LFI ... system
814 Adobe-ColdFusion_FileUp ... system
815 ManageEngine-Desktop_RCE ... system
816 Struts2-061_RCE ... system
[817 rows x 5 columns]
2. nmap数据提取
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 5 16:20:00 2021
@author: Administrator
"""
import json
import pandas as pd
def read_lines(filepath):
    """Return every line of a UTF-8 text file as a list of strings.

    Args:
        filepath: Path of the file to read.

    Returns:
        A list with one string per line, in file order. Each string keeps
        its trailing newline, exactly as produced by ``readlines()``.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.readlines()
# Input: the nmap scan file, parsed one JSON document per line.
# Output: a CSV table with one row per port entry.
file_nmap = r'd:\data\nmapscan.json'
file_out = r'd:\data\nmapscan.csv'

cols = ['name', 'port', 'product', 'state', 'version', 'extrainfo']

# Rows in the column order of `cols`, collected across all lines of the file.
nmaps_name_product = []
for line in read_lines(file_nmap):
    try:
        nmaps_ports = json.loads(line)['ipinfo']['ports']
        # item is port, use 'name' & 'product' to meet 'rule'
        for item in nmaps_ports:
            info = nmaps_ports[item]
            nmaps_name_product.append([info['name'],
                                       item,
                                       info['product'],
                                       info['state'],
                                       info['version'],
                                       info['extrainfo']])
    except (ValueError, LookupError, TypeError):
        # Best effort: a line that is not valid JSON (ValueError covers
        # json.JSONDecodeError) or lacks the expected structure is skipped
        # from the point of failure; rows already collected from it are kept.
        # Only data-shape errors are caught, so KeyboardInterrupt/SystemExit
        # and genuine programming errors still surface.
        continue

# create a data frame
df = pd.DataFrame(nmaps_name_product, columns=cols)

# save to a csv file (index=True writes the row number as the first column)
df.to_csv(file_out, index=True, sep=',')
结果
name port product state version extrainfo
0 msrpc 135 Microsoft Windows RPC open
1 netbios-ssn 139 Microsoft Windows netbios-ssn open
2 microsoft-ds 445 Microsoft Windows 2003 or 2008 microsoft-ds open
3 msrpc 1025 Microsoft Windows RPC open
4 ssh 22 OpenSSH open 8.1p1 Debian 1 protocol 2.0
5 ssh 22 OpenSSH open 7.6p1 Ubuntu 4ubuntu0.3 Ubuntu Linux; protocol 2.0
6 http 5000 Tornado httpd open 6.1
7 http 5001 BaseHTTPServer open 0.6 Python 3.8.5
8 http 9999 Tornado httpd open 6.1
9 mongodb 27017 MongoDB open 4.2.3
.. .. ..
95 vmware-auth 902 VMware Authentication Daemon open 1.1 Uses VNC, SOAP
96 ms-wbt-server 3389 Microsoft Terminal Services open
97 http 5357 Microsoft HTTPAPI httpd open 2 SSDP/UPnP
98 ssh 22 closed
99 http 80 VMware ESXi Server httpd open
100 https 443 open
101 vmware-auth 902 VMware Authentication Daemon open 1.1 Uses VNC, SOAP
102 http-alt 8000 open
103 soap 9080 gSOAP open 2.8