核心数据抽取 郝伟,2021/01/05

[TOC]

1. poc(字典文件)数据提取

数据最终输出至 file_out 指定的 CSV 文件(即 d:\data\pocs.csv)。

# -*- coding: utf-8 -*-
"""
Created on Tue Jan  5 16:20:00 2021

@author: Administrator
""" 
import json
import pandas as pd

def read_lines(filepath):
    """
    Read a UTF-8 text file and return its content as a list of lines.

    Every element keeps its line terminator, exactly as ``readlines``
    yields it; an empty file gives an empty list.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        return handle.readlines()

# Input: the poc dictionary file (JSON). Output: the flattened CSV table.
file_poc = r'd:\data\pocs.json'
file_out = r'd:\data\pocs.csv'

# The file holds a single JSON document, so the lines are glued back
# together before parsing.
pocs = json.loads(''.join(read_lines(file_poc)))

# CSV column order; 'key' is the entry's key in the poc dictionary and the
# remaining names are fields read from each entry.
cols = ['key', 'port', 'rule', 'path', 'create']

# One row per poc entry: its key followed by the fields listed in cols[1:].
items = []
for key in pocs:
    data = pocs[key]
    items.append([key] + [data[field] for field in cols[1:]])

# Build the table from the collected rows.
df = pd.DataFrame(data=items, columns=cols)

# Write the table, including the numeric row index, as comma-separated text.
df.to_csv(file_out, sep=',', index=True)

执行输出

                                                   key  ...  create
0                             Apache Solr_Unauthorized  ...  system
1                                 CouchDB_Unauthorized  ...  system
2                                  Docker_Unauthorized  ...  system
3                                  Domino_Unauthorized  ...  system
4                                 Elastic_Unauthorized  ...  system
5                                  Esccms_Unauthorized  ...  system
..                                                 ...  ...     ...
813                       Setelsa-Conacwin-3.7.1.2_LFI  ...  system
814                            Adobe-ColdFusion_FileUp  ...  system
815                           ManageEngine-Desktop_RCE  ...  system
816                                    Struts2-061_RCE  ...  system
[817 rows x 5 columns]

2. nmap数据提取

# -*- coding: utf-8 -*-
"""
Created on Tue Jan  5 16:20:00 2021

@author: Administrator
""" 
import json
import pandas as pd

def read_lines(filepath):
    """
    Read a UTF-8 text file and return its content as a list of lines.

    Every element keeps its line terminator, exactly as ``readlines``
    yields it; an empty file gives an empty list.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        return handle.readlines()

# Input: the nmap scan file, parsed as one JSON document per line.
# Output: a flat CSV table with one row per scanned port.
file_nmap = r'd:\data\nmapscan.json'
file_out = r'd:\data\nmapscan.csv'


cols = ['name', 'port', 'product', 'state', 'version', 'extrainfo']

# Rows collected from every line of the nmap file, in the order of `cols`.
nmaps_name_product = []

for line in read_lines(file_nmap):
    try:
        # Each line is expected to look like {"ipinfo": {"ports": {<port>: {...}}}}.
        nmaps_ports = json.loads(line)['ipinfo']['ports']
        for item in nmaps_ports:  # item is port, use 'name' & 'product' to meet 'rule'
            port_info = nmaps_ports[item]
            nmaps_name_product.append([port_info['name'],
                                       item,
                                       port_info['product'],
                                       port_info['state'],
                                       port_info['version'],
                                       port_info['extrainfo']])
    except (ValueError, LookupError, TypeError):
        # Best-effort extraction: a line that is not valid JSON
        # (json.JSONDecodeError is a ValueError) or that lacks the expected
        # structure/fields is skipped. Rows already appended for earlier
        # ports of the same line are kept. Unlike a bare `except`, this does
        # not swallow KeyboardInterrupt/SystemExit or unrelated bugs.
        continue

# create a data frame
df = pd.DataFrame(nmaps_name_product, columns=cols)

# save to a csv file (the numeric row index is written as the first column)
df.to_csv(file_out, index=True, sep=',')

结果

    name    port    product    state    version    extrainfo
0    msrpc    135    Microsoft Windows RPC    open        
1    netbios-ssn    139    Microsoft Windows netbios-ssn    open        
2    microsoft-ds    445    Microsoft Windows 2003 or 2008 microsoft-ds    open        
3    msrpc    1025    Microsoft Windows RPC    open        
4    ssh    22    OpenSSH    open    8.1p1 Debian 1    protocol 2.0
5    ssh    22    OpenSSH    open    7.6p1 Ubuntu 4ubuntu0.3    Ubuntu Linux; protocol 2.0
6    http    5000    Tornado httpd    open    6.1    
7    http    5001    BaseHTTPServer    open    0.6    Python 3.8.5
8    http    9999    Tornado httpd    open    6.1    
9    mongodb    27017    MongoDB    open    4.2.3    
..   ..   ..
95    vmware-auth    902    VMware Authentication Daemon    open    1.1    Uses VNC, SOAP
96    ms-wbt-server    3389    Microsoft Terminal Services    open        
97    http    5357    Microsoft HTTPAPI httpd    open    2    SSDP/UPnP
98    ssh    22        closed        
99    http    80    VMware ESXi Server httpd    open        
100    https    443        open        
101    vmware-auth    902    VMware Authentication Daemon    open    1.1    Uses VNC, SOAP
102    http-alt    8000        open        
103    soap    9080    gSOAP    open    2.8

results matching ""

    No results matching ""