使用Python/Pandas解析嵌套JSON

4
我想解析这个JSON响应:
{
   "count":2,
   "next":null,
   "previous":null,
   "results":[
      {
         "id":123,
         "type_vname":"Suspicious Remote Desktop",
         "category":"LATERAL MOVEMENT",
         "src_ip":"192.168.1.1",
         "state":"fixed",
         "description":null,
         "t_score":70,
         "c_score":70,
         "first_timestamp":"2017-12-13T18:51:22Z",
         "last_timestamp":"2017-12-13T18:51:22Z",
         "detection_detail_set":[
            {
               "id":1234567,
               "description":"Suspicious Remote Desktop",
               "dst_host_id":1234,
               "dst_ip":"192.168.1.1",
               "count":null,
               "count_pos":null,
               "dst_dns":null,
               "dst_port":80,
               "dst_geo":null,
               "proto":null,
               "first_timestamp":"2017-12-13T18:51:22Z",
               "last_timestamp":"2017-12-13T18:51:22Z",
               "total_bytes_sent":null,
               "total_bytes_rcvd":null,
               "url":"https://192.168.1.2/api/detection_details"
            },
            {
               "id":89123456,
               "description":"Suspicious Remote Desktop",
               "dst_host_id":5678,
               "dst_ip":"192.168.1.1",
               "count":null,
               "count_pos":null,
               "dst_dns":null,
               "dst_port":80,
               "dst_geo":null,
               "proto":null,
               "first_timestamp":"2017-12-13T18:50:18Z",
               "last_timestamp":"2017-12-13T18:50:18Z",
               "total_bytes_sent":null,
               "total_bytes_rcvd":null,
               "url":"https://192.168.1.2/api/detection_details"
            }
         ],
         "dns_set":[

         ],
         "relayed_comm_set":[

         ],
         "sensor_luid":"abc1pdj",
         "summary":{
            "internal_targets":1,
            "anomalous_events":2,
            "probable_owner":"user"
         },
         "host":"https://192.168.1.2/api/detection_details",
         "url":"https://192.168.1.2/api/detection_details",
         "tags":[

         ],
         "targets_key_asset":false,
         "triage_rule_id":null
      },
      {
         "id":1235,
         "type_vname":"Suspicious Remote Desktop",
         "category":"LATERAL MOVEMENT",
         "src_ip":"192.168.1.2",
         "state":"fixed",
         "description":null,
         "t_score":70,
         "c_score":70,
         "first_timestamp":"2017-12-11T19:11:46Z",
         "last_timestamp":"2017-12-11T19:11:46Z",
         "detection_detail_set":[
            {
               "id":123445,
               "description":"Suspicious Remote Desktop",
               "dst_host_id":4958,
               "dst_ip":"192.168.1.2",
               "count":null,
               "count_pos":null,
               "dst_dns":null,
               "dst_port":80,
               "dst_geo":null,
               "proto":null,
               "first_timestamp":"2017-12-11T19:11:46Z",
               "last_timestamp":"2017-12-11T19:11:46Z",
               "total_bytes_sent":null,
               "total_bytes_rcvd":null,
               "url":"https://192.168.1.2/api/detection_details"
            },
            {
               "id":1274857,
               "description":"Suspicious Remote Desktop",
               "dst_host_id":15423,
               "dst_ip":"192.168.1.2",
               "count":null,
               "count_pos":null,
               "dst_dns":null,
               "dst_port":80,
               "dst_geo":null,
               "proto":null,
               "first_timestamp":"2017-12-11T19:11:46Z",
               "last_timestamp":"2017-12-11T19:11:46Z",
               "total_bytes_sent":null,
               "total_bytes_rcvd":null,
               "url":"https://192.168.1.2/api/detection_details"
            },
            {
               "id":137847,
               "description":"Suspicious Remote Desktop",
               "dst_host_id":93238,
               "dst_ip":"192.168.1.2",
               "count":null,
               "count_pos":null,
               "dst_dns":null,
               "dst_port":80,
               "dst_geo":null,
               "proto":null,
               "first_timestamp":"2017-12-11T19:10:53Z",
               "last_timestamp":"2017-12-11T19:10:53Z",
               "total_bytes_sent":null,
               "total_bytes_rcvd":null,
               "url":"https://192.168.1.2/api/detection_details"
            },
            {
               "id":2376849874,
               "description":"Suspicious Remote Desktop",
               "dst_host_id":15423,
               "dst_ip":"192.168.1.2",
               "count":null,
               "count_pos":null,
               "dst_dns":null,
               "dst_port":80,
               "dst_geo":null,
               "proto":null,
               "first_timestamp":"2017-12-11T19:10:53Z",
               "last_timestamp":"2017-12-11T19:10:53Z",
               "total_bytes_sent":null,
               "total_bytes_rcvd":null,
               "url":"https://192.168.1.2/api/detection_details"
            }
         ],
         "dns_set":[

         ],
         "relayed_comm_set":[

         ],
         "sensor_luid":"abcery",
         "summary":{
            "internal_targets":1,
            "anomalous_events":4,
            "probable_owner":"user"
         },
         "host":"https://192.168.1.2/api/detection_details",
         "url":"https://192.168.1.2/api/detection_details",
         "tags":[

         ],
         "targets_key_asset":false,
         "triage_rule_id":null
      }
   ]
}

并把这些数据转换成 DataFrame,这样就可以用 to_csv 将其导出为带有以下标题的 .csv 文件:
count
next
previous
results_id
results_type_vname
results_category
results_src_ip
results_state
results_description
results_t_score
results_c_score
results_first_timestamp
results_last_timestamp
results_dns_set
results_relayed_comm_set
results_sensor_luid
results_host
results_url
results_tags
results_targets_key_asset
results_triage_rule_id
summary_internal_targets
summary_anomalous_events
summary_probable_owner
detection_id
detection_description
detection_dst_host_id
detection_dst_ip
detection_count
detection_count_pos
detection_dst_dns
detection_dst_port
detection_dst_geo
detection_proto
detection_first_timestamp
detection_last_timestamp
detection_total_bytes_sent
detection_total_bytes_rcvd
detection_url

我在 Stack Overflow 上进行了搜索,并编写了一些自己的代码(json 响应在“data”中):

import pandas as pd
from pandas.io.json import json_normalize

# Attempt from the question: flatten the parsed API response held in `data`
# and export it to a CSV file.

# Wrap the response in a DataFrame; given the payload shown above, the
# "results" column then holds one result dict per row.
df = pd.DataFrame(data)
# Emit one row per entry of each result's "detection_detail_set" list.
# Only the parent's "category" and "id" are carried along through `meta`,
# which is why the remaining result-level and "summary" fields are absent
# from the output.
# NOTE: `record_prefix` is applied to the record (detection) columns, so the
# "results_" prefix used here actually labels detection-level fields.
df = json_normalize(data=df['results'], record_path='detection_detail_set', 
                            meta=['category', 'id'], record_prefix='results_', errors='ignore')

# Keep only the leading rows (DataFrame.head() defaults to 5), so the file
# written below is a truncated preview rather than the full table.
df = df.head()

# Write the frame to Output.csv without the index column.
df.to_csv('Output.csv', index=False)

运行后,我在输出中得到了以下标题(包含数据):

results_count
results_count_pos
results_description
results_dst_dns
results_dst_geo
results_dst_host_id
results_dst_ip
results_dst_port
results_first_timestamp
results_id
results_last_timestamp
results_proto
results_total_bytes_rcvd
results_total_bytes_sent
results_url
category
id

我感觉我已经走了一半的路。我尝试了几种组合和从其他SO帖子中获得的建议来获取剩余的数据。到目前为止,没有任何效果。我知道我遇到问题是由于嵌套,只需要找到一种方法来获得所需的结果。非常感谢您的帮助!

1个回答

0

你的思路是正确的,只需要把 results 层与展开后的 detection 层合并起来即可:

# Result-level table: one row per entry of "results". json_normalize flattens
# the nested "summary" dict into "summary.<key>" columns.
# NOTE: `df` is expected to still be pd.DataFrame(data) at this point, i.e.
# not yet reassigned to a normalized frame -- confirm before running.
# The detection list is unpacked separately below, so drop it here. The label
# is passed by keyword because the positional `axis` argument of
# DataFrame.drop was removed in pandas 2.0.
results = (json_normalize(data=df["results"], errors="ignore")
           .drop(columns="detection_detail_set")
           .add_prefix("results_"))
# Collapse "results_summary.<key>" into "results_<key>". The pattern is a
# regular expression (escaped dot), so regex matching is requested explicitly:
# pandas 2.0 changed the default of str.replace to literal matching, which
# would silently leave these columns unrenamed.
results.columns = results.columns.str.replace(r"results_summary\.", "results_",
                                              regex=True)

# Detection-level table: one row per entry of each "detection_detail_set",
# with every detection field prefixed "detection_". The parent's "category"
# and "id" are kept as plain meta columns to serve as join keys.
detection = json_normalize(data=df['results'], meta=['category', 'id'],
                           record_path='detection_detail_set',
                           record_prefix="detection_", errors='ignore')

# Left join so every result row is kept and repeated once per detection.
# The right-hand key columns "category" and "id" remain in the merged frame
# next to "results_category" / "results_id".
master = results.merge(detection, how="left",
                       left_on=["results_id", "results_category"],
                       right_on=["id", "category"])

master.columns
Index(['results_c_score', 'results_category', 'results_description',
       'results_dns_set', 'results_first_timestamp', 'results_host',
       'results_id', 'results_last_timestamp', 'results_relayed_comm_set',
       'results_sensor_luid', 'results_src_ip', 'results_state',
       'results_anomalous_events', 'results_internal_targets',
       'results_probable_owner', 'results_t_score', 'results_tags',
       'results_targets_key_asset', 'results_triage_rule_id',
       'results_type_vname', 'results_url', 'detection_count',
       'detection_count_pos', 'detection_description', 'detection_dst_dns',
       'detection_dst_geo', 'detection_dst_host_id', 'detection_dst_ip',
       'detection_dst_port', 'detection_first_timestamp', 'detection_id',
       'detection_last_timestamp', 'detection_proto',
       'detection_total_bytes_rcvd', 'detection_total_bytes_sent',
       'detection_url', 'category', 'id'],
      dtype='object')

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接