Jolt转换 - 将所有键转换为小写?

3

我有以下 JSON:

{
  "ROWNUM": "328938",
  "SOURCE_NAME": "I2323",
  "ID": "333333",
  "FIRST_NAME": "A121221",
  "KNOWN_AS": "G1223321",
  "LAST_NAME": "sadsadsd",
  "PLACE_OF_BIRTH": "Indsadsadsaddsaia",
  "DATE_OF_BIRTH": "sadsaddsa",
  "UPRN": "sadsadsad",
  "POST_CODE": "asdsadsda",
  "POST_TOWN": "GLASGOW",
  "ESTIMATED_DOB": "N",
  "LAST_UPDATED": "2019-02-11T13:57:05.264Z",
  "cluster_id": 3020,
  "aliases": [
    {
      "_id": {
        "timestamp": 1550152767,
        "machineIdentifier": 6505561,
        "processIdentifier": 59,
        "counter": 2775622,
        "time": 1550152767000,
        "timeSecond": 1550152767,
        "date": 1550152767000
      },
      "ROWNUM": "328938",
      "SOURCE_NAME": "I2323",
      "ID": "333333",
      "FIRST_NAME": "A121221",
      "KNOWN_AS": "G1223321",
      "LAST_NAME": "sadsadsd",
      "PLACE_OF_BIRTH": "Indsadsadsaddsaia",
      "DATE_OF_BIRTH": "sadsaddsa",
      "UPRN": "sadsadsad",
      "POST_CODE": "asdsadsda",
      "POST_TOWN": "GLASGOW",
      "ESTIMATED_DOB": "N",
      "LAST_UPDATED": "2019-02-11T13:57:05.264Z",
      "cluster_id": 3020,
      "score": "0.9997580647468567"
    },
    {
      "_id": {
        "timestamp": 1550152767,
        "machineIdentifier": 6505561,
        "processIdentifier": 59,
        "counter": 2775622,
        "time": 1550152767000,
        "timeSecond": 1550152767,
        "date": 1550152767000
      },
      "ROWNUM": "328938",
      "SOURCE_NAME": "I2323",
      "ID": "333333",
      "FIRST_NAME": "A121221",
      "KNOWN_AS": "G1223321",
      "LAST_NAME": "sadsadsd",
      "PLACE_OF_BIRTH": "Whatever",
      "DATE_OF_BIRTH": "sadsaddsa",
      "UPRN": "sadsadsad",
      "POST_CODE": "asdsadsda",
      "POST_TOWN": "PAISLEY",
      "ESTIMATED_DOB": "N",
      "LAST_UPDATED": "2019-02-11T13:57:05.264Z",
      "cluster_id": 3020,
      "score": "0.9997580647468567"
    }
  ]
}

有没有一种规范可以将所有键(包括嵌套对象中的键)都转换为小写?(在这种情况下,aliases下面的内容也要转换为小写)

以下方法适用于顶级键,但不适用于嵌套的键:

翻译结果仅供参考,如有不准确之处请谅解。

[
  // Jolt chain that lowercases the TOP-LEVEL keys of the input in three passes.
  // Nested objects and arrays (e.g. "aliases") travel through as opaque values,
  // so the keys inside them keep their original case.
  {
    // Pass 1: unwrap every top-level entry into a literal pair stored under
    // its original key:
    //   "KEY": { "key": "KEY", "value": <original value> }
    "operation": "shift",
    "spec": {
      "*": {
        // "$" emits the matched key name, "@" emits the matched value.
        "$": "&1.key",
        "@": "&1.value"
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        // Pass 2: now that the original key is on the "right hand side"
        // (it is a value, not a key), it can be lowercased in place.
        "key": "=toLower"
      }
    }
  },
  {
    // Pass 3: pivot back. Each "value" is written to the output path named
    // by its sibling "key", which is now lowercase.
    "operation": "shift",
    "spec": {
      "*": {
        "value": "@(1,key)"
      }
    }
  }
]

谢谢!


正则表达式怎么样?如果你使用像 ".*": 这样的正则表达式,那么每个键都会匹配 - Noixes
我本来希望通过使用Jolt转换来减少代码的脆弱性,但如果一切都无望了,我可能会考虑其他方法! - Gavin Gilmour
2个回答

6
这将产生期望的结果,扩展现有的转换:
[
  // Jolt chain that lowercases the top-level keys AND the keys of every
  // object in the "aliases" array, in three passes.
  // Values of those fields (e.g. the nested "_id" object) are copied as-is,
  // so keys deeper than one level inside an alias are not lowercased.
  {
    // Pass 1: unwrap keys and values into literal pairs:
    //   "KEY": { "key": "KEY", "value": <original value> }
    "operation": "shift",
    "spec": {
      "*": {
        // "$" emits the matched key name, "@" emits the matched value.
        "$": "&1.key",
        "@": "&1.value"
      },
      // Do the same for every field of every element in "aliases".
      // Seen from the "$" / "@" level:
      //   &3 = "aliases"
      //   &2 = index of the element within the array
      //   &1 = key name of the key/value pair being unwrapped
      "aliases": {
        "*": {
          "*": {
            "$": "&3.&2.&1.key",
            "@": "&3.&2.&1.value"
          }
        }
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        // Pass 2 (top level): now that the original key is on the
        // "right hand side" (it is a value), lowercase it.
        "key": "=toLower"
      },
      "aliases": {
        "*": {
          "*": {
            // Pass 2 (aliases): same lowercasing for the key of each
            // unwrapped alias field.
            "key": "=toLower"
          }
        }
      }
    }
  },
  {
    // Pass 3: pivot back, using the now lowercased keys as output paths.
    "operation": "shift",
    "spec": {
      "*": {
        "value": "@(1,key)"
      },
      "aliases": {
        "*": {
          "*": {
            // Seen from the "value" level:
            //   &3       = "aliases"
            //   [&2]     = array index, written as an array position so that
            //              "aliases" is an array again in the output
            //   @(1,key) = the lowercased name stored in the sibling "key"
            "value": "&3.[&2].@(1,key)"
          }
        }
      }
    }
  }
]

产生以下结果:
{
  "rownum": "328938",
  "source_name": "I2323",
  "id": "333333",
  "first_name": "A121221",
  "known_as": "G1223321",
  "last_name": "sadsadsd",
  "place_of_birth": "Indsadsadsaddsaia",
  "date_of_birth": "sadsaddsa",
  "uprn": "sadsadsad",
  "post_code": "asdsadsda",
  "post_town": "GLASGOW",
  "estimated_dob": "N",
  "last_updated": "2019-02-11T13:57:05.264Z",
  "cluster_id": 3020,
  "aliases": [
    {
      "_id": {
        "timestamp": 1550152767,
        "machineIdentifier": 6505561,
        "processIdentifier": 59,
        "counter": 2775622,
        "time": 1550152767000,
        "timeSecond": 1550152767,
        "date": 1550152767000
      },
      "rownum": "328938",
      "source_name": "I2323",
      "id": "333333",
      "first_name": "A121221",
      "known_as": "G1223321",
      "last_name": "sadsadsd",
      "place_of_birth": "Indsadsadsaddsaia",
      "date_of_birth": "sadsaddsa",
      "uprn": "sadsadsad",
      "post_code": "asdsadsda",
      "post_town": "GLASGOW",
      "estimated_dob": "N",
      "last_updated": "2019-02-11T13:57:05.264Z",
      "cluster_id": 3020,
      "score": "0.9997580647468567"
    },
    {
      "_id": {
        "timestamp": 1550152767,
        "machineIdentifier": 6505561,
        "processIdentifier": 59,
        "counter": 2775622,
        "time": 1550152767000,
        "timeSecond": 1550152767,
        "date": 1550152767000
      },
      "rownum": "328938",
      "source_name": "I2323",
      "id": "333333",
      "first_name": "A121221",
      "known_as": "G1223321",
      "last_name": "sadsadsd",
      "place_of_birth": "Whatever",
      "date_of_birth": "sadsaddsa",
      "uprn": "sadsadsad",
      "post_code": "asdsadsda",
      "post_town": "PAISLEY",
      "estimated_dob": "N",
      "last_updated": "2019-02-11T13:57:05.264Z",
      "cluster_id": 3020,
      "score": "0.9997580647468567"
    }
  ]
}

1

您可以按照以下测试演示的方式,实现自定义的 JsonParserDelegate 解析器。

可以从 https://github.com/hovanessyan/json_lowercase_all_keys.git 检出一个可工作的项目。

a)创建自己的解析器以将键转换为小写(KeysToLowercaseParser)

b)覆盖 JsonFactory 并使用新的解析器

我已经将您的 json 内容粘贴到 test.json 中,在此测试中进行读取。

public class LowerCaseJsonTest {

    /**
     * Reads {@code src/main/resources/test.json} through an {@link ObjectMapper}
     * whose factory wraps each created parser in a {@link KeysToLowercaseParser},
     * then serializes the resulting tree and prints it to standard output.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    @Test
    public void name() throws IOException {

        // Factory that decorates the parser produced by each of the four
        // overridden _createParser overloads with the lowercasing delegate.
        JsonFactory lowercasingFactory = new JsonFactory() {
            @Override
            protected JsonParser _createParser(InputStream stream, IOContext context) throws IOException {
                JsonParser plain = super._createParser(stream, context);
                return new KeysToLowercaseParser(plain);
            }

            @Override
            protected JsonParser _createParser(Reader reader, IOContext context) throws IOException {
                JsonParser plain = super._createParser(reader, context);
                return new KeysToLowercaseParser(plain);
            }

            @Override
            protected JsonParser _createParser(byte[] bytes, int start, int length, IOContext context)
                    throws IOException {
                JsonParser plain = super._createParser(bytes, start, length, context);
                return new KeysToLowercaseParser(plain);
            }

            @Override
            protected JsonParser _createParser(char[] chars, int start, int length, IOContext context,
                                               boolean recyclable) throws IOException {
                JsonParser plain = super._createParser(chars, start, length, context, recyclable);
                return new KeysToLowercaseParser(plain);
            }
        };

        ObjectMapper mapper = new ObjectMapper(lowercasingFactory);

        // Parse into a tree and write it straight back out as a JSON string.
        JsonNode tree = mapper.readTree(new File("src/main/resources/test.json"));
        System.out.println(mapper.writeValueAsString(tree));
    }

}

/**
 * Parser decorator that reports every JSON field name in lowercase while
 * leaving all other tokens (string values, numbers, ...) untouched.
 *
 * <p>Lowercasing uses {@link java.util.Locale#ROOT} so the result does not
 * depend on the JVM's default locale. With the no-argument
 * {@code toLowerCase()}, a Turkish default locale would turn {@code "ID"}
 * into {@code "ıd"} (dotless i) instead of {@code "id"}.
 */
class KeysToLowercaseParser extends JsonParserDelegate {
    KeysToLowercaseParser(JsonParser d) {
        super(d);
    }

    /**
     * Returns the delegate's current name, lowercased when the current token
     * is a field name.
     *
     * @return the current name as reported by the delegate, lowercased for field-name tokens
     * @throws IOException if the delegate fails to provide the name
     */
    @Override
    public String getCurrentName() throws IOException {
        String name = delegate.getCurrentName();
        return hasTokenId(JsonTokenId.ID_FIELD_NAME) ? lowerCase(name) : name;
    }

    /**
     * Returns the delegate's text for the current token, lowercased when the
     * current token is a field name. Text of value tokens is returned as-is.
     *
     * @return the current token text, lowercased for field-name tokens
     * @throws IOException if the delegate fails to provide the text
     */
    @Override
    public String getText() throws IOException {
        String text = delegate.getText();
        return hasTokenId(JsonTokenId.ID_FIELD_NAME) ? lowerCase(text) : text;
    }

    /** Locale-independent lowercasing; passes {@code null} through unchanged. */
    private static String lowerCase(String s) {
        return s == null ? null : s.toLowerCase(java.util.Locale.ROOT);
    }
}

嘿,我其实是想找到一个Jolt规范,因为我想将其集成到Apache Nifi管道中(使用Jolt处理器),不过我会把悬赏的声望给你,感谢你在回答中付出的努力。谢谢!(编辑:系统提示我只能在15小时后颁发悬赏,到时我会这样做。) - Gavin Gilmour
嘿,谢谢,但我没意识到你在寻找一个特定于 Jolt 的答案。为什么不等到整个悬赏期结束呢?也许会有人提供基于 Jolt 的解决方案。 - hovanessyan
好的,我会这样做,我在问题中应该更清楚一些。如果所有其他方法都失败了,我可能会考虑在Nifi中使用ExecuteScript处理器来包装您的代码。再次感谢! - Gavin Gilmour

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接