Em um dos projetos, foi necessário transferir os processos de importação de dados de sistemas de terceiros para uma arquitetura de microsserviços. O Apache NiFi foi escolhido como ferramenta. A importação do Registro Estatal Unificado de Pessoas Jurídicas (EGRUL) do Serviço Federal de Impostos foi escolhida como o primeiro caso experimental.
O artigo anterior descreveu uma maneira de converter XML em JSON usando o esquema AVRO.
Este artigo descreve uma maneira de converter JSON usando a especificação JOLT.
Processadores e controladores usados
Dividindo JSON em partes
O FlowFile obtido no estágio anterior contém JSON com um array de extratos do EGRUL (USRLE) de diferentes organizações. Primeiro, vamos dividi-lo em partes para que cada FlowFile contenha um único extrato.
Para isso usamos o processador SplitJson. Nas configurações, é preciso especificar uma expressão JsonPath para dividir o JSON em partes; neste caso, $.*
Documentação JsonPath aqui
Você pode praticar aqui
Conversão JSON
O JSON resultante tem uma estrutura desnecessariamente complexa para ser armazenado e processado posteriormente. É melhor combinar o endereço e o nome completo em uma única string e mover alguns elementos para cima na hierarquia.
JSON antes da transformação
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : {
"type" : "",
"name" : ""
},
"district" : null,
"town" : {
"type" : "",
"name" : ""
},
"settlement" : null,
"street" : {
"type" : "",
"name" : ""
},
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
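Para transformar o JSON, usamos o processador JoltTransformJSON.
Configurações do processador:
Jolt Transformation DSL - o tipo de transformação; usamos Chain - uma cadeia de operações executadas em sequência.
Jolt Specification - a especificação da transformação.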
JOLT
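A especificação usa duas operações - shift e modify-overwrite-beta. Para modify-overwrite-beta, vale consultar o código-fonte Modifier.java. As especificações podem ser testadas em jolt-demo.appspot.com.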
JOLT
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
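A operação modify-overwrite-beta é usada duas vezes, pois a segunda utiliza o resultado da primeira.
Assim, a especificação é composta por três operações: duas modify-overwrite-beta e uma shift. Cada operação é descrita por dois campos - operation e spec.
Vejamos, em ordem, o que cada uma delas faz.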
modify-overwrite-beta
. , . , .
.
A primeira operação (a 1ª modify-overwrite-beta) concatena type e name dos elementos region, district, town, settlement e street. Para isso é usada a expressão "=concat(@(type), ' ', @(name))".
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
}
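Por exemplo, na linha "region": "=concat(@(type), ' ', @(name))", a expressão é avaliada no nível do elemento region, que contém type e name. Como já estamos dentro de region, basta escrever @(type).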
A segunda operação (a 2ª modify-overwrite-beta) monta o endereço completo em um novo elemento - value.
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
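Aqui aparece a construção @(1,index). O número 1 indica que index deve ser procurado um nível acima, ou seja: value fica dentro de addressRF, e addressRF contém index.
Lendo a expressão por partes: = indica que em seguida vem uma função, concat é a função de concatenação, e @(1,index) é a referência ao valor do elemento index.
O elemento fias é adicionado com valor nulo.
Ele será transferido para o resultado pela operação shift.
Para percorrer os elementos de um array usa-se "*".
Isso é necessário, por exemplo, para director, que é um array: a expressão é aplicada a cada um de seus elementos.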
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
shift
A operação shift altera a estrutura do JSON.
JSON após as operações modify-overwrite-beta
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : " ",
"district" : " ",
"town" : " ",
"settlement" : " ",
"street" : " ",
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null,
"value" : "143500, , , , , , , , ",
"fias" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
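Como se vê, depois das operações modify-overwrite-beta os novos elementos foram adicionados, mas os antigos continuam presentes. Já a operação shift transfere para o resultado apenas os elementos indicados na especificação; os demais são descartados.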
shift
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
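Na operação shift, à esquerda fica o elemento do JSON de entrada e à direita o caminho em que o valor será colocado no resultado. O símbolo & substitui o nome do elemento atual; & é o mesmo que &0. Já &1 é o nome do elemento um nível acima, e assim por diante. O símbolo & também pode ser combinado com texto, por exemplo pre-&-post: se & corresponder a name, o resultado será pre-name-post.
A primeira regra - "reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&" - transfere os elementos listados para o resultado sem alterações. O símbolo | significa "ou" e permite listar vários nomes em uma única regra.
Os elementos fullName e shortName são movidos um nível acima pela regra "name": { "*": "&" }. O "*" seleciona todos os elementos que estão dentro de name, e o "&" coloca cada um deles na raiz do resultado, com o próprio nome.
Em seguida - o endereço.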
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
}
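A parte interessante aqui é a expressão "&2.&". O ponto separa os níveis do caminho de saída. &2 corresponde a address, e & é o nome do próprio elemento. &1 seria addressRF, que não é usado, de modo que esse nível desaparece. Ou seja, os caminhos resultantes são: address.kladr, address.regionCode, address.value e address.fias. Vejamos agora o array director no JSON de entrada.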
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ]
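É preciso descartar lastName, firstName e patronymic.
Os elementos inn e fio devem subir um nível.
Os elementos ogrnip, typeCode e typeName também devem subir um nível.
O elemento name deve ser renomeado para position.
O elemento disqualification permanece como está.
Como director é um array, é preciso preservar o índice de cada elemento; para isso, além de &, usa-se a construção [&].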
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
}
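Primeiro, fio e inn. Para eles é usada a expressão &3[&2].&. Lendo por partes: &3 - director, [&2] - o índice do elemento no array, & - fio ou inn.
Em seguida, name dentro de position. &3 - director, [&2] - o índice do elemento no array, &1 - position. O & não é usado, de modo que o valor de name é gravado com o nome position.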
Os demais elementos de position são simplesmente movidos um nível acima. O elemento disqualification permanece inalterado.
Nas demais regras são utilizadas construções semelhantes.
Exemplo
Por fim, repito aqui o JSON original, a especificação JOLT e o JSON resultante.
JSON original
{
"reportDate": "2020-05-20",
"ogrn": "1234567890123",
"ogrnDate": "2002-12-30",
"inn": "1234567890",
"kpp": "123456789",
"opfCode": "12300",
"opfName": " ",
"name": {
"fullName": " ",
"shortName": ""
},
"address": {
"addressRF": {
"region": {
"type": "",
"name": ""
},
"district": null,
"town": {
"type": "",
"name": ""
},
"settlement": null,
"street": {
"type": "",
"name": ""
},
"index": "143500",
"regionCode": "50",
"kladr": "500000570000011",
"house": null,
"building": null,
"apartment": null
}
},
"termination": null,
"capital": null,
"manageOrg": null,
"director": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"position": {
"ogrnip": null,
"typeCode": "02",
"typeName": " ",
"name": " "
},
"disqualification": null
}
],
"founders": {
"founderULRF": null,
"founderULForeign": null,
"founderFL": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
},
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789021"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
}
],
"founderGov": null,
"founderPIF": null
},
"capitalPart": null,
"holderReestrAO": null,
"okved": {
"mainOkved": {
"code": "47.11",
"name": " , , "
},
"addOkved": null
}
}
Especificação JOLT
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
JSON resultante
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"fullName" : " ",
"shortName" : "",
"address" : {
"kladr" : "500000570000011",
"regionCode" : "50",
"value" : "143500, , , , , , , , ",
"fias" : null
},
"capital" : null,
"director" : [ {
"fio" : " ",
"inn" : "123456789012",
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"position" : " ",
"disqualification" : null
} ],
"founders" : {
"founderFL" : [ {
"fio" : " ",
"inn" : "123456789012",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fio" : " ",
"inn" : "123456789021",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ]
},
"capitalPart" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
Adicional
Além disso, o JSON resultante deve ser colocado em algum lugar para armazenamento e uso posterior. Mas isso já foge ao escopo deste artigo; cada um pode usar a solução que lhe for mais conveniente.