python-3.x 如何拆分JSON DataFrame对象的字符串?

xqnpmsa8  于 2023-05-19  发布在  Python
关注(0)|答案(1)|浏览(153)

我有多个JSON对象的DataFrame:JSON DataFrame。我想删除.之前的所有内容或所有.之前的内容,只保留最后一个.之后的字符串。我该如何为此编写Python脚本?我想使用split函数,但不确定如何为DataFrame JSON对象实现它,因为我得到了ValueError: Length mismatch: Expected axis has 75 elements, new values have 2 elements。以下是我的剧本:

import pandas as pd
import os
from openpyxl.workbook import Workbook
import csv
from pathlib import Path
from pandas.io.json import json_normalize
import json
from datetime import datetime
from datetime import date
from datetime import timedelta
import psycopg2
from psycopg2 import OperationalError
# Import our files
import pg  # Various functions to interface with the Postgres servers
from db_creds import *  # The DB server and user creds

#try:
    # Connect to an existing database
connection = pg.create_connection(sourceDB_setting[3], sourceDB_setting[5], sourceDB_setting[6], sourceDB_setting[1], sourceDB_setting[2])
#Create a cursor to perform database operations
cursor = connection.cursor()

cursor.execute("SELECT application_data, id FROM ssap_applications LIMIT 2;")
results = cursor.fetchall()

if os.path.exists('App_data3.csv'):
    os.remove('App_data3.csv')

for row in results:
    jrec, app_id = row
    # Process each row here
    #print(jrec)
    jrec = json.loads(jrec)
    normal_json = pd.json_normalize(jrec)
    normal_json.columns = normal_json.columns.str.split('.')[-1]
    print(normal_json.columns)
    # save to csv
    normal_json.to_csv('App_data3.csv', header=True, index=False, mode='a', encoding='utf-8')

cursor.close()

FYI,这是我的DataFrame JSON对象:

Index(['singleStreamlinedApplication.applicationType',
       'singleStreamlinedApplication.ApplicationType',
       'singleStreamlinedApplication.applicationStatus',
       'singleStreamlinedApplication.coverageYear',
       'singleStreamlinedApplication.eligibilityStatus',
       'singleStreamlinedApplication.currentPage',
       'singleStreamlinedApplication.maxAchievedPage',
       'singleStreamlinedApplication.finalized',
       'singleStreamlinedApplication.acceptPrivacyIndicator',
       'singleStreamlinedApplication.ssapApplicationId',
       'singleStreamlinedApplication.IP',
       'singleStreamlinedApplication.applicationGuid',
       'singleStreamlinedApplication.applicationSignatureDate',
       'singleStreamlinedApplication.getHelpIndicator',
       'singleStreamlinedApplication.helpType',
       'singleStreamlinedApplication.authorizedRepresentative.signatureisProofIndicator',
       'singleStreamlinedApplication.authorizedRepresentative.phone',
       'singleStreamlinedApplication.authorizedRepresentative.partOfOrganizationIndicator',
       'singleStreamlinedApplication.authorizedRepresentative.address.postalCode',
       'singleStreamlinedApplication.authorizedRepresentative.address.county',
       'singleStreamlinedApplication.authorizedRepresentative.address.primaryAddressCountyFipsCode',
       'singleStreamlinedApplication.authorizedRepresentative.address.streetAddress1',
       'singleStreamlinedApplication.authorizedRepresentative.address.streetAddress2',
       'singleStreamlinedApplication.authorizedRepresentative.address.state',
       'singleStreamlinedApplication.authorizedRepresentative.address.city',
       'singleStreamlinedApplication.authorizedRepresentative.address.countyCode',
       'singleStreamlinedApplication.authorizedRepresentative.address.addressId',
       'singleStreamlinedApplication.authorizedRepresentative.name.middleName',
       'singleStreamlinedApplication.authorizedRepresentative.name.lastName',
       'singleStreamlinedApplication.authorizedRepresentative.name.firstName',
       'singleStreamlinedApplication.authorizedRepresentative.name.suffix',
       'singleStreamlinedApplication.authorizedRepresentative.emailAddress',
       'singleStreamlinedApplication.authorizedRepresentative.organizationId',
       'singleStreamlinedApplication.authorizedRepresentative.companyName',
       'singleStreamlinedApplication.authorizedRepresentative.hasUserElectronicallySigned',
       'singleStreamlinedApplication.applyingForhouseHold',
       'singleStreamlinedApplication.applyingForFinancialAssistanceIndicator',
       'singleStreamlinedApplication.consentAgreement',
       'singleStreamlinedApplication.agreeToUseIncomeData',
       'singleStreamlinedApplication.agreeToEndCoverage',
       'singleStreamlinedApplication.numberOfYearsAgreed',
       'singleStreamlinedApplication.authorizedRepresentativeIndicator',
       'singleStreamlinedApplication.applicationStartDate',
       'singleStreamlinedApplication.applicationDate',
       'singleStreamlinedApplication.taxFilerDependants',
       'singleStreamlinedApplication.isRidpVerified',
       'singleStreamlinedApplication.clientIp',
       'singleStreamlinedApplication.primaryTaxFilerPersonId',
       'singleStreamlinedApplication.broker.internalBrokerId',
       'singleStreamlinedApplication.broker.brokerFederalTaxIdNumber',
       'singleStreamlinedApplication.broker.brokerName',
       'singleStreamlinedApplication.broker.brokerFirstName',
       'singleStreamlinedApplication.broker.brokerLastName',
       'singleStreamlinedApplication.assister.assisterName',
       'singleStreamlinedApplication.assister.assisterFirstName',
       'singleStreamlinedApplication.assister.assisterLastName',
       'singleStreamlinedApplication.assister.assisterID',
       'singleStreamlinedApplication.assister.internalAssisterId',
       'singleStreamlinedApplication.taxHousehold',
       'singleStreamlinedApplication.incarcerationAsAttestedIndicator',
       'singleStreamlinedApplication.mecCheckStatus',
       'singleStreamlinedApplication.jointTaxFilerSpousePersonId',
       'singleStreamlinedApplication.currentApplicantId',
       'singleStreamlinedApplication.ssapVersion',
       'singleStreamlinedApplication.exemptHousehold',
       'singleStreamlinedApplication.brokerChanged',
       'singleStreamlinedApplication.renewalConsentChanged',
       'singleStreamlinedApplication.incomeConsentChanged',
       'singleStreamlinedApplication.isEditApplication',
       'singleStreamlinedApplication.manuallyCreated',
       'singleStreamlinedApplication.claimNoticeDMAHSAcknowledgment',
       'singleStreamlinedApplication.paybackDMAHSAcknowledgment',
       'singleStreamlinedApplication.uiProperties',
       'singleStreamlinedApplication.externalApplicationId',
       'singleStreamlinedApplication.outboundCaseNumber'],
      dtype='object')
lpwwtiir

lpwwtiir1#

你已经接近了,你只需要在 * [-1]之前加上str *:

...
    normal_json = pd.json_normalize(jrec)
    normal_json.columns = normal_json.columns.str.split('.').str[-1] # <-- line updated

输出:

print(normal_json.columns)

Index(['applicationType', 'ApplicationType', 'applicationStatus',
       'coverageYear', 'eligibilityStatus', 'currentPage', 'maxAchievedPage',
       'finalized', 'acceptPrivacyIndicator', 'ssapApplicationId', 'IP',
       'applicationGuid', 'applicationSignatureDate', 'getHelpIndicator',
       'helpType', 'signatureisProofIndicator', 'phone',
       'partOfOrganizationIndicator', 'postalCode', 'county',
       'primaryAddressCountyFipsCode', 'streetAddress1', 'streetAddress2',
       'state', 'city', 'countyCode', 'addressId', 'middleName', 'lastName',
       'firstName', 'suffix', 'emailAddress', 'organizationId', 'companyName',
       'hasUserElectronicallySigned', 'applyingForhouseHold',
        ...

相关问题