annotate upload_document/app.py @ 12:d4c4cd4760fa

fix KeyError bug
author Dennis C. M. <dennis@denniscm.com>
date Thu, 08 Jun 2023 17:48:52 +0100
parents d09dee7a86da
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
1 import json
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
2 import boto3
4
9005b7590008 state machine working
Dennis C. M. <dennis@denniscm.com>
parents: 3
diff changeset
3
3
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
4
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
5 s3_client = boto3.client('s3')
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
6 dynamodb = boto3.resource('dynamodb')
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
7 table = dynamodb.Table('FinanceParser')
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
8
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
9
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
def _resolve_account_value(account, date_column, date, column_types):
    """Return the cell stored under *date_column*, or '' when it must be blank.

    Given:
    +------------------+------+------+
    | ASSETS           | 2020 | 2019 |
    +------------------+------+------+
    | ASSETS_ACCOUNT_1 |      |      |
    +------------------+------+------+
    | ASSETS_ACCOUNT_2 |      |      |
    +------------------+------+------+

    a header row repeats the date in its own date cell; blanking it avoids
    getting `2020` as the value of `ASSETS`.
    """
    account_value = account.get(date_column, '')

    if 'COLUMN_HEADER' in column_types and date == account_value:
        return ''

    return account_value


def _build_account_item(event_msg, doc, date_column, date, row_index, account):
    """Build the table item for one (date, row) cell.

    Returns None when the row has no account name under key '1'; such rows
    are skipped on purpose. Only that lookup is guarded: a missing
    `docType`, `companyTicker` or `format` raises KeyError instead of
    silently dropping the row.
    """
    try:
        account_name = account['1']
    except KeyError:
        return None

    column_types = account.get('type', [])

    # pk -> item_type#company_ticker
    # sk -> date#row_index
    return {
        'pk': f"{event_msg['docType']}#{event_msg['companyTicker']}",
        'sk': f'{date}#{row_index}',
        'account_name': account_name,
        'account_value': _resolve_account_value(
            account, date_column, date, column_types
        ),
        'column_types': column_types,
        'format': doc['format'],
    }


def lambda_handler(event, context):
    """Load a processed document from S3 and store its cells in the table.

    Args:
        event: Invocation payload. Reads `event['body']['message']`, which
            must provide `bucketName`, `objectKey`, `docType` and
            `companyTicker`.
        context: Lambda context object (unused).

    Returns:
        A dict with `statusCode` 200 and a JSON-encoded `body` of
        `{"message": "ok"}`.

    Raises:
        KeyError: If a required key is missing from the event message or
            from the downloaded document.
    """
    event_msg = event['body']['message']
    local_path = '/tmp/document.json'

    # Download file from s3
    s3_client.download_file(
        event_msg['bucketName'],
        event_msg['objectKey'],
        local_path
    )

    with open(local_path, encoding='utf-8') as f:
        doc = json.load(f)

    # One writer for the whole document, so rows really are sent in batches
    # instead of one single-item batch per row. A batch request may not
    # contain the same key twice; `overwrite_by_pkeys` keeps only the latest
    # item per key, preserving the previous last-write-wins outcome.
    # Assumes `pk`/`sk` are the table's key attributes, as the key comments
    # suggest -- TODO confirm against the table definition.
    with table.batch_writer(overwrite_by_pkeys=['pk', 'sk']) as batch:
        for date_column, date in doc['dateColumns'].items():
            for row_index, account in doc['data'].items():
                item = _build_account_item(
                    event_msg, doc, date_column, date, row_index, account
                )
                if item is not None:
                    batch.put_item(Item=item)

            # NOTE(review): nesting reconstructed from a listing without
            # indentation; this assumes one file record per date column.
            # Confirm it was not meant to run once after the loop.
            # pk -> item_type#company_ticker
            # sk -> date#filename
            batch.put_item(
                Item={
                    'pk': f"file#{event_msg['docType']}#{event_msg['companyTicker']}",
                    'sk': f"{date}#{event_msg['objectKey'].replace('processed/', '')}"
                }
            )

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": "ok"
        }),
    }