comparison upload_document/app.py @ 4:9005b7590008

state machine working
author Dennis C. M. <dennis@denniscm.com>
date Mon, 05 Jun 2023 10:13:43 +0100
parents 2e5f3664f3e4
children d15ccf5f1373
comparison
equal deleted inserted replaced
3:2e5f3664f3e4 4:9005b7590008
1 import json 1 import json
2 import boto3 2 import boto3
3 import re 3
4 4
5 s3_client = boto3.client('s3') 5 s3_client = boto3.client('s3')
6 dynamodb = boto3.resource('dynamodb') 6 dynamodb = boto3.resource('dynamodb')
7 table = dynamodb.Table('FinanceParser') 7 table = dynamodb.Table('FinanceParser')
8 8
9 9
10 def lambda_handler(event, context): 10 def lambda_handler(event, context):
11 event_message = event['body']['message'] 11 event_msg = event['body']['message']
12 object_key = event_message['objectKey']
13 bucket_name = event_message['bucketName']
14 company_ticker = re.search('processed/(.*)_', object_key).group(1)
15 12
16 # Download file from s3 13 # Download file from s3
17 s3_client.download_file(bucket_name, object_key, '/tmp/document.json') 14 s3_client.download_file(
15 event_msg['bucketName'],
16 event_msg['objectKey'],
17 '/tmp/document.json'
18 )
18 19
19 with open('/tmp/document.json') as f: 20 with open('/tmp/document.json') as f:
20 doc = json.load(f) 21 doc = json.load(f)
21 22
22 for dateColumn, date in doc['dateColumns'].items(): 23 for dateColumn, date in doc['dateColumns'].items():
26 column_types = account['type'] 27 column_types = account['type']
27 except KeyError: 28 except KeyError:
28 column_types = [] 29 column_types = []
29 30
30 """ 31 """
31 The following statement avoids getting a `2020` as the value 32 Given:
32 of `ASSETS`.
33
34 +------------------+------+------+ 33 +------------------+------+------+
35 | ASSETS | 2020 | 2019 | 34 | ASSETS | 2020 | 2019 |
36 +------------------+------+------+ 35 +------------------+------+------+
37 | ASSETS_ACCOUNT_1 | | | 36 | ASSETS_ACCOUNT_1 | | |
38 +------------------+------+------+ 37 +------------------+------+------+
39 | ASSETS_ACCOUNT_2 | | | 38 | ASSETS_ACCOUNT_2 | | |
40 +------------------+------+------+ 39 +------------------+------+------+
40
41 The following statement avoids getting `2020` as the value of `ASSETS`.
41 """ 42 """
42 43
43 account_value = account[dateColumn] 44 account_value = account[dateColumn]
44 if 'COLUMN_HEADER' in column_types and date == account_value: 45 if 'COLUMN_HEADER' in column_types and date == account_value:
45 account_value = '' 46 account_value = ''
49 # pk -> item_type#company_ticker 50 # pk -> item_type#company_ticker
50 # sk -> date#row_index 51 # sk -> date#row_index
51 52
52 batch.put_item( 53 batch.put_item(
53 Item={ 54 Item={
54 'pk': f'balance#{company_ticker}', 55 'pk': f"balance#{event_msg['companyTicker']}",
55 'sk': f'{date}#{row_index}', 56 'sk': f'{date}#{row_index}',
56 'account_name': account['1'], 57 'account_name': account['1'],
57 'account_value': account_value, 58 'account_value': account_value,
58 'column_types': column_types 59 'column_types': column_types
59 } 60 }
60 ) 61 )
61 62
62 # pk -> item_type#company_ticker 63 # pk -> item_type#company_ticker
63 # sk -> date 64 # sk -> date#filename
64 65
65 table.put_item( 66 table.put_item(
66 Item={ 67 Item={
67 'pk': f'file#{company_ticker}', 68 'pk': f"file#{event_msg['companyTicker']}",
68 'sk': f"{date}", 69 'sk': f"{date}#{event_msg['objectKey'].replace('processed/', '')}"
69 'filename': object_key.replace('processed/', '')
70 } 70 }
71 ) 71 )
72 72
73 return { 73 return {
74 "statusCode": 200, 74 "statusCode": 200,