Mercurial > public > finance-parser
comparison upload_document/app.py @ 4:9005b7590008
state machine working
author | Dennis C. M. <dennis@denniscm.com> |
---|---|
date | Mon, 05 Jun 2023 10:13:43 +0100 |
parents | 2e5f3664f3e4 |
children | d15ccf5f1373 |
comparison
equal
deleted
inserted
replaced
3:2e5f3664f3e4 | 4:9005b7590008 |
---|---|
1 import json | 1 import json |
2 import boto3 | 2 import boto3 |
3 import re | 3 |
4 | 4 |
5 s3_client = boto3.client('s3') | 5 s3_client = boto3.client('s3') |
6 dynamodb = boto3.resource('dynamodb') | 6 dynamodb = boto3.resource('dynamodb') |
7 table = dynamodb.Table('FinanceParser') | 7 table = dynamodb.Table('FinanceParser') |
8 | 8 |
9 | 9 |
10 def lambda_handler(event, context): | 10 def lambda_handler(event, context): |
11 event_message = event['body']['message'] | 11 event_msg = event['body']['message'] |
12 object_key = event_message['objectKey'] | |
13 bucket_name = event_message['bucketName'] | |
14 company_ticker = re.search('processed/(.*)_', object_key).group(1) | |
15 | 12 |
16 # Download file from s3 | 13 # Download file from s3 |
17 s3_client.download_file(bucket_name, object_key, '/tmp/document.json') | 14 s3_client.download_file( |
15 event_msg['bucketName'], | |
16 event_msg['objectKey'], | |
17 '/tmp/document.json' | |
18 ) | |
18 | 19 |
19 with open('/tmp/document.json') as f: | 20 with open('/tmp/document.json') as f: |
20 doc = json.load(f) | 21 doc = json.load(f) |
21 | 22 |
22 for dateColumn, date in doc['dateColumns'].items(): | 23 for dateColumn, date in doc['dateColumns'].items(): |
26 column_types = account['type'] | 27 column_types = account['type'] |
27 except KeyError: | 28 except KeyError: |
28 column_types = [] | 29 column_types = [] |
29 | 30 |
30 """ | 31 """ |
31 The following statement avoids getting a `2020` as the value | 32 Given: |
32 of `ASSETS`. | |
33 | |
34 +------------------+------+------+ | 33 +------------------+------+------+ |
35 | ASSETS | 2020 | 2019 | | 34 | ASSETS | 2020 | 2019 | |
36 +------------------+------+------+ | 35 +------------------+------+------+ |
37 | ASSETS_ACCOUNT_1 | | | | 36 | ASSETS_ACCOUNT_1 | | | |
38 +------------------+------+------+ | 37 +------------------+------+------+ |
39 | ASSETS_ACCOUNT_2 | | | | 38 | ASSETS_ACCOUNT_2 | | | |
40 +------------------+------+------+ | 39 +------------------+------+------+ |
40 | |
41 The following statement avoids getting `2020` as the value of `ASSETS`. | |
41 """ | 42 """ |
42 | 43 |
43 account_value = account[dateColumn] | 44 account_value = account[dateColumn] |
44 if 'COLUMN_HEADER' in column_types and date == account_value: | 45 if 'COLUMN_HEADER' in column_types and date == account_value: |
45 account_value = '' | 46 account_value = '' |
49 # pk -> item_type#company_ticker | 50 # pk -> item_type#company_ticker |
50 # sk -> date#row_index | 51 # sk -> date#row_index |
51 | 52 |
52 batch.put_item( | 53 batch.put_item( |
53 Item={ | 54 Item={ |
54 'pk': f'balance#{company_ticker}', | 55 'pk': f"balance#{event_msg['companyTicker']}", |
55 'sk': f'{date}#{row_index}', | 56 'sk': f'{date}#{row_index}', |
56 'account_name': account['1'], | 57 'account_name': account['1'], |
57 'account_value': account_value, | 58 'account_value': account_value, |
58 'column_types': column_types | 59 'column_types': column_types |
59 } | 60 } |
60 ) | 61 ) |
61 | 62 |
62 # pk -> item_type#company_ticker | 63 # pk -> item_type#company_ticker |
63 # sk -> date | 64 # sk -> date#filename |
64 | 65 |
65 table.put_item( | 66 table.put_item( |
66 Item={ | 67 Item={ |
67 'pk': f'file#{company_ticker}', | 68 'pk': f"file#{event_msg['companyTicker']}", |
68 'sk': f"{date}", | 69 'sk': f"{date}#{event_msg['objectKey'].replace('processed/', '')}" |
69 'filename': object_key.replace('processed/', '') | |
70 } | 70 } |
71 ) | 71 ) |
72 | 72 |
73 return { | 73 return { |
74 "statusCode": 200, | 74 "statusCode": 200, |