Mercurial > public > finance-parser
comparison analyze_document/app.py @ 4:9005b7590008
state machine working
author | Dennis C. M. <dennis@denniscm.com> |
---|---|
date | Mon, 05 Jun 2023 10:13:43 +0100 |
parents | 2e5f3664f3e4 |
children | d15ccf5f1373 |
comparison
equal
deleted
inserted
replaced
3:2e5f3664f3e4 | 4:9005b7590008 |
---|---|
10 | 10 |
11 def lambda_handler(event, context): | 11 def lambda_handler(event, context): |
12 event_detail = event['detail'] | 12 event_detail = event['detail'] |
13 bucket_name = event_detail['bucket']['name'] | 13 bucket_name = event_detail['bucket']['name'] |
14 object_key = event_detail['object']['key'] | 14 object_key = event_detail['object']['key'] |
15 company_ticker = re.search('unprocessed/(.*).pdf', object_key).group(1) | 15 |
| 16 company_ticker = re.search('unprocessed/(.*)_', object_key).group(1) |
| 17 doc_type = re.search(f'unprocessed/{company_ticker}_(.*).pdf', object_key).group(1) |
| 18 file_id = uuid.uuid4() |
16 | 19 |
17 data_dict = textract_client.analyze_document( | 20 data_dict = textract_client.analyze_document( |
18 Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}}, | 21 Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}}, |
19 FeatureTypes=['TABLES'] | 22 FeatureTypes=['TABLES'] |
20 ) | 23 ) |
21 | 24 |
22 data_string = json.dumps(data_dict, indent=2, default=str) | 25 data_string = json.dumps(data_dict, indent=2, default=str) |
23 filename = f'{company_ticker}_{uuid.uuid4()}.json' | 26 filename = f'{company_ticker}_{doc_type}_{file_id}.json' |
24 | 27 |
25 s3_client.put_object( | 28 s3_client.put_object( |
26 Bucket=bucket_name, | 29 Bucket=bucket_name, |
27 Key=f'analyzed/{filename}', | 30 Key=f'analyzed/{filename}', |
28 Body=data_string | 31 Body=data_string |
35 | 38 |
36 return { | 39 return { |
37 "statusCode": 200, | 40 "statusCode": 200, |
38 "body": { | 41 "body": { |
39 "message": { | 42 "message": { |
| 43 "companyTicker": company_ticker, |
| 44 "docType": doc_type, |
| 45 "fileId": file_id, |
| 46 "fileName": filename, |
40 "objectKey": f'analyzed/{filename}', | 47 "objectKey": f'analyzed/{filename}', |
41 "bucketName": bucket_name | 48 "bucketName": bucket_name |
42 } | 49 } |
43 }, | 50 }, |
44 } | 51 } |