comparison analyze_document/app.py @ 4:9005b7590008

state machine working
author Dennis C. M. <dennis@denniscm.com>
date Mon, 05 Jun 2023 10:13:43 +0100
parents 2e5f3664f3e4
children d15ccf5f1373
comparison
equal deleted inserted replaced
3:2e5f3664f3e4 4:9005b7590008
10 10
11 def lambda_handler(event, context): 11 def lambda_handler(event, context):
12 event_detail = event['detail'] 12 event_detail = event['detail']
13 bucket_name = event_detail['bucket']['name'] 13 bucket_name = event_detail['bucket']['name']
14 object_key = event_detail['object']['key'] 14 object_key = event_detail['object']['key']
15 company_ticker = re.search('unprocessed/(.*).pdf', object_key).group(1) 15
16 company_ticker = re.search('unprocessed/(.*)_', object_key).group(1)
17 doc_type = re.search(f'unprocessed/{company_ticker}_(.*).pdf', object_key).group(1)
18 file_id = uuid.uuid4()
16 19
17 data_dict = textract_client.analyze_document( 20 data_dict = textract_client.analyze_document(
18 Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}}, 21 Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}},
19 FeatureTypes=['TABLES'] 22 FeatureTypes=['TABLES']
20 ) 23 )
21 24
22 data_string = json.dumps(data_dict, indent=2, default=str) 25 data_string = json.dumps(data_dict, indent=2, default=str)
23 filename = f'{company_ticker}_{uuid.uuid4()}.json' 26 filename = f'{company_ticker}_{doc_type}_{file_id}.json'
24 27
25 s3_client.put_object( 28 s3_client.put_object(
26 Bucket=bucket_name, 29 Bucket=bucket_name,
27 Key=f'analyzed/{filename}', 30 Key=f'analyzed/{filename}',
28 Body=data_string 31 Body=data_string
35 38
36 return { 39 return {
37 "statusCode": 200, 40 "statusCode": 200,
38 "body": { 41 "body": {
39 "message": { 42 "message": {
43 "companyTicker": company_ticker,
44 "docType": doc_type,
45 "fileId": file_id,
46 "fileName": filename,
40 "objectKey": f'analyzed/{filename}', 47 "objectKey": f'analyzed/{filename}',
41 "bucketName": bucket_name 48 "bucketName": bucket_name
42 } 49 }
43 }, 50 },
44 } 51 }