comparison analyze_document/app.py @ 10:2350662483a3

fix minor bugs
author Dennis C. M. <dennis@denniscm.com>
date Thu, 08 Jun 2023 17:16:36 +0100
parents bf19235a9636
children
comparison
equal deleted inserted replaced
9:bf19235a9636 10:2350662483a3
11 def lambda_handler(event, context): 11 def lambda_handler(event, context):
12 event_detail = event['detail'] 12 event_detail = event['detail']
13 bucket_name = event_detail['bucket']['name'] 13 bucket_name = event_detail['bucket']['name']
14 object_key = event_detail['object']['key'] 14 object_key = event_detail['object']['key']
15 15
16 company_ticker = re.search('unprocessed/(.*)_', object_key).group(1) 16 company_ticker = re.search('unprocessed/(.*?)_', object_key).group(1)
17 doc_type = re.search(f'unprocessed/{company_ticker}_(.*)_', object_key).group(1) 17 doc_type = re.search(f'unprocessed/{company_ticker}_(.*?)_', object_key).group(1)
18 file_id = str(uuid.uuid4()) 18 file_id = str(uuid.uuid4())
19 19
20 data_dict = textract_client.analyze_document( 20 data_dict = textract_client.analyze_document(
21 Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}}, 21 Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}},
22 FeatureTypes=['TABLES'] 22 FeatureTypes=['TABLES']