annotate process_document/app.py @ 5:2daf0dc08247

add get report endpoint
author Dennis C. M. <dennis@denniscm.com>
date Mon, 05 Jun 2023 12:48:47 +0100
parents 9005b7590008
children d15ccf5f1373
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
1 import json
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
2 import boto3
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
3 from datetime import datetime
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
4 from collections import defaultdict
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
5
3
2e5f3664f3e4 documents analyzer almost finished
Dennis C. M. <dennis@denniscm.com>
parents: 2
diff changeset
6
2
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
# Module-level S3 client, created at import time and used by `lambda_handler`
# for both the download and the upload.
7 s3_client = boto3.client('s3')
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
8
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
9
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
def lambda_handler(event, context):
    """
    Turn an analyzed document stored in S3 into a compact table report.

    The handler downloads the JSON document referenced by the event, reads
    its `Blocks` list, and builds a report containing:

    * `format`: the amount scale ('thousand', 'million' or 'billion') found
      in the first LINE block that mentions one, or None;
    * `dateRow` / `dateColumns`: the row index of the last cell recognised
      as a date and a mapping of column index -> year for every such cell;
    * `data`: the table rows, keeping only the date columns, column 1 and
      the optional 'type' entry.

    The report is uploaded next to the source object, with 'analyzed/'
    replaced by 'processed/' in the object key.

    NOTE(review): the block layout (BlockType, Relationships, RowIndex,
    ColumnIndex, EntityTypes) looks like an Amazon Textract analysis
    result -- confirm against the producer of the document.

    :param event: dict with event['body']['message'] holding `bucketName`,
        `objectKey`, `companyTicker`, `docType`, `fileId` and `fileName`.
    :param context: Lambda context object (unused).
    :return: dict with `statusCode` 200 and a `body.message` echoing the
        incoming identifiers together with the new `objectKey`.
    :raises KeyError: if the event, the document or a block is missing a
        required field, or a cell references an unknown child block id.
    """

    event_msg = event['body']['message']
    bucket_name = event_msg['bucketName']
    local_path = '/tmp/document.json'

    # Download file from s3
    s3_client.download_file(bucket_name, event_msg['objectKey'], local_path)

    with open(local_path) as f:
        doc = json.load(f)

    # Analyze document
    result = defaultdict(dict)
    blocks = doc['Blocks']

    # Get format: first line that mentions an amount scale wins. The key is
    # always present so consumers see `null` rather than a missing field.
    result['format'] = None
    for line in filter_blocks(blocks, 'BlockType', 'LINE'):
        amount_format = get_format(line['Text'])
        if amount_format:
            result['format'] = amount_format
            break

    # Index blocks by id once instead of rescanning the whole list for every
    # child of every cell. `setdefault` keeps the first block for an id,
    # matching the previous "first match" lookup.
    blocks_by_id = {}
    for block in blocks:
        blocks_by_id.setdefault(block['Id'], block)

    # Find dates value and position
    data = defaultdict(dict)
    for cell in filter_blocks(blocks, 'BlockType', 'CELL'):
        cell_text = _get_cell_text(cell, blocks_by_id)

        # Cells without any text (no relationships, no CHILD relationship,
        # or only non-textual children) carry nothing to report.
        if not cell_text:
            continue

        cell_row_index = cell['RowIndex']
        cell_column_index = cell['ColumnIndex']

        # Verify if `Text` could be a valid date
        date_string = is_date(clean_text(cell_text, 'date'))
        if date_string:
            cell_text = date_string
            result['dateRow'] = cell_row_index
            result['dateColumns'][cell_column_index] = date_string

        data[cell_row_index][cell_column_index] = clean_text(cell_text)

        if 'EntityTypes' in cell:
            data[cell_row_index]['type'] = cell['EntityTypes']

    # Delete unused row and columns: keep the date columns, column 1
    # (presumably the row labels -- TODO confirm) and the 'type' entry.
    for row_index, row in data.items():
        date_columns = result['dateColumns']
        kept = {
            column_index: value
            for column_index, value in row.items()
            if column_index in date_columns
            or column_index == 1
            or column_index == 'type'
        }

        # A row with a single entry has no value to pair with its label
        if len(kept) > 1:
            result['data'][row_index] = kept

    object_key = event_msg['objectKey'].replace('analyzed/', 'processed/')
    data_string = json.dumps(result, indent=2, default=str)

    s3_client.put_object(
        Bucket=bucket_name,
        Key=object_key,
        Body=data_string
    )

    return {
        "statusCode": 200,
        "body": {
            "message": {
                "companyTicker": event_msg['companyTicker'],
                "docType": event_msg['docType'],
                "fileId": event_msg['fileId'],
                "fileName": event_msg['fileName'],
                "objectKey": object_key,
                "bucketName": bucket_name
            }
        },
    }


def _get_cell_text(cell, blocks_by_id):
    """
    Join the `Text` of a CELL block's children with underscores.

    Children that have no `Text` field are skipped. Returns an empty string
    when the cell has no relationships, no CHILD relationship, or no textual
    children.
    """

    relationships = cell.get('Relationships') or ()
    child_ids = next(
        (rel['Ids'] for rel in relationships if rel['Type'] == 'CHILD'),
        ()
    )

    children = [blocks_by_id[child_id] for child_id in child_ids]

    return '_'.join(child['Text'] for child in children if 'Text' in child)
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
102
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
103
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
def filter_blocks(blocks, block_key, block_value):
    """
    Collect every block whose `block_key` field equals `block_value`.

    The result is a list (possibly empty) in the same order as `blocks`.
    A block that lacks `block_key` raises KeyError.
    """

    matches = []

    for candidate in blocks:
        if candidate[block_key] == block_value:
            matches.append(candidate)

    return matches
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
110
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
111
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
def is_date(string_date):
    """
    Try to read a string as a date and return its year.

    Accepted layouts are 'DD-MM-YYYY', 'DD/MM/YYYY' and a bare 'YYYY'.
    Returns the year as a four-digit string, or None when the string matches
    no layout or the year is before 1900 or after the current year.
    """

    current_year = datetime.now().year

    for layout in ('%d-%m-%Y', '%d/%m/%Y', '%Y'):
        try:
            parsed = datetime.strptime(string_date, layout)
        except ValueError:
            # Not this layout, try the next one
            continue

        if 1900 <= parsed.year <= current_year:
            return parsed.strftime("%Y")

        return None  # Parsed, but the year is out of range

    return None
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
131
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
132
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
def get_format(phrase):
    """
    Given a phrase, find the amount format (scale) it mentions.

    The match is a case-insensitive substring search, so 'in thousands',
    'In Millions' and 'USD BILLIONS' are all recognised; plural forms are
    covered because they contain the singular. When several scales appear,
    the first of 'thousand', 'million', 'billion' wins.

    :param phrase: text to inspect.
    :return: 'thousand', 'million' or 'billion', or None if none is found.
    """

    lowered_phrase = phrase.lower()

    for amount_format in ('thousand', 'million', 'billion'):
        if amount_format in lowered_phrase:
            return amount_format

    return None
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
145
ef8a4d95755a add aws sam project
Dennis C. M. <dennis@denniscm.com>
parents:
diff changeset
146
4
9005b7590008 state machine working
Dennis C. M. <dennis@denniscm.com>
parents: 3
diff changeset
def clean_text(text, text_type='default'):
    """
    Remove bad characters from text and lowercase the result.

    Punctuation and symbols are stripped except for a small allow-list that
    depends on `text_type`:

    * 'date': '_', '-' and '/' are kept. In addition, if the first or the
      last character is a letter, every occurrence of that letter is
      removed, so index markers such as '2020a' or 'b2020' become '2020'.
    * anything else: '.', ',', '-' and spaces are kept.

    An empty string is returned unchanged.

    :param text: string to clean.
    :param text_type: 'date' for date candidates, any other value for
        regular cell text.
    :return: the cleaned, lowercased string.
    """

    special_chars = '!@#$%^&*()-_+=[]{}\\|;:"\'<>/?.,'

    if text_type == 'date':
        allowed_chars = '_-/'

        # Sometimes date is '2020a' or 'b2020' because indexes.
        # Slices (not indexes) keep an empty string from raising IndexError.
        edge_letters = ''.join(
            char for char in (text[-1:], text[:1]) if char.isalpha()
        )
    else:
        allowed_chars = '.,- '
        edge_letters = ''

    removable_chars = edge_letters + ''.join(
        char for char in special_chars if char not in allowed_chars
    )

    # One pass over the text instead of one `replace` per character
    return text.translate(str.maketrans('', '', removable_chars)).lower()