Mercurial > public > finance-parser
comparison main.py @ 0:556768c7d3d7
first attempt
author | Dennis C. M. <dennis@denniscm.com> |
---|---|
date | Tue, 30 May 2023 20:08:35 +0100 |
parents | |
children | e23b7617bbc4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:556768c7d3d7 |
---|---|
1 import json | |
2 from datetime import datetime | |
3 | |
4 | |
def main():
    """Print every cell of the first TABLE block found in 'santander.json'.

    The JSON document is expected to carry a 'JobStatus' entry and, when the
    job succeeded, a 'Blocks' list. Each child of the first block whose
    'BlockType' is 'TABLE' is treated as a cell: the 'Text' of the cell's own
    children is lower-cased and joined with underscores. For every cell the
    joined value is printed, followed by the result of ``is_date`` for it.

    When the job status is anything other than 'SUCCEEDED', the status is
    printed and nothing else happens.
    """
    with open('santander.json') as source:
        doc = json.load(source)

    status = doc['JobStatus']
    if status != 'SUCCEEDED':
        print(f"JOB STATUS: {status}")
        return

    blocks = doc['Blocks']
    table = extract_block(blocks, 'BlockType', 'TABLE')

    for cell_id in extract_child_ids(table):
        cell = extract_block(blocks, 'Id', cell_id)

        # One lower-cased entry per child of the cell, in document order.
        words = [
            extract_block(blocks, 'Id', word_id)['Text'].lower()
            for word_id in extract_child_ids(cell)
        ]
        cell_value = '_'.join(words)

        print(cell_value)
        print(is_date(cell_value))
32 | |
33 | |
34 | |
def extract_child_ids(block):
    """Return the ids listed under the block's first CHILD relationship.

    Args:
        block: A dict describing one block. It may carry a 'Relationships'
            list whose entries each have a 'Type' and an 'Ids' entry.

    Returns:
        The 'Ids' value of the first relationship whose 'Type' is 'CHILD'.
        An empty list is returned when the block has no 'Relationships'
        entry, when that entry is empty, or when none of the relationships
        is of type 'CHILD'.
    """
    relationships = block.get('Relationships')
    if not relationships:
        return []

    # A block can have relationships without any of them being CHILD;
    # treat that the same as having no children instead of failing.
    return next(
        (rel['Ids'] for rel in relationships if rel['Type'] == 'CHILD'),
        [],
    )
40 | |
41 | |
def extract_block(blocks, block_key, block_value):
    """Return the first block whose ``block_key`` entry equals ``block_value``.

    The scan stops as soon as a match is found, so blocks after the match
    are never inspected.

    Args:
        blocks: An iterable of dict-like blocks.
        block_key: The key to look up in each block.
        block_value: The value the key must be equal to.

    Returns:
        The first matching block (the same object, not a copy).

    Raises:
        IndexError: If no block matches.
        KeyError: If a block inspected during the scan lacks ``block_key``.
    """
    for block in blocks:
        if block[block_key] == block_value:
            return block

    raise IndexError(f'no block with {block_key!r} == {block_value!r}')
44 | |
45 | |
def is_date(string_date):
    """Tell whether ``string_date``, or a lightly trimmed copy, parses as a date.

    The accepted layouts are day-month-year separated by '-', '/' or '.',
    and a bare four-digit year. For each layout the string is tried as
    given, then with its last character removed, then its first character
    removed, then both removed.

    Args:
        string_date: The text to check.

    Returns:
        True as soon as any layout parses any of the variants, otherwise
        False.
    """
    patterns = ('%d-%m-%Y', '%d/%m/%Y', '%d.%m.%Y', '%Y')

    # Whole string first, then drop the last char, the first char, or both.
    trims = (slice(None), slice(None, -1), slice(1, None), slice(1, -1))

    for pattern in patterns:
        for trim in trims:
            try:
                datetime.strptime(string_date[trim], pattern)
            except ValueError:
                continue

            return True

    return False
67 | |
68 | |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()