0
|
1 import json
|
|
2 from datetime import datetime
|
|
3
|
|
4
|
|
def main(path='santander.json'):
    """Print every cell of the first table in a Textract-style result.

    Loads the JSON document at *path* and stops early (printing the status)
    unless its ``JobStatus`` is ``SUCCEEDED``. Otherwise it walks the children
    of the first ``TABLE`` block; for each child it prints the lower-cased
    texts of that child's own children joined by ``_``, followed by the
    result of ``is_date`` for that joined value.

    Args:
        path: Location of the JSON file to read. Defaults to the file name
            this script was written for.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(path, encoding='utf-8') as f:
        doc = json.load(f)

    if doc['JobStatus'] != 'SUCCEEDED':
        print(f"JOB STATUS: {doc['JobStatus']}")
        return

    blocks = doc['Blocks']
    table = extract_block(blocks, 'BlockType', 'TABLE')

    # Index the blocks once so each id lookup below is O(1) instead of a scan
    # over every block. setdefault keeps the first block seen for an id.
    blocks_by_id = {}
    for block in blocks:
        blocks_by_id.setdefault(block.get('Id'), block)

    for cell_id in extract_child_ids(table):
        cell = blocks_by_id[cell_id]
        words = (blocks_by_id[word_id] for word_id in extract_child_ids(cell))
        # Children without a 'Text' entry are skipped rather than raising
        # KeyError (presumably selection elements such as checkboxes --
        # TODO confirm against real data).
        cell_value = '_'.join(
            word['Text'].lower() for word in words if 'Text' in word
        )

        print(cell_value)
        print(is_date(cell_value))
|
|
32
|
|
33
|
|
34
|
|
def extract_child_ids(block):
    """Return the ids listed in *block*'s first ``CHILD`` relationship.

    Args:
        block: Mapping that may carry a ``Relationships`` list whose entries
            each have a ``Type`` and an ``Ids`` list.

    Returns:
        The ``Ids`` list of the first relationship whose ``Type`` is
        ``'CHILD'``, or an empty list when the block has no relationships or
        none of them is a ``CHILD`` relationship.
    """
    # ``or ()`` also covers an explicit ``"Relationships": null`` in the JSON.
    relationships = block.get('Relationships') or ()

    for relationship in relationships:
        if relationship['Type'] == 'CHILD':
            return relationship['Ids']

    # Relationships of other types only: there are no children to report.
    return []
|
|
40
|
|
41
|
|
def extract_block(blocks, block_key, block_value):
    """Return the first block whose *block_key* entry equals *block_value*.

    Args:
        blocks: Iterable of dict blocks to search, in order.
        block_key: Key to look up in each block.
        block_value: Value that entry must equal.

    Returns:
        The first matching block (the same object that is in *blocks*).

    Raises:
        IndexError: If no block matches. The exception type is kept as
            IndexError so callers that catch it keep working.
    """
    missing = object()  # never equal to a stored value, including None

    for block in blocks:
        # A block that lacks the key cannot match; skip it instead of
        # aborting the whole search with KeyError.
        if block.get(block_key, missing) == block_value:
            return block

    raise IndexError(f'no block with {block_key!r} == {block_value!r}')
|
|
44
|
|
45
|
|
def is_date(string_date, formats_allowed=('%d-%m-%Y', '%d/%m/%Y', '%d.%m.%Y', '%Y')):
    """Tell whether *string_date* looks like a date in an allowed format.

    The string is accepted if it parses as-is, or after dropping its last
    character, its first character, or both -- so a date wrapped in one stray
    character on either side still counts.

    Args:
        string_date: Text to test.
        formats_allowed: ``datetime.strptime`` format strings to try. Defaults
            to day-month-year with ``-``, ``/`` or ``.`` separators, plus a
            bare four-digit year.

    Returns:
        ``True`` if any candidate parses with any format, otherwise ``False``.
    """
    # The trimmed variants do not depend on the format, so build them once.
    candidates = (
        string_date,
        string_date[:-1],
        string_date[1:],
        string_date[1:-1],
    )

    for format_allowed in formats_allowed:
        for candidate in candidates:
            try:
                datetime.strptime(candidate, format_allowed)
            except ValueError:
                # This candidate/format pair does not parse; try the next one.
                continue
            return True

    return False
|
|
67
|
|
68
|
|
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()