|
1 |
| -import os |
2 |
| -import re |
3 |
| -import pandas as pd |
4 | 1 |
|
5 |
# Folder scanned for input ``.txt`` files; point this elsewhere if the text
# files live in a different location.
directory = '/mnt/data'

# Line-prefix matchers, one per kind of line the parser reacts to.
transaction_start_pattern = re.compile(r'^52')  # header line of a transaction group
routing_number_pattern = re.compile(r'^62')  # one transaction
memo_pattern = re.compile(r'^705')  # memo for the preceding transaction
transaction_end_pattern = re.compile(r'^82')  # end of a transaction group

# Collects one dict per parsed transaction, across every file processed.
transactions = []

# Header values (Customer Name, Tax ID, Sec Code, Description) taken from the
# most recent "52" line; each following "62" line is stamped with them.
current_customer_name = current_tax_id = ""
current_sec_code = current_description = ""
22 |
| - |
23 |
def parse_62_line(line):
    """Record one "62" transaction line in the module-level ``transactions`` list.

    The line is cut into fixed-width columns and combined with the header
    values currently held in ``current_customer_name``, ``current_tax_id``,
    ``current_sec_code`` and ``current_description``.  Nothing is returned;
    the resulting dict is appended to ``transactions``.

    Args:
        line: A text line beginning with "62".  Columns that fall beyond the
            end of a short line come out as empty strings.
    """
    record = {
        "Customer Name": current_customer_name,
        "Tax ID": current_tax_id,
        "Sec Code": current_sec_code,
        "Description": current_description,
    }

    # NOTE(review): the 52/62/705/82 prefixes look like NACHA ACH record
    # types.  If that is the format, the account/amount/payee spans below do
    # not match the standard 94-character entry layout -- confirm against the
    # real file specification before relying on these columns.
    column_spans = (
        ("Routing Number", 3, 12),
        ("Account Number", 12, 16),
        ("Amount", 16, 26),
        ("Payee", 26, None),  # everything from column 26 to the end of the line
    )
    for field, start, end in column_spans:
        record[field] = line[start:end].strip()

    # Left blank here; the caller fills it in when a "705" line follows.
    record["Memo"] = ""
    transactions.append(record)
37 |
| - |
38 |
# Process every .txt file in ``directory`` and collect its transactions.
# sorted() keeps the row order of the output reproducible; os.listdir()
# returns entries in arbitrary order.
for filename in sorted(os.listdir(directory)):
    input_path = os.path.join(directory, filename)
    # Skip other extensions and anything that is not a regular file (for
    # example a directory whose name happens to end in ".txt").
    if not filename.endswith('.txt') or not os.path.isfile(input_path):
        continue

    # Start each file with a clean slate so header values and the memo target
    # left over from a previous file can never leak into this one.
    current_customer_name = ""
    current_tax_id = ""
    current_sec_code = ""
    current_description = ""
    memo_target = None  # transaction that a following "705" line belongs to

    with open(input_path, 'r') as handle:
        # Iterate the file lazily instead of loading every line up front.
        for line in handle:
            line = line.strip()

            # "52": group header -- remember its fields for the "62" lines
            # that follow.
            # NOTE(review): if these are NACHA batch headers, the standard
            # layout uses different column spans -- confirm against the spec.
            if transaction_start_pattern.match(line):
                current_customer_name = line[4:34].strip()  # Customer Name
                current_tax_id = line[34:45].strip()  # Tax ID
                current_sec_code = line[45:48].strip()  # Sec Code
                current_description = line[48:58].strip()  # Description
                memo_target = None  # a new group has no transaction yet

            # "62": one transaction; parse_62_line appends it to
            # ``transactions``, so the new record is the last element.
            elif routing_number_pattern.match(line):
                parse_62_line(line)
                memo_target = transactions[-1]

            # "705": memo for the transaction directly before it in the same
            # group.  A memo with no such transaction is ignored rather than
            # being written onto a record from an earlier group or file.
            # A later "705" for the same transaction overwrites the earlier one.
            elif memo_pattern.match(line):
                if memo_target is not None:
                    memo_target["Memo"] = line[3:].strip()

            # "82": end of the group -- later memos must not attach to it.
            elif transaction_end_pattern.match(line):
                memo_target = None

# Convert the collected transactions to a DataFrame and save them as CSV in
# the input directory (identical to the old hard-coded path while
# ``directory`` is '/mnt/data').
df = pd.DataFrame(transactions)
output_path = os.path.join(directory, 'parsed_transactions.csv')
df.to_csv(output_path, index=False)
print("Parsed data saved to parsed_transactions.csv")
0 commit comments