diff --git a/Scripts/Generate_transactions.py b/Scripts/Generate_transactions.py deleted file mode 100644 index f3e3712..0000000 --- a/Scripts/Generate_transactions.py +++ /dev/null @@ -1,115 +0,0 @@ -from faker import Faker -from dotenv import load_dotenv -from datetime import datetime -import os -import random -import pandas as pd -import boto3 - -# ---- Setup ---- -fake = Faker() -load_dotenv() - -s3 = boto3.resource( - "s3", - endpoint_url=os.getenv("STORAGE_ENDPOINT"), - aws_access_key_id=os.getenv("STORAGE_ACCESS_KEY"), - aws_secret_access_key=os.getenv("STORAGE_SECRET_KEY") -) - -bucket_name = os.getenv("STORAGE_BUCKET") -accounts_key = "DataLab/accounts/accounts.csv" -transactions_s3_key = "DataLab/transactions/transactions.csv" - -# ---- Ensure local data folder exists ---- -os.makedirs("../Data", exist_ok=True) - -# ---- Download accounts.csv from S3 ---- -local_accounts_file = "../Data/accounts.csv" -try: - s3.Bucket(bucket_name).download_file(accounts_key, local_accounts_file) - print("Downloaded accounts.csv from S3.") -except Exception as e: - print("ERROR: Could not download accounts.csv:", e) - raise SystemExit() - -# ---- Load accounts DataFrame ---- -accounts_df = pd.read_csv(local_accounts_file) - -# ---- Sample vendors ---- -vendors = ["Amazon", "Walmart", "Target", "Starbucks", "Apple", "Netflix", "Uber", "Lyft", "BestBuy", "Costco"] - -# ---- Helper Functions ---- -def generate_transaction_id(account_id, idx): - """Generate a unique transaction ID combining account ID and index.""" - return f"{account_id}{str(idx).zfill(5)}" - -def generate_transaction(account): - """Generate a realistic transaction for a given account.""" - t_type = random.choices( - ["Deposit", "Withdrawal", "Payment", "Transfer"], - weights=[0.4, 0.3, 0.2, 0.1], k=1 - )[0] - - transaction_data = { - "transaction_id": None, # fill later - "account_id": account['account_id'], - "branch_id": None, - "transaction_type": t_type, - "amount": 0, - "date": fake.date_between(start_date=pd.to_datetime(account['open_date']), end_date=datetime.today()), - "balance_after": 0, - "vendor": None, - "transaction_location": None - } - - if t_type in ["Deposit", "Withdrawal"]: - # Pick one of the branches for deposit/withdrawal - transaction_data["branch_id"] = account['branch_id'] - amount = round(random.uniform(50, 7000), 2) if t_type == "Withdrawal" else round(random.uniform(20, 10000), 2) - if t_type == "Withdrawal": - amount = min(amount, account['balance']) - account['balance'] -= amount - else: - account['balance'] += amount - transaction_data["amount"] = amount - transaction_data["balance_after"] = round(account['balance'], 2) - transaction_data["transaction_location"] = f"Branch {account['branch_id']}" - - else: # Payment or Transfer - transaction_data["branch_id"] = None - transaction_data["vendor"] = random.choice(vendors) - amount = round(random.uniform(5, 1000), 2) - account['balance'] = max(account['balance'] - amount, 0) - transaction_data["amount"] = amount - transaction_data["balance_after"] = round(account['balance'], 2) - transaction_data["transaction_location"] = "POS / Online" - - return transaction_data - -# ---- Generate transactions ---- -transactions = [] -idx = 1 - -for _, account in accounts_df.iterrows(): - account_transactions_count = random.randint(5, 20) - for _ in range(account_transactions_count): - txn = generate_transaction(account) - txn['transaction_id'] = generate_transaction_id(account['account_id'], idx) - transactions.append(txn) - idx += 1 - -# ---- Convert to DataFrame ---- -transactions_df = pd.DataFrame(transactions) - -# ---- Save locally ---- -local_transactions_file = "../Data/transactions.csv" -transactions_df.to_csv(local_transactions_file, index=False) -print("Generated transactions.csv locally with realistic branch/vendor data.") - -# ---- Upload to S3 ---- -try: - s3.Bucket(bucket_name).upload_file(local_transactions_file, transactions_s3_key) - print(f"Uploaded transactions.csv to s3://{bucket_name}/{transactions_s3_key}") -except Exception as e: - print("ERROR: Could not upload transactions.csv to S3:", e)