64 lines
1.7 KiB
Python
64 lines
1.7 KiB
Python
from faker import Faker
|
|
from dotenv import load_dotenv
|
|
import os
|
|
import json
|
|
import boto3
|
|
from datetime import datetime, timezone
|
|
import uuid
|
|
|
|
# ---- Setup ----
# Faker instance that supplies the synthetic branch attributes.
fake = Faker()

# Load variables from a .env file (python-dotenv) before any STORAGE_*
# setting is read from the environment below.
load_dotenv()

# Connection settings for the object store. os.getenv returns None for a
# variable that is not set, and that None is handed to boto3 unchanged.
_s3_settings = {
    "endpoint_url": os.getenv('STORAGE_ENDPOINT'),
    "aws_access_key_id": os.getenv('STORAGE_ACCESS_KEY'),
    "aws_secret_access_key": os.getenv('STORAGE_SECRET_KEY'),
}
s3 = boto3.client('s3', **_s3_settings)

# Target bucket for the upload (None when STORAGE_BUCKET is unset).
bucket_name = os.getenv('STORAGE_BUCKET')

# Bronze landing zone (RAW): key prefix under which batch files are written.
branches_prefix = "bronze/branches_raw/"
|
|
|
|
# ---- Generate branch events ----
# Builds a small batch of synthetic "branch_created" events. Every event in
# the batch carries the same UTC timestamp for both event_ts and ingestion_ts.
events = []

now_utc = datetime.now(timezone.utc)
# The ISO-8601 rendering is identical for every event, so compute it once.
batch_ts = now_utc.isoformat()

for _ in range(3):
    # Draw the city a single time so that "branch_name" and "city" describe
    # the same place; two separate fake.city() calls return unrelated values.
    branch_city = fake.city()

    event = {
        "event_id": str(uuid.uuid4()),
        "event_type": "branch_created",
        "event_ts": batch_ts,
        "branch": {
            "branch_id": str(uuid.uuid4()),
            "branch_name": f"{branch_city} Branch",
            "address": fake.street_address(),
            "city": branch_city,
            "state": fake.state_abbr(),
            "open_date": fake.date_between(start_date="-30d", end_date="today").isoformat(),  # New in the last 30 days
            "employee_count": fake.random_int(min=5, max=50),
            "branch_manager": fake.name(),
            "phone_number": fake.phone_number(),
            "timezone": fake.timezone(),
        },
        "source_system": "branch_generator",
        "ingestion_ts": batch_ts,
    }

    events.append(event)
|
|
|
|
# ---- Write events as JSON lines ----
# Object key: the landing prefix plus a file named after the batch timestamp
# held in now_utc, at one-second resolution.
batch_stamp = now_utc.strftime('%Y%m%d_%H%M%S')
key = f"{branches_prefix}batch_{batch_stamp}.json"

# One JSON document per event, joined with "\n" (no trailing newline).
json_lines = [json.dumps(e) for e in events]
body = "\n".join(json_lines)

# Upload the whole batch as a single UTF-8 encoded object.
s3.put_object(Bucket=bucket_name, Key=key, Body=body.encode("utf-8"))

print(f"Wrote {len(events)} raw branch events to s3://{bucket_name}/{key}")
|