fixed code not outputting mixed df
This commit is contained in:
parent
e0cef40f23
commit
e4ccb2297e
1 changed files with 372 additions and 31 deletions
|
|
@ -2,7 +2,7 @@
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 2,
|
||||||
"id": "58867898",
|
"id": "58867898",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
|
@ -11,39 +11,33 @@
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Collecting pandas\n",
|
"Collecting pandas\n",
|
||||||
" Using cached pandas-2.3.3-cp310-cp310-win_amd64.whl.metadata (19 kB)\n",
|
" Downloading pandas-2.3.3-cp310-cp310-win_amd64.whl.metadata (19 kB)\n",
|
||||||
"Collecting numpy>=1.22.4 (from pandas)\n",
|
"Collecting numpy>=1.22.4 (from pandas)\n",
|
||||||
" Using cached numpy-2.2.6-cp310-cp310-win_amd64.whl.metadata (60 kB)\n",
|
" Downloading numpy-2.2.6-cp310-cp310-win_amd64.whl.metadata (60 kB)\n",
|
||||||
"Collecting python-dateutil>=2.8.2 (from pandas)\n",
|
"Collecting python-dateutil>=2.8.2 (from pandas)\n",
|
||||||
" Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)\n",
|
" Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)\n",
|
||||||
"Collecting pytz>=2020.1 (from pandas)\n",
|
"Collecting pytz>=2020.1 (from pandas)\n",
|
||||||
" Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n",
|
" Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n",
|
||||||
"Collecting tzdata>=2022.7 (from pandas)\n",
|
"Collecting tzdata>=2022.7 (from pandas)\n",
|
||||||
" Using cached tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)\n",
|
" Downloading tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)\n",
|
||||||
"Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas)\n",
|
"Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas)\n",
|
||||||
" Using cached six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)\n",
|
" Using cached six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)\n",
|
||||||
"Using cached pandas-2.3.3-cp310-cp310-win_amd64.whl (11.3 MB)\n",
|
"Downloading pandas-2.3.3-cp310-cp310-win_amd64.whl (11.3 MB)\n",
|
||||||
"Using cached numpy-2.2.6-cp310-cp310-win_amd64.whl (12.9 MB)\n",
|
" ---------------------------------------- 0.0/11.3 MB ? eta -:--:--\n",
|
||||||
|
" ------------------------------ --------- 8.7/11.3 MB 48.8 MB/s eta 0:00:01\n",
|
||||||
|
" ---------------------------------------- 11.3/11.3 MB 47.4 MB/s 0:00:00\n",
|
||||||
|
"Downloading numpy-2.2.6-cp310-cp310-win_amd64.whl (12.9 MB)\n",
|
||||||
|
" ---------------------------------------- 0.0/12.9 MB ? eta -:--:--\n",
|
||||||
|
" ---------------------------------------- 12.9/12.9 MB 73.5 MB/s 0:00:00\n",
|
||||||
"Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)\n",
|
"Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)\n",
|
||||||
"Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)\n",
|
"Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)\n",
|
||||||
"Using cached six-1.17.0-py2.py3-none-any.whl (11 kB)\n",
|
"Using cached six-1.17.0-py2.py3-none-any.whl (11 kB)\n",
|
||||||
"Using cached tzdata-2025.3-py2.py3-none-any.whl (348 kB)\n",
|
"Downloading tzdata-2025.3-py2.py3-none-any.whl (348 kB)\n",
|
||||||
"Installing collected packages: pytz, tzdata, six, numpy, python-dateutil, pandas\n",
|
"Installing collected packages: pytz, tzdata, six, numpy, python-dateutil, pandas\n",
|
||||||
"\n",
|
"\n",
|
||||||
" ---------------------------------------- 0/6 [pytz]\n",
|
" ---------------------------------------- 0/6 [pytz]\n",
|
||||||
" ---------------------------------------- 0/6 [pytz]\n",
|
" ---------------------------------------- 0/6 [pytz]\n",
|
||||||
" ---------------------------------------- 0/6 [pytz]\n",
|
" ---------------------------------------- 0/6 [pytz]\n",
|
||||||
" ---------------------------------------- 0/6 [pytz]\n",
|
|
||||||
" Attempting uninstall: tzdata\n",
|
|
||||||
" ---------------------------------------- 0/6 [pytz]\n",
|
|
||||||
" Found existing installation: tzdata 2025.3\n",
|
|
||||||
" ---------------------------------------- 0/6 [pytz]\n",
|
|
||||||
" ------ --------------------------------- 1/6 [tzdata]\n",
|
|
||||||
" Uninstalling tzdata-2025.3:\n",
|
|
||||||
" ------ --------------------------------- 1/6 [tzdata]\n",
|
|
||||||
" Successfully uninstalled tzdata-2025.3\n",
|
|
||||||
" ------ --------------------------------- 1/6 [tzdata]\n",
|
|
||||||
" ------ --------------------------------- 1/6 [tzdata]\n",
|
|
||||||
" ------ --------------------------------- 1/6 [tzdata]\n",
|
" ------ --------------------------------- 1/6 [tzdata]\n",
|
||||||
" ------ --------------------------------- 1/6 [tzdata]\n",
|
" ------ --------------------------------- 1/6 [tzdata]\n",
|
||||||
" ------ --------------------------------- 1/6 [tzdata]\n",
|
" ------ --------------------------------- 1/6 [tzdata]\n",
|
||||||
|
|
@ -56,6 +50,18 @@
|
||||||
" ------------- -------------------------- 2/6 [six]\n",
|
" ------------- -------------------------- 2/6 [six]\n",
|
||||||
" Successfully uninstalled six-1.17.0\n",
|
" Successfully uninstalled six-1.17.0\n",
|
||||||
" ------------- -------------------------- 2/6 [six]\n",
|
" ------------- -------------------------- 2/6 [six]\n",
|
||||||
|
" ------------- -------------------------- 2/6 [six]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
" -------------------- ------------------- 3/6 [numpy]\n",
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
" -------------------- ------------------- 3/6 [numpy]\n",
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
" -------------------- ------------------- 3/6 [numpy]\n",
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
|
@ -91,12 +97,52 @@
|
||||||
" -------------------- ------------------- 3/6 [numpy]\n",
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
" Found existing installation: python-dateutil 2.9.0.post0\n",
|
" Found existing installation: python-dateutil 2.9.0.post0\n",
|
||||||
" -------------------- ------------------- 3/6 [numpy]\n",
|
" -------------------- ------------------- 3/6 [numpy]\n",
|
||||||
|
" -------------------------- ------------- 4/6 [python-dateutil]\n",
|
||||||
" Uninstalling python-dateutil-2.9.0.post0:\n",
|
" Uninstalling python-dateutil-2.9.0.post0:\n",
|
||||||
" -------------------- ------------------- 3/6 [numpy]\n",
|
" -------------------------- ------------- 4/6 [python-dateutil]\n",
|
||||||
" Successfully uninstalled python-dateutil-2.9.0.post0\n",
|
" Successfully uninstalled python-dateutil-2.9.0.post0\n",
|
||||||
" -------------------- ------------------- 3/6 [numpy]\n",
|
|
||||||
" -------------------------- ------------- 4/6 [python-dateutil]\n",
|
" -------------------------- ------------- 4/6 [python-dateutil]\n",
|
||||||
" -------------------------- ------------- 4/6 [python-dateutil]\n",
|
" -------------------------- ------------- 4/6 [python-dateutil]\n",
|
||||||
|
" -------------------------- ------------- 4/6 [python-dateutil]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
" --------------------------------- ------ 5/6 [pandas]\n",
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
" --------------------------------- ------ 5/6 [pandas]\n",
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
" --------------------------------- ------ 5/6 [pandas]\n",
|
" --------------------------------- ------ 5/6 [pandas]\n",
|
||||||
|
|
@ -169,10 +215,12 @@
|
||||||
"Successfully installed numpy-2.2.6 pandas-2.3.3 python-dateutil-2.9.0.post0 pytz-2025.2 six-1.17.0 tzdata-2025.3\n",
|
"Successfully installed numpy-2.2.6 pandas-2.3.3 python-dateutil-2.9.0.post0 pytz-2025.2 six-1.17.0 tzdata-2025.3\n",
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
"Collecting faker\n",
|
"Collecting faker\n",
|
||||||
" Using cached faker-39.0.0-py3-none-any.whl.metadata (16 kB)\n",
|
" Downloading faker-39.0.0-py3-none-any.whl.metadata (16 kB)\n",
|
||||||
"Collecting tzdata (from faker)\n",
|
"Collecting tzdata (from faker)\n",
|
||||||
" Using cached tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)\n",
|
" Using cached tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)\n",
|
||||||
"Using cached faker-39.0.0-py3-none-any.whl (2.0 MB)\n",
|
"Downloading faker-39.0.0-py3-none-any.whl (2.0 MB)\n",
|
||||||
|
" ---------------------------------------- 0.0/2.0 MB ? eta -:--:--\n",
|
||||||
|
" ---------------------------------------- 2.0/2.0 MB 15.8 MB/s 0:00:00\n",
|
||||||
"Using cached tzdata-2025.3-py2.py3-none-any.whl (348 kB)\n",
|
"Using cached tzdata-2025.3-py2.py3-none-any.whl (348 kB)\n",
|
||||||
"Installing collected packages: tzdata, faker\n",
|
"Installing collected packages: tzdata, faker\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -186,15 +234,26 @@
|
||||||
"\n",
|
"\n",
|
||||||
" ---------------------------------------- 0/2 [tzdata]\n",
|
" ---------------------------------------- 0/2 [tzdata]\n",
|
||||||
" ---------------------------------------- 0/2 [tzdata]\n",
|
" ---------------------------------------- 0/2 [tzdata]\n",
|
||||||
" Attempting uninstall: faker\n",
|
|
||||||
" ---------------------------------------- 0/2 [tzdata]\n",
|
" ---------------------------------------- 0/2 [tzdata]\n",
|
||||||
" Found existing installation: Faker 39.0.0\n",
|
|
||||||
" ---------------------------------------- 0/2 [tzdata]\n",
|
" ---------------------------------------- 0/2 [tzdata]\n",
|
||||||
" -------------------- ------------------- 1/2 [faker]\n",
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
" -------------------- ------------------- 1/2 [faker]\n",
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
" Uninstalling Faker-39.0.0:\n",
|
|
||||||
" -------------------- ------------------- 1/2 [faker]\n",
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
" Successfully uninstalled Faker-39.0.0\n",
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
" -------------------- ------------------- 1/2 [faker]\n",
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
" -------------------- ------------------- 1/2 [faker]\n",
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
" -------------------- ------------------- 1/2 [faker]\n",
|
" -------------------- ------------------- 1/2 [faker]\n",
|
||||||
|
|
@ -241,7 +300,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 3,
|
||||||
"id": "d786ab3d",
|
"id": "d786ab3d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
|
@ -249,7 +308,7 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"c:\\Users\\leaf3\\OneDrive\\Desktop\\DataLab\\Data_lab\\fake_data\n"
|
"c:\\Users\\leaf3\\Desktop\\Data Engineer\\datalab\\Data_Lab\\fake_data\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
@ -269,7 +328,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 6,
|
||||||
"id": "46f10552",
|
"id": "46f10552",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
|
@ -277,7 +336,9 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Data Exported\n"
|
"Saved files to:\n",
|
||||||
|
"c:\\Users\\leaf3\\Desktop\\Data Engineer\\datalab\\Data_Lab\\fake_data\\healthy_medical_records.csv\n",
|
||||||
|
"c:\\Users\\leaf3\\Desktop\\Data Engineer\\datalab\\Data_Lab\\fake_data\\mixed_medical_records.csv\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
@ -475,12 +536,292 @@
|
||||||
"for column in mixed_df.columns:\n",
|
"for column in mixed_df.columns:\n",
|
||||||
" mixed_df.loc[healthy_mask, column] = healthy_df.loc[healthy_mask, column]\n",
|
" mixed_df.loc[healthy_mask, column] = healthy_df.loc[healthy_mask, column]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"mixed_df = generate_dataset(is_healthy=False)\n",
|
"mixed_df.to_csv(mixed_path, index=False)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Saved files to:\")\n",
|
"print(\"Saved files to:\")\n",
|
||||||
"print(healthy_path)\n",
|
"print(healthy_path)\n",
|
||||||
"print(mixed_path)"
|
"print(mixed_path)"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "0b9c67b5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Medical datasets combined and exported successfully.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Load the datasets\n",
|
||||||
|
"healthy_df = pd.read_csv('healthy_medical_records.csv')\n",
|
||||||
|
"mixed_df = pd.read_csv('mixed_medical_records.csv')\n",
|
||||||
|
"\n",
|
||||||
|
"# Concatenate the two DataFrames\n",
|
||||||
|
"output_df = pd.concat([healthy_df, mixed_df], ignore_index=True)\n",
|
||||||
|
"\n",
|
||||||
|
"output_df.fillna('None', inplace=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# Export the final dataset to a CSV file\n",
|
||||||
|
"output_df.to_csv('sample_medical_records.csv', index=False)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Medical datasets combined and exported successfully.\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "928bf039",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Patient ID</th>\n",
|
||||||
|
" <th>Last Checkup</th>\n",
|
||||||
|
" <th>Date of Birth</th>\n",
|
||||||
|
" <th>Gender</th>\n",
|
||||||
|
" <th>Ethnicity</th>\n",
|
||||||
|
" <th>Blood Type</th>\n",
|
||||||
|
" <th>Occupation</th>\n",
|
||||||
|
" <th>Insurance Provider</th>\n",
|
||||||
|
" <th>Insurance Plan</th>\n",
|
||||||
|
" <th>Monthly Premium</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>Stroke</th>\n",
|
||||||
|
" <th>Coronary Heart Disease (CHD)</th>\n",
|
||||||
|
" <th>Blood Glucose Level (mmol/L)</th>\n",
|
||||||
|
" <th>HDL Cholesterol (mmol/L)</th>\n",
|
||||||
|
" <th>LDL Cholesterol (mmol/L)</th>\n",
|
||||||
|
" <th>Triglycerides (mmol/L)</th>\n",
|
||||||
|
" <th>Hemoglobin A1C (%)</th>\n",
|
||||||
|
" <th>White Blood Cell Count (10^9/L)</th>\n",
|
||||||
|
" <th>Red Blood Cell Count (10^12/L)</th>\n",
|
||||||
|
" <th>Platelet Count (10^9/L)</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>KP-6511-3073-2278</td>\n",
|
||||||
|
" <td>2024-05-28</td>\n",
|
||||||
|
" <td>1955-08-02</td>\n",
|
||||||
|
" <td>Female</td>\n",
|
||||||
|
" <td>Pacific Islander</td>\n",
|
||||||
|
" <td>B+</td>\n",
|
||||||
|
" <td>Electrician</td>\n",
|
||||||
|
" <td>Pugh, Tate and Green</td>\n",
|
||||||
|
" <td>Comprehensive Care Plan</td>\n",
|
||||||
|
" <td>722.18</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" <td>1.4</td>\n",
|
||||||
|
" <td>2.7</td>\n",
|
||||||
|
" <td>1.4</td>\n",
|
||||||
|
" <td>5.4</td>\n",
|
||||||
|
" <td>9.1</td>\n",
|
||||||
|
" <td>4.8</td>\n",
|
||||||
|
" <td>310.5</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>RK-1845-1516-4239</td>\n",
|
||||||
|
" <td>2023-05-29</td>\n",
|
||||||
|
" <td>1950-09-27</td>\n",
|
||||||
|
" <td>Male</td>\n",
|
||||||
|
" <td>Asian</td>\n",
|
||||||
|
" <td>O+</td>\n",
|
||||||
|
" <td>Nurse</td>\n",
|
||||||
|
" <td>Gonzalez-Martinez</td>\n",
|
||||||
|
" <td>Family Health Coverage</td>\n",
|
||||||
|
" <td>610.27</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" <td>1.3</td>\n",
|
||||||
|
" <td>2.2</td>\n",
|
||||||
|
" <td>0.7</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" <td>5.1</td>\n",
|
||||||
|
" <td>4.6</td>\n",
|
||||||
|
" <td>226.1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>XA-8453-5992-5165</td>\n",
|
||||||
|
" <td>2025-09-28</td>\n",
|
||||||
|
" <td>1958-06-22</td>\n",
|
||||||
|
" <td>Male</td>\n",
|
||||||
|
" <td>Other</td>\n",
|
||||||
|
" <td>O-</td>\n",
|
||||||
|
" <td>Engineer</td>\n",
|
||||||
|
" <td>Kim, Medina and Hawkins</td>\n",
|
||||||
|
" <td>High Deductible Health Plan</td>\n",
|
||||||
|
" <td>652.52</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>4.2</td>\n",
|
||||||
|
" <td>1.3</td>\n",
|
||||||
|
" <td>2.9</td>\n",
|
||||||
|
" <td>1.3</td>\n",
|
||||||
|
" <td>4.2</td>\n",
|
||||||
|
" <td>5.8</td>\n",
|
||||||
|
" <td>4.7</td>\n",
|
||||||
|
" <td>274.4</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>LX-9887-6375-7280</td>\n",
|
||||||
|
" <td>2025-05-24</td>\n",
|
||||||
|
" <td>2005-02-21</td>\n",
|
||||||
|
" <td>Male</td>\n",
|
||||||
|
" <td>Other</td>\n",
|
||||||
|
" <td>B+</td>\n",
|
||||||
|
" <td>Customer Service Representative</td>\n",
|
||||||
|
" <td>Rogers-Baker</td>\n",
|
||||||
|
" <td>Premium Health Insurance</td>\n",
|
||||||
|
" <td>290.39</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>5.1</td>\n",
|
||||||
|
" <td>1.6</td>\n",
|
||||||
|
" <td>2.1</td>\n",
|
||||||
|
" <td>0.7</td>\n",
|
||||||
|
" <td>4.4</td>\n",
|
||||||
|
" <td>10.7</td>\n",
|
||||||
|
" <td>4.6</td>\n",
|
||||||
|
" <td>154.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>WM-9463-4744-1527</td>\n",
|
||||||
|
" <td>2023-06-19</td>\n",
|
||||||
|
" <td>1994-02-23</td>\n",
|
||||||
|
" <td>Male</td>\n",
|
||||||
|
" <td>Native American</td>\n",
|
||||||
|
" <td>O+</td>\n",
|
||||||
|
" <td>Student</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>Individual Health Insurance</td>\n",
|
||||||
|
" <td>257.43</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>No</td>\n",
|
||||||
|
" <td>4.7</td>\n",
|
||||||
|
" <td>1.4</td>\n",
|
||||||
|
" <td>2.7</td>\n",
|
||||||
|
" <td>1.2</td>\n",
|
||||||
|
" <td>4.3</td>\n",
|
||||||
|
" <td>5.3</td>\n",
|
||||||
|
" <td>4.9</td>\n",
|
||||||
|
" <td>273.1</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 29 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Patient ID Last Checkup Date of Birth Gender Ethnicity \\\n",
|
||||||
|
"0 KP-6511-3073-2278 2024-05-28 1955-08-02 Female Pacific Islander \n",
|
||||||
|
"1 RK-1845-1516-4239 2023-05-29 1950-09-27 Male Asian \n",
|
||||||
|
"2 XA-8453-5992-5165 2025-09-28 1958-06-22 Male Other \n",
|
||||||
|
"3 LX-9887-6375-7280 2025-05-24 2005-02-21 Male Other \n",
|
||||||
|
"4 WM-9463-4744-1527 2023-06-19 1994-02-23 Male Native American \n",
|
||||||
|
"\n",
|
||||||
|
" Blood Type Occupation Insurance Provider \\\n",
|
||||||
|
"0 B+ Electrician Pugh, Tate and Green \n",
|
||||||
|
"1 O+ Nurse Gonzalez-Martinez \n",
|
||||||
|
"2 O- Engineer Kim, Medina and Hawkins \n",
|
||||||
|
"3 B+ Customer Service Representative Rogers-Baker \n",
|
||||||
|
"4 O+ Student NaN \n",
|
||||||
|
"\n",
|
||||||
|
" Insurance Plan Monthly Premium ... Stroke \\\n",
|
||||||
|
"0 Comprehensive Care Plan 722.18 ... No \n",
|
||||||
|
"1 Family Health Coverage 610.27 ... No \n",
|
||||||
|
"2 High Deductible Health Plan 652.52 ... No \n",
|
||||||
|
"3 Premium Health Insurance 290.39 ... No \n",
|
||||||
|
"4 Individual Health Insurance 257.43 ... No \n",
|
||||||
|
"\n",
|
||||||
|
" Coronary Heart Disease (CHD) Blood Glucose Level (mmol/L) \\\n",
|
||||||
|
"0 No 5.0 \n",
|
||||||
|
"1 No 5.0 \n",
|
||||||
|
"2 No 4.2 \n",
|
||||||
|
"3 No 5.1 \n",
|
||||||
|
"4 No 4.7 \n",
|
||||||
|
"\n",
|
||||||
|
" HDL Cholesterol (mmol/L) LDL Cholesterol (mmol/L) Triglycerides (mmol/L) \\\n",
|
||||||
|
"0 1.4 2.7 1.4 \n",
|
||||||
|
"1 1.3 2.2 0.7 \n",
|
||||||
|
"2 1.3 2.9 1.3 \n",
|
||||||
|
"3 1.6 2.1 0.7 \n",
|
||||||
|
"4 1.4 2.7 1.2 \n",
|
||||||
|
"\n",
|
||||||
|
" Hemoglobin A1C (%) White Blood Cell Count (10^9/L) \\\n",
|
||||||
|
"0 5.4 9.1 \n",
|
||||||
|
"1 5.0 5.1 \n",
|
||||||
|
"2 4.2 5.8 \n",
|
||||||
|
"3 4.4 10.7 \n",
|
||||||
|
"4 4.3 5.3 \n",
|
||||||
|
"\n",
|
||||||
|
" Red Blood Cell Count (10^12/L) Platelet Count (10^9/L) \n",
|
||||||
|
"0 4.8 310.5 \n",
|
||||||
|
"1 4.6 226.1 \n",
|
||||||
|
"2 4.7 274.4 \n",
|
||||||
|
"3 4.6 154.0 \n",
|
||||||
|
"4 4.9 273.1 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 29 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df = pd.read_csv('sample_medical_records.csv')\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "899a271c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue