Arpit Shrivastava143
Arpit Shrivastava143
Reg No : RA2211033010143
Section : AB2
Question 1.
CODE :
import pandas as pd
from datetime import datetime
# Read the COVID records from the CSV, JSON and Excel files.
csv_data = pd.read_csv("covid_data.csv")
json_data = pd.read_json("covid_data.json")
excel_data = pd.read_excel("covid_data.xlsx")

# Stack the three frames row-wise; ignore_index=True discards the per-file
# indexes and numbers the combined rows from 0.
df = pd.concat(
    objs=[csv_data, json_data, excel_data],
    ignore_index=True,
)
def standardize_date(date):
    """Convert one raw date value to an ISO ``YYYY-MM-DD`` string.

    The value is tried against each supported layout in order
    (``%m/%d/%Y``, ``%Y-%m-%d``, ``%d-%m-%Y``); the first layout that parses
    wins.

    Args:
        date: A single date value, e.g. ``"12/01/2021"`` or ``"2021-10-05"``.

    Returns:
        The date formatted with ``'%Y-%m-%d'``, or ``pd.NaT`` when the value
        is missing or matches none of the supported layouts.
    """
    for fmt in ("%m/%d/%Y", "%Y-%m-%d", "%d-%m-%Y"):
        try:
            parsed = pd.to_datetime(date, format=fmt)
        except (ValueError, TypeError):
            # Layout mismatch (or an unparsable type): try the next layout.
            # Only parsing errors are caught so that unrelated failures and
            # KeyboardInterrupt are not silently swallowed.
            continue
        # pd.to_datetime passes None through and maps NaN to NaT; neither can
        # be formatted, so report the value as missing.
        if parsed is None or parsed is pd.NaT:
            return pd.NaT
        return parsed.strftime('%Y-%m-%d')
    return pd.NaT  # Return NaT for unparsable dates
# Rewrite every date as ISO text (unparsable values become NaT), then convert
# the column so it can be sorted chronologically.
df['test_date'] = df['test_date'].apply(standardize_date)
df['test_date'] = pd.to_datetime(df['test_date'], errors='coerce')  # Ensure datetime type
def clean_test_result(result):
    """Map a free-text test result onto ``'Positive'`` or ``'Negative'``.

    Matching ignores case, surrounding whitespace and trailing periods, so
    abbreviations written like ``"NEG."`` are recognised.

    Args:
        result: The raw result value; it is converted with ``str()`` first.

    Returns:
        ``'Positive'``, ``'Negative'``, or ``pd.NA`` when the label is not one
        of the recognised spellings.
    """
    positive_labels = {'positive', 'pos', 'covid+ve'}
    negative_labels = {'negative', 'neg', 'covid-ve'}

    # Strip whitespace, then any trailing '.', then whitespace exposed by that.
    label = str(result).strip().rstrip('.').strip().lower()
    if label in positive_labels:
        return 'Positive'
    if label in negative_labels:
        return 'Negative'
    return pd.NA
# Replace each raw result label with its cleaned form.
df['test_result'] = df['test_result'].apply(clean_test_result)

# Order rows newest-first, then keep the first row seen per patient_id, so
# each patient retains only the most recent test.
df.sort_values(by=['test_date'], ascending=False, inplace=True)
df.drop_duplicates(subset=['patient_id'], keep='first', inplace=True)
def infer_vaccination_status(row):
    """Return the row's vaccination status, inferring it when it is missing.

    A recorded (non-missing) ``vaccination_status`` is returned unchanged.
    When it is missing, the row counts as ``'Vaccinated'`` if either
    ``visit_reason`` or ``hospital_visit`` equals ``'vaccination'``
    (case-insensitive); otherwise the status is ``'Unknown'``.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) supporting
            ``row[...]`` and ``row.get(...)``.

    Returns:
        The recorded status, ``'Vaccinated'`` or ``'Unknown'``.
    """
    recorded = row['vaccination_status']
    if not pd.isna(recorded):
        return recorded

    # Either visit column may carry the reason; an absent column reads as ''.
    visit_fields = ('visit_reason', 'hospital_visit')
    came_for_vaccination = any(
        str(row.get(field, '')).lower() == 'vaccination'
        for field in visit_fields
    )
    return 'Vaccinated' if came_for_vaccination else 'Unknown'
# Fill in the vaccination status one row at a time.
df['vaccination_status'] = df.apply(infer_vaccination_status, axis='columns')

# Show the first five cleaned rows.
print(df.head(n=5))
#OUTPUT
101  12/01/2021  POS       NaN         Vaccination
101  2021-11-01  Positive  NaN         Checkup
102  01-10-2021  Negative  Vaccinated  Fever
103  2021-10-05  Covid+ve  NaN         Vaccination
104  05-12-2021  NEG.      NaN         Headache
105  2021-12-01  covid-ve  NaN         Vaccination
Question 2.
CODE :
import pandas as pd
# Sample patient records: names with inconsistent casing and a "Dr." title,
# plus some negative/very large ages and SpO2 readings, and no hospitalisation
# values filled in yet.
data = {
    'patient_name': ['joHN doE', 'Alice SMITH', 'Dr. Michael Brown', 'sarah O\'connor'],
    'age': [35, -10, 45, 200],
    'spo2': [98, 35, 89, 102],
    'hospitalized': [None, None, None, None],
}
df = pd.DataFrame(data)
def clean_name(name):
    """Return a patient name trimmed, without a leading "Dr.", in title case.

    Args:
        name: The raw name; it is converted with ``str()`` first.

    Returns:
        The cleaned name with each word capitalised via ``str.title()``.
    """
    title_prefix = 'dr.'
    cleaned = str(name).strip()

    # The prefix check is case-insensitive; the slice drops the same number of
    # characters from the original-cased text.
    has_title = cleaned.lower().startswith(title_prefix)
    if has_title:
        cleaned = cleaned[len(title_prefix):].strip()

    return cleaned.title()
# Clean every patient name.
df['patient_name'] = df['patient_name'].apply(clean_name)

# Keep only rows with SpO2 in 40-100 and age in 0-120 (both ends inclusive).
# .copy() yields an independent frame, so the column writes below do not act
# on a slice of the unfiltered data (avoids pandas' SettingWithCopyWarning).
valid_spo2 = (df['spo2'] >= 40) & (df['spo2'] <= 100)
valid_age = (df['age'] >= 0) & (df['age'] <= 120)
df = df[valid_spo2 & valid_age].copy()

# Hospitalisation rule:
#   SpO2 < 90  -> 'Yes', even when a value was already recorded;
#   otherwise  -> 'No' if the value is missing, else the recorded value stays.
# Mask-based writes also work when the filters leave no rows at all.
# NOTE(review): confirm that a low SpO2 should override a recorded value; if
# only missing values should be inferred, restrict the first write to them.
low_spo2 = df['spo2'] < 90
df.loc[low_spo2, 'hospitalized'] = 'Yes'
df.loc[~low_spo2 & df['hospitalized'].isna(), 'hospitalized'] = 'No'
print(df)
#OUTPUT
Simulated Output:
    patient_name  age  spo2 hospitalized
0       John Doe   35    98           No
2  Michael Brown   45    89          Yes