I am new to machine learning. My dataset is shown below.
Dataset
I have completed the preprocessing and data visualization parts. My code is shown below.
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
# Load the raw COVID-19 records from the CSV export.
df = pd.read_csv('sample_data/covid19_full_data.csv')
df  # bare expression: only renders when it is the last line of a notebook cell

# Per-column count of missing values (inspection only; nothing is dropped here).
df.isnull().sum()
print('Dataset length before drop null values')
len(df)

# Convert the 'date' column to datetimes before grouping on it.
df['date'] = pd.to_datetime(df['date'])

# Sum every metric over all rows that share a date, then turn the 'date'
# index back into an ordinary column so there is one row per date.
metric_columns = ['new_cases', 'new_deaths', 'total_cases', 'total_deaths']
daily_totals = df.groupby('date').agg(dict.fromkeys(metric_columns, 'sum'))
aggregated_data = daily_totals.reset_index()
aggregated_data
# Plot each aggregated metric against the date, one figure per metric.
# Every entry is (column, legend label, title, y-axis label, extra plot kwargs).
# The first chart passes no color, so matplotlib picks its default line color.
chart_specs = [
    ('new_cases', "New Cases", 'New COVID-19 Cases Over Time',
     'Number of New Cases', {}),
    ('new_deaths', "New Deaths", 'New COVID-19 Deaths Over Time',
     'Number of New Deaths', {'color': "red"}),
    ('total_cases', "Total Cases", 'Total COVID-19 Cases Over Time',
     'Total Number of Cases', {'color': "green"}),
    ('total_deaths', "Total Deaths", 'Total COVID-19 Deaths Over Time',
     'Total Number of Deaths', {'color': "black"}),
]

for column, legend_label, chart_title, y_label, line_kwargs in chart_specs:
    plt.figure(figsize=(14, 7))
    plt.plot(
        aggregated_data['date'],
        aggregated_data[column],
        label=legend_label,
        **line_kwargs,
    )
    plt.title(chart_title)
    plt.xlabel('Date')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()
How do I do the feature extraction part for my approach? Please write the code for the feature extraction step and explain each step.