Uncategorized

machine learning – Running a regression using Elastic Net in python keep getting NAN


I’m trying to predict crypto returns for different coins.
My code works as follows, take the last 500 days as the training sample, use today information to predict tomorrows returns, then the next day repeat the process and do that for all of the coins. It also builds a trading strategy too.

When I go to look at the predictions, they are all. NAN’s. I am happy to provide a sample of the dataframe, ‘final_full_df’, if this is useful. The dataset is about 50 columns long with 2,500 rows long.

```
`
import pandas as pd
from sklearn.linear_model import ElasticNetCV
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import statsmodels.api as sm
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

# Assuming 'final_full_df' is your DataFrame
final_full_df['Returns_lead'] = final_full_df.groupby('asset')['Returns'].shift(-1)

# Select relevant columns for regression
columns_for_regression = ['asset', 'Active address', 'Market cap', 'Price',
                          'Transaction count', 'Returns', 'ActAdd_MktCap', 'Tran_MktCap',
                          'Returns_EW_MKT', 'Returns_MRKT_CAP_W', 'RF', 'Excess Returns',
                          'Mkt_RF', 'RSI', 'MACD', 'Signal_Line', 'Short_MA', 'Long_MA', 'ROC',
                          'EMA', 'MA_Distance_1W', 'MA_Distance_2W', 'MA_Distance_3W',
                          'MA_Distance_4W', 'MA_Distance_50Day', 'WMA', 'DEMA', 'returns_1D',
                          'returns_2D','returns_3D', 'returns_6D','returns^2','returns^3', 'Historical_Volatility_10D',
                          'Historical_Volatility_30D', 'Historical_Volatility_60D',
                          'Historical_Volatility_90D', 'Historical_Volatility_180D',
                          'Downside_Volatility_10D', 'Downside_Volatility_30D',
                          'Downside_Volatility_60D', 'Downside_Volatility_90D',
                          'Downside_Volatility_180D', 'Highest_7D', 'Lowest_7D',
                          'Average_True_Range_7D', 'Chaikins_Volatility_EMA', 'Upper_Band',
                          'Lower_Band', 'Bollinger_Band_Range', 'percentage_bandwidth', 'QCOM',
                          'INTC', '2330.TW', 'TXN', '^GSPC', 'AMAT', '^IXIC', 'AMD', 'NXPI',
                          'AVGO', 'MU', 'ASML', 'NVDA', 'TSM','Returns_lead']

# Create an empty DataFrame to store results
results_df = pd.DataFrame(index=final_full_df.index)

# Get unique coin names
coin_names = final_full_df['asset'].unique()

# Iterate through each coin
for asset_name in coin_names:
    print(f"\nAnalyzing {asset_name}...")

    try:
        # Filter data for the current coin
        coin_data = final_full_df[final_full_df['asset'] == asset_name]

        # Drop rows with missing values
        coin_data = coin_data.dropna(subset=columns_for_regression)

        # Extract features (X) and target variable (y)
        X = coin_data[columns_for_regression].drop(['asset', 'Returns', 'Returns_lead'], axis=1)
        y = coin_data['Returns_lead'].dropna()  # Drop NaN values in the target variable

        # Normalize features using StandardScaler
        scaler = StandardScaler()
        X_normalized = scaler.fit_transform(X)

        # Initialize Elastic Net regression model with cross-validated alpha and l1_ratio selection
        model = ElasticNetCV(cv=5)#, max_iter=5000)  # Increase max_iter to a higher value

        # Set the training window size and step size
        training_window_size = 500
        step_size = 50

        # Initialize arrays for the trading strategy
        strategy_df = pd.DataFrame(index=coin_data.index)
        strategy_df['Actual Returns'] = coin_data['Returns_lead']
        strategy_df['Predicted Returns'] = 0
        strategy_df['Buy_or_Sell'] = 0

        # Iterate through the data with the specified training and testing procedure
        for i in range(0, len(coin_data) - training_window_size - step_size, step_size):
            train_end_index = i + training_window_size
            test_start_index = train_end_index
            test_end_index = test_start_index + step_size

            # Extract the training and testing sets
            X_train, y_train = X_normalized[i:train_end_index], y.iloc[i:train_end_index]
            X_test, y_test = X_normalized[test_start_index:test_end_index], y.iloc[test_start_index:test_end_index]

            # Fit the Elastic Net model with cross-validated alpha and l1_ratio
            model.fit(X_train, y_train)

            # Predict returns on the test set
            y_pred = model.predict(X_test)

            # Store predicted returns in the array
            strategy_df['Predicted Returns'][test_start_index:test_end_index] = y_pred

            # Update the trading strategy
            threshold = 0
            strategy_df['Buy_or_Sell'][strategy_df['Predicted Returns'] >= threshold] = 1
            strategy_df['Buy_or_Sell'][strategy_df['Predicted Returns'] <= -threshold] = -1

        # Calculate gain/loss
        strategy_df['Gain_Loss'] = strategy_df['Buy_or_Sell'] * strategy_df['Actual Returns']

        # Calculate cumulative returns for the trading strategy
        strategy_df['Cumulative_Returns'] = (1 + strategy_df['Gain_Loss']).cumprod()

        # Add results to the results_df DataFrame
        results_df[asset_name] = strategy_df['Cumulative_Returns']

        print(f"{asset_name} analysis completed.")
    except Exception as e:
        print(f"Error analyzing {asset_name}: {str(e)}. Skipping analysis.")

# Display the results DataFrame
print(results_df)`
```



Source link

Leave a Reply

Your email address will not be published. Required fields are marked *