Monday, May 27, 2024

Box Plot in Python with Sig-Bar

You can use this method to show how a dependent variable varies across two other categorical (or binary) variables.

Step 1: make a pivot table

import pandas as pd


# `df` is assumed to be an already-loaded DataFrame containing the columns
# 'tit', 'g_type' and 'n_lane'.

# Count the 'tit' observations in every (g_type, n_lane) cell; combinations
# that never occur are reported as 0 instead of NaN.
pivot_table = pd.pivot_table(
    df,
    values='tit',
    index='g_type',
    columns='n_lane',
    aggfunc='count',
    fill_value=0,
)

# Show the resulting table of counts
print(pivot_table)


Step 2: run the t-tests and get the p-values

from scipy.stats import ttest_ind


# Boolean masks for the two group types and the two lane counts
is_hetero = df['g_type'] == 'hetero'
is_homo = df['g_type'] == 'homo'
on_lane_2 = df['n_lane'] == 2
on_lane_3 = df['n_lane'] == 3

# 'tit' values for every (group, lane) combination
hetero_lane_2_data = df.loc[is_hetero & on_lane_2, 'tit']
hetero_lane_3_data = df.loc[is_hetero & on_lane_3, 'tit']
homo_lane_2_data = df.loc[is_homo & on_lane_2, 'tit']
homo_lane_3_data = df.loc[is_homo & on_lane_3, 'tit']

# Independent two-sample t-tests (lane 2 vs lane 3) inside each group;
# NaN observations are dropped before testing.
hetero_result = ttest_ind(hetero_lane_2_data, hetero_lane_3_data, nan_policy='omit')
t_stat_hetero, p_value_hetero = hetero_result.statistic, hetero_result.pvalue

homo_result = ttest_ind(homo_lane_2_data, homo_lane_3_data, nan_policy='omit')
t_stat_homo, p_value_homo = homo_result.statistic, homo_result.pvalue

# Report the p-values
print(f"P-value for Hetero group between Lane 2 and Lane 3: {p_value_hetero}")
print(f"P-value for Homo group between Lane 2 and Lane 3: {p_value_homo}")



Step 3: draw the box plot with significance bars

import seaborn as sns

import matplotlib.pyplot as plt

from scipy.stats import ttest_ind


# Apply Seaborn's white-grid theme to the figures that follow
sns.set_theme(style="whitegrid")

# Grouped box plot: one cluster per g_type, one box per n_lane inside it
plt.figure(figsize=(10, 6))
boxplot = sns.boxplot(data=df, x='g_type', y='tit', hue='n_lane', palette='Set3')

# Axis labels and title (the title is intentionally left empty here)
boxplot.set_xlabel('Group Type', fontsize=12)
boxplot.set_ylabel('TIT', fontsize=12)
boxplot.set_title('', fontsize=14)

# Boolean masks for the two group types and the two lane counts
is_hetero = df['g_type'] == 'hetero'
is_homo = df['g_type'] == 'homo'
on_lane_2 = df['n_lane'] == 2
on_lane_3 = df['n_lane'] == 3

# 'tit' values for every (group, lane) combination
hetero_lane_2_data = df.loc[is_hetero & on_lane_2, 'tit']
hetero_lane_3_data = df.loc[is_hetero & on_lane_3, 'tit']
homo_lane_2_data = df.loc[is_homo & on_lane_2, 'tit']
homo_lane_3_data = df.loc[is_homo & on_lane_3, 'tit']

# Independent two-sample t-tests (lane 2 vs lane 3) inside each group;
# NaN observations are dropped before testing.
hetero_result = ttest_ind(hetero_lane_2_data, hetero_lane_3_data, nan_policy='omit')
t_stat_hetero, p_value_hetero = hetero_result.statistic, hetero_result.pvalue

homo_result = ttest_ind(homo_lane_2_data, homo_lane_3_data, nan_policy='omit')
t_stat_homo, p_value_homo = homo_result.statistic, homo_result.pvalue


# Function to add significance bars with symbols

def add_significance_bar(ax, x1, x2, y, p_value, bar_color='k', sig_marker='***', y_offset=0.1):
    """Draw a significance bar between two x-positions and label it with stars.

    The bar is a horizontal segment at height ``y`` from ``x1`` to ``x2`` with
    a short vertical tick at each end: the tick at ``x1`` points up and the
    tick at ``x2`` points down, each 0.1 data units long.

    Args:
        ax: Axes-like object exposing ``plot`` and ``text``.
        x1: x-coordinate of the left end of the bar.
        x2: x-coordinate of the right end of the bar.
        y: y-coordinate of the horizontal segment, in data units.
        p_value: p-value to annotate. It is mapped to ``'***'`` (< 0.001),
            ``'**'`` (< 0.01), ``'*'`` (< 0.05) or an empty string otherwise
            (a NaN p-value also yields an empty string).
        bar_color: Colour of the bar line.
        sig_marker: Unused. The symbol is always derived from ``p_value``;
            the parameter is kept so existing calls that pass it still work.
        y_offset: The label is anchored at ``y + 0.01 * y_offset``. A positive
            value puts the label above that point, any other value below it.
    """
    tick_length = 0.1  # length of the end ticks, in data units

    # Left tick goes up from the bar, right tick goes down from it.
    ax.plot(
        [x1, x1, x2, x2],
        [y + tick_length, y, y, y - tick_length],
        lw=1,
        c=bar_color,
    )

    # First threshold the p-value falls under wins; none -> no symbol.
    sig_symbol = ''
    for threshold, symbol in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_value < threshold:
            sig_symbol = symbol
            break

    # Anchor the text on the side of the bar the offset points to, so a
    # negative offset really places the label underneath the bar.
    va = 'bottom' if y_offset > 0 else 'top'

    x_center = (x1 + x2) / 2
    ax.text(x_center, y + 0.01 * y_offset, sig_symbol, ha='center', va=va, c='red')


# Work out the x-positions for the significance bars.
#
# Each t-test compares lane 2 with lane 3 *inside* one group, so each bar has
# to span the two boxes of that same group rather than boxes of two different
# groups.
ax = plt.gca()

# Map every x tick label (the g_type value) to its position on the axis, so
# the bars follow whatever order the groups were plotted in.
group_positions = {
    label.get_text(): position
    for label, position in zip(ax.get_xticklabels(), ax.get_xticks())
}

# With two hue levels and seaborn's default total box width of 0.8, the two
# boxes of a group are centred 0.2 to either side of the group's tick.
# NOTE(review): adjust this if n_lane has more than two levels or if a custom
# width is passed to sns.boxplot.
hue_offset = 0.2

x1_hetero = group_positions['hetero'] - hue_offset
x2_hetero = group_positions['hetero'] + hue_offset
x1_homo = group_positions['homo'] - hue_offset
x2_homo = group_positions['homo'] + hue_offset

# Add significance bars annotated with stars derived from the p-values.
# The y positions (50 and 45) are in data units; adjust them to sit above the
# boxes of your own data.
add_significance_bar(ax, x1_hetero, x2_hetero, 50, p_value_hetero, bar_color='blue', sig_marker='***', y_offset=0.2)
add_significance_bar(ax, x1_homo, x2_homo, 45, p_value_homo, bar_color='blue', sig_marker='***', y_offset=0.2)

# Change legend title
plt.legend(title='Lane Number', loc='upper right')

# Show plot
plt.show()


0 comments:

Post a Comment