You can show how your dependent variable is affected by two other categorical or binary variables using this method.
Step 1: make a pivot table
import pandas as pd
# `df` is expected to already exist as a DataFrame.
# Tabulate how many 'tit' entries fall into every (g_type, n_lane) cell;
# combinations with no rows are shown as 0.
pivot_table = pd.pivot_table(
    df,
    values='tit',
    index='g_type',
    columns='n_lane',
    aggfunc='count',
    fill_value=0,
)
# Show the resulting table
print(pivot_table)
Step 2: run t-tests and find the p-values
from scipy.stats import ttest_ind
# Boolean row masks for the two group types and the two lane counts compared
is_hetero = df['g_type'] == 'hetero'
is_homo = df['g_type'] == 'homo'
is_lane_2 = df['n_lane'] == 2
is_lane_3 = df['n_lane'] == 3

# 'tit' values for each (group type, lane count) combination
hetero_lane_2_data = df.loc[is_hetero & is_lane_2, 'tit']
hetero_lane_3_data = df.loc[is_hetero & is_lane_3, 'tit']
homo_lane_2_data = df.loc[is_homo & is_lane_2, 'tit']
homo_lane_3_data = df.loc[is_homo & is_lane_3, 'tit']

# Lane 2 vs lane 3 t-test within each group type (NaN values are left out)
t_stat_hetero, p_value_hetero = ttest_ind(
    hetero_lane_2_data, hetero_lane_3_data, nan_policy='omit'
)
t_stat_homo, p_value_homo = ttest_ind(
    homo_lane_2_data, homo_lane_3_data, nan_policy='omit'
)

# Report the p-value of each comparison
print(f"P-value for Hetero group between Lane 2 and Lane 3: {p_value_hetero}")
print(f"P-value for Homo group between Lane 2 and Lane 3: {p_value_homo}")
Step 3: draw the box plot with significance bars
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
# Apply Seaborn's white-grid theme to the figure
sns.set_theme(style="whitegrid")

# Box plot of 'tit' per group type, split into one box per lane count
fig, ax = plt.subplots(figsize=(10, 6))
boxplot = sns.boxplot(
    x='g_type',
    y='tit',
    hue='n_lane',
    data=df,
    palette='Set3',
    ax=ax,
)

# Axis labels and the (currently empty) title; edit the strings as needed
ax.set_xlabel('Group Type', fontsize=12)
ax.set_ylabel('TIT', fontsize=12)
ax.set_title('', fontsize=14)
# Lane 2 vs lane 3 t-tests within each group type
def _tit_values(group_type, lane_count):
    """Return the 'tit' column for rows with the given g_type and n_lane."""
    matching_rows = df[(df['g_type'] == group_type) & (df['n_lane'] == lane_count)]
    return matching_rows['tit']


hetero_lane_2_data = _tit_values('hetero', 2)
hetero_lane_3_data = _tit_values('hetero', 3)
homo_lane_2_data = _tit_values('homo', 2)
homo_lane_3_data = _tit_values('homo', 3)

# NaN values are left out of each test
t_stat_hetero, p_value_hetero = ttest_ind(
    hetero_lane_2_data, hetero_lane_3_data, nan_policy='omit'
)
t_stat_homo, p_value_homo = ttest_ind(
    homo_lane_2_data, homo_lane_3_data, nan_policy='omit'
)
# Function to add significance bars with symbols
def add_significance_bar(ax, x1, x2, y, p_value, bar_color='k', sig_marker='***', y_offset=0.1):
    """Draw a significance bar between two x-positions and label it with stars.

    The bar is a horizontal line at height ``y`` running from ``x1`` to
    ``x2``, with a short vertical tick at each end (upwards at ``x1``,
    downwards at ``x2``). The label is derived from ``p_value``: ``***``
    below 0.001, ``**`` below 0.01, ``*`` below 0.05, and an empty string
    otherwise (which includes a NaN p-value).

    Args:
        ax: Axes-like object providing ``plot`` and ``text``.
        x1: x-position of the left end of the bar.
        x2: x-position of the right end of the bar.
        y: Height of the horizontal part of the bar, in data coordinates.
        p_value: p-value that decides which star label is shown.
        bar_color: Colour of the bar line.
        sig_marker: Ignored; the label always comes from ``p_value``. Kept
            so existing calls that pass it keep working.
        y_offset: The label is placed at ``y + 0.01 * y_offset``. A positive
            value anchors the label's bottom edge there (label above the
            bar); zero or a negative value anchors its top edge (label
            below the bar).
    """
    tick_length = 0.1

    # The label is centred horizontally between the two ends of the bar.
    x_center = (x1 + x2) / 2

    # One polyline: tick at x1, horizontal bar, tick at x2.
    ax.plot(
        [x1, x1, x2, x2],
        [y + tick_length, y, y, y - tick_length],
        lw=1,
        c=bar_color,
    )

    # Conventional star notation for the significance level.
    if p_value < 0.001:
        sig_symbol = '***'
    elif p_value < 0.01:
        sig_symbol = '**'
    elif p_value < 0.05:
        sig_symbol = '*'
    else:
        sig_symbol = ''

    # Anchor the text on the side of the bar that y_offset points to, so a
    # negative offset really places the label below the bar.
    va = 'bottom' if y_offset > 0 else 'top'
    ax.text(x_center, y + 0.01 * y_offset, sig_symbol, ha='center', va=va, c='red')
# Place each significance bar over the two boxes that were actually compared:
# the lane-2 and lane-3 boxes *within* one g_type group. (Fixed offsets such
# as 0.2 -> 1.2 would instead span the same lane across the two groups, which
# is not what p_value_hetero / p_value_homo measure.)
ax = plt.gca()

# Render once so the tick label texts are populated before they are read;
# NOTE(review): believed to be needed only on older matplotlib versions.
ax.figure.canvas.draw()

# x-axis position of every g_type category, read back from the box plot
group_positions = {
    label.get_text(): position
    for position, label in zip(ax.get_xticks(), ax.get_xticklabels())
}

# Seaborn places the hue boxes side by side inside a total width of 0.8 per
# category, ordered by lane number; derive each lane's offset from the
# category centre. NOTE(review): assumes the boxplot call keeps seaborn's
# default `width`, `dodge` and hue order -- adjust if those are customised.
lane_levels = sorted(df['n_lane'].dropna().unique())
box_width = 0.8 / len(lane_levels)
lane_offsets = {
    lane: (index - (len(lane_levels) - 1) / 2) * box_width
    for index, lane in enumerate(lane_levels)
}

x1_hetero = group_positions['hetero'] + lane_offsets[2]
x2_hetero = group_positions['hetero'] + lane_offsets[3]
x1_homo = group_positions['homo'] + lane_offsets[2]
x2_homo = group_positions['homo'] + lane_offsets[3]

# Add significance bars annotated with stars derived from the p-values.
# The y positions (50 and 45) are data-dependent: adjust them so the bars
# sit just above the boxes of your own data.
add_significance_bar(ax, x1_hetero, x2_hetero, 50, p_value_hetero, bar_color='blue', sig_marker='***', y_offset=0.2)
add_significance_bar(ax, x1_homo, x2_homo, 45, p_value_homo, bar_color='blue', sig_marker='***', y_offset=0.2)

# Change legend title
plt.legend(title='Lane Number', loc='upper right')

# Show plot
plt.show()
0 comments:
Post a Comment