Académique Documents
Professionnel Documents
Culture Documents
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
from scipy.stats import pearsonr
from scipy.stats import pointbiserialr
# Load the 'Correlation' sheet of the workbook into a DataFrame.
df = pd.read_excel('AMPATUAN-FLORDELIZA-MHA.xlsx', sheet_name='Correlation')
print(df)

# Pairwise Pearson correlation coefficients between the columns of df.
corr_matrix = df.corr(method='pearson')

# DataFrame.round returns a new frame rather than rounding in place; the
# original statement discarded that result, so nothing was ever rounded.
# Print the rounded copy and keep corr_matrix at full precision for the
# heatmap and for any later export.
print(corr_matrix.round(decimals=1))

sn.heatmap(corr_matrix, annot=True)
plt.show()

# pairplot expects one row per observation and one column per variable, so
# it is given the raw data; the original passed the correlation matrix,
# which plots coefficients against coefficients.
# NOTE(review): confirm that the raw-data grid is the intended figure.
sn.pairplot(df, kind='reg', diag_kind='kde')
<seaborn.axisgrid.PairGrid at 0x1cca97082e0>
def calculate_pvalues(df):
    """Return the matrix of Pearson-correlation p-values for all column pairs.

    For every pair of columns, rows in which either value is missing are
    dropped, ``scipy.stats.pearsonr`` is run on the remaining rows, and the
    resulting p-value is rounded to 4 decimal places.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with one column per variable. Column labels are assumed to be
        unique.

    Returns
    -------
    pandas.DataFrame
        Square float frame whose index and columns are both ``df.columns``;
        the entry at row ``c``, column ``r`` is the p-value for the pair
        ``(r, c)``.
    """
    columns = df.columns
    pvalues = pd.DataFrame(index=columns, columns=columns, dtype=float)
    for r in columns:
        for c in columns:
            # Keep only the rows where both variables are present.
            paired = df[df[r].notnull() & df[c].notnull()]
            _, p_value = pearsonr(paired[r], paired[c])
            # A single .loc write always hits the frame itself; the chained
            # form pvalues[r][c] = ... can assign to a temporary copy.
            pvalues.loc[c, r] = round(p_value, 4)
    return pvalues
# The p-value of a correlation has to be computed from the raw observations.
# The original passed corr_matrix, which treats each column of coefficients
# as if it were a sample and yields p-values unrelated to the data.
pvals = calculate_pvalues(df)

# `mask` was used without ever being defined. The printed labels are
# consistent with a 0.05 cut-off (0.0446 is labelled "Significant").
# NOTE(review): confirm the intended significance level.
alpha = 0.05
mask = pvals.astype(float) < alpha

# Label the matrix with "Significant" where the mask is True, and
# "Not Significant" where the mask is False
matrix_labels = np.where(mask, "Significant", "Not Significant")

# Print the original matrix and its labels
print("Original matrix:")
print(pvals)
print("\nMatrix with labels:")
print(matrix_labels)
Original matrix:
                          Age     Sex  Tenure  Autonomy  Environmental Mastery  \
Age                       0.0  0.8329  0.1117    0.1626                 0.5771
Sex                    0.8329     0.0  0.6122    0.1013                 0.5105
Tenure                 0.1117  0.6122     0.0    0.0293                 0.0174
Autonomy               0.1626  0.1013  0.0293       0.0                 0.0294
Environmental Mastery  0.5771  0.5105  0.0174    0.0294                    0.0
Personal Growth        0.5178  0.9864  0.0954    0.0649                 0.0504
Positive Relations     0.2818  0.0021  0.3841    0.1532                  0.789
Purpose in Life        0.0229  0.0671  0.0244    0.4076                 0.1411
Self-Acceptance        0.2646  0.0228  0.3815    0.0017                  0.124
Pay                    0.0446  0.3247  0.7924    0.4498                 0.1614
Promotion               0.338  0.0014  0.6059    0.0342                 0.7525
Supervision            0.1364  0.0153  0.2598    0.2244                 0.7555
Fringe Benefits        0.0021  0.1978  0.2012    0.0906                 0.0079
Contingent rewards     0.0036  0.1151  0.0308    0.6599                 0.3417
Operating conditions   0.9643  0.0652  0.1374    0.3704                 0.0186
Coworkers              0.0079  0.0359  0.4358     0.975                 0.1484
Nature of work         0.0125  0.6154  0.2298    0.7362                 0.4302
Communication          0.0053  0.3553  0.0292       0.0                 0.0426
Total satisfaction     0.0001  0.1568  0.3471    0.4793                 0.1067
Total satisfaction
Age 0.0001
Sex 0.1568
Tenure 0.3471
Autonomy 0.4793
Environmental Mastery 0.1067
Personal Growth 0.762
Positive Relations 0.1412
Purpose in Life 0.0161
Self-Acceptance 0.3263
Pay 0.0
Promotion 0.0223
Supervision 0.5514
Fringe Benefits 0.0
Contingent rewards 0.0007
Operating conditions 0.2368
Coworkers 0.0
Nature of work 0.0148
Communication 0.0359
Total satisfaction 0.0
# Wrap the label array in a DataFrame. np.where returns a bare array, so the
# variable names are carried over from pvals; without them the rows and
# columns are shown only as positional integers.
df_pvalues = pd.DataFrame(
    matrix_labels,
    index=pvals.index,
    columns=pvals.columns,
)
print(df_pvalues)
                 0                1                2                3  \
0      Significant  Not Significant  Not Significant  Not Significant
                 8                9               10               11  \
0  Not Significant      Significant  Not Significant  Not Significant
                12               13               14               15  \
0      Significant      Significant  Not Significant      Significant
16 17 18
0 Significant Significant Significant
1 Not Significant Not Significant Not Significant
2 Not Significant Significant Not Significant
3 Not Significant Significant Not Significant
4 Not Significant Significant Not Significant
5 Not Significant Not Significant Not Significant
6 Not Significant Not Significant Not Significant
7 Not Significant Not Significant Significant
8 Not Significant Significant Not Significant
9 Not Significant Not Significant Significant
10 Not Significant Not Significant Significant
11 Not Significant Not Significant Not Significant
12 Not Significant Significant Significant
13 Not Significant Not Significant Significant
14 Not Significant Not Significant Not Significant
15 Not Significant Not Significant Significant
16 Significant Not Significant Significant
17 Not Significant Significant Significant
18 Significant Significant Significant
# Create a list of 18 datasets, each a single column (pandas Series) of df.
# The column names were broken across lines in the original, which made the
# string literals invalid; they are restored here.
datasets = [
    df['Age'],
    df['Tenure'],
    df['Autonomy'],
    df['Environmental Mastery'],
    df['Personal Growth'],
    df['Positive Relations'],
    df['Purpose in Life'],
    df['Self-Acceptance'],
    df['Pay'],
    df['Promotion'],
    df['Supervision'],
    df['Fringe Benefits'],
    df['Contingent rewards'],
    df['Operating conditions'],
    df['Coworkers'],
    df['Nature of work'],
    df['Communication'],
    df['Total satisfaction'],
]

# `x` was used without ever being defined. 'Sex' is the only column of the
# p-value table that is left out of `datasets`.
# NOTE(review): presumably 'Sex' is the dichotomous variable — confirm.
x = df['Sex']

# Loop through each dataset and compute the point biserial correlation
# with x
for i, dataset in enumerate(datasets):
    y = dataset  # each entry is already one full column
    r, p = pointbiserialr(x, y)
    print(
        f"Point biserial correlation for dataset {i+1}: {r:.2f}, "
        f"p-value: {p:.4f}"
    )
# Write each result table to its own workbook: (frame, file name, sheet name).
exports = (
    (corr_matrix, "output.xlsx", "Corr_matrix"),
    (pvals, "pvalues.xlsx", "pvalues"),
    (df_pvalues, "Significance.xlsx", "significance"),
)
for frame, workbook, sheet in exports:
    frame.to_excel(workbook, sheet_name=sheet)