# P 10: Data Visualization and Storytelling

 

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Seed NumPy's global random state so every run produces the same sample.
np.random.seed(42)

_N_ROWS = 1000

# NOTE: every call below advances the shared global random state, so the
# draws must stay in exactly this order to reproduce the same values.
_var1_values = np.random.normal(0, 1, _N_ROWS)
_var2_base = np.random.normal(2, 2, _N_ROWS)
# This is a fresh, independent draw; it is not derived from variable1.
_var2_noise = np.random.normal(0, 1, _N_ROWS)
_var3_values = np.random.normal(-1, 1.5, _N_ROWS)
_category_labels = np.random.choice(
    ["A", "B", "C", "D"],
    size=_N_ROWS,
    p=[0.4, 0.3, 0.2, 0.1],
)

# Synthetic dataset: three numeric columns plus one categorical column.
data = pd.DataFrame({
    "variable1": _var1_values,
    "variable2": _var2_base + 0.5 * _var2_noise,
    "variable3": _var3_values,
    "category": pd.Series(_category_labels, dtype="category"),
})

# Figure 1: scatter of variable2 against variable1, drawn on an explicit
# Axes object instead of pyplot's implicit "current axes".
_scatter_fig, _scatter_ax = plt.subplots(figsize=(10, 6))
_scatter_ax.scatter(
    data["variable1"],
    data["variable2"],
    alpha=0.5,  # semi-transparent markers so overlapping points stay visible
)
_scatter_ax.set_title(
    "Relationship between Variable 1 and Variable 2",
    fontsize=16,
)
_scatter_ax.set_xlabel("Variable 1", fontsize=14)
_scatter_ax.set_ylabel("Variable 2", fontsize=14)
plt.show()

# Figure 2: bar chart with one bar per level of the "category" column,
# bar height being the number of rows at that level.
_count_fig, _count_ax = plt.subplots(figsize=(10, 6))
sns.countplot(x="category", data=data, ax=_count_ax)
_count_ax.set_title("Distribution of Categories", fontsize=16)
_count_ax.set_xlabel("Category", fontsize=14)
_count_ax.set_ylabel("Count", fontsize=14)
# Tilt the existing x tick labels by 45 degrees.
plt.setp(_count_ax.get_xticklabels(), rotation=45)
plt.show()

# Figure 3: annotated heatmap of the pairwise correlations between the
# numeric columns.
numerical_cols = ["variable1", "variable2", "variable3"]
_corr_matrix = data[numerical_cols].corr()

_heat_fig, _heat_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    _corr_matrix,
    annot=True,  # print each coefficient inside its cell
    cmap="coolwarm",
    ax=_heat_ax,
)
_heat_ax.set_title("Correlation Heatmap", fontsize=16)
plt.show()

# --- Narrative ---------------------------------------------------------------
# The conclusions below are computed from `data` instead of being hard-coded.
# The previous fixed text claimed a strong positive correlation between
# Variable 1 and Variable 2 and a moderate negative one for Variable 3, but
# the columns are generated as independent draws, so those statements
# contradicted the figures they were describing.


def _describe_correlation(r):
    """Return a phrase naming the strength and sign of a Pearson coefficient.

    The strength cut-offs (0.1 / 0.3 / 0.6 on the absolute value) are
    rule-of-thumb thresholds chosen for this report, not a formal standard.

    Args:
        r: Pearson correlation coefficient; NaN is accepted.

    Returns:
        A noun phrase such as "a weak negative correlation (r = -0.21)".
    """
    if pd.isna(r):
        # DataFrame.corr() yields NaN when a column has zero variance.
        return "an undefined correlation (r could not be computed)"
    size = abs(r)
    if size < 0.1:
        return f"essentially no linear correlation (r = {r:.2f})"
    if size < 0.3:
        strength = "weak"
    elif size < 0.6:
        strength = "moderate"
    else:
        strength = "strong"
    direction = "positive" if r > 0 else "negative"
    return f"a {strength} {direction} correlation (r = {r:.2f})"


# Pairwise Pearson correlations between the three numeric columns.
_story_corr = data[["variable1", "variable2", "variable3"]].corr()
_r12 = _story_corr.loc["variable1", "variable2"]
_r13 = _story_corr.loc["variable1", "variable3"]
_r23 = _story_corr.loc["variable2", "variable3"]

# value_counts() sorts by frequency, highest first.
_category_counts = data["category"].value_counts()
_top_category = _category_counts.index[0]
_ranking_text = ", ".join(
    f"Category {label} ({count})" for label, count in _category_counts.items()
)

print("Title: Exploring the Relationship between Variable 1 and Variable 2")
print("\nThe scatter plot (Figure 1) shows the relationship between Variable 1 and Variable 2. "
      f"The points show {_describe_correlation(_r12)} between the two variables, "
      "so the spread visible in the plot should be read with that coefficient in mind.")
print("\nScatter Plot")
print("Figure 1: Scatter Plot of Variable 1 and Variable 2")
print("\nTo better understand the distribution of the categorical variable 'category', we created a "
      f"bar chart (Figure 2). The chart reveals that Category {_top_category} has the highest "
      f"frequency; in descending order the counts are: {_ranking_text}. "
      "This information could be useful for further analysis "
      "or decision-making processes.")
print("\nBar Chart")
print("Figure 2: Distribution of Categories")
print("\nAdditionally, we explored the correlation between numerical variables using a heatmap "
      f"(Figure 3). The heatmap shows that Variable 1 and Variable 2 have {_describe_correlation(_r12)}, "
      "consistent with the scatter plot. "
      f"Variable 3 has {_describe_correlation(_r13)} with Variable 1 "
      f"and {_describe_correlation(_r23)} with Variable 2.")
print("\nHeatmap")
print("Figure 3: Correlation Heatmap")
print("\nIn summary, the visualizations and analysis provide insights into the relationships "
      "between variables, the distribution of categories, and the correlations between numerical "
      "variables. These findings can be used to inform further analysis, decision-making, or to generate "
      "new hypotheses for investigation.")