import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
np.random.seed(42)
data = pd.DataFrame({
"variable1": np.random.normal(0, 1, 1000),
"variable2": np.random.normal(2, 2, 1000) + 0.5 * np.random.normal(0, 1, 1000),
"variable3": np.random.normal(-1, 1.5, 1000),
"category": pd.Series(
np.random.choice(["A", "B", "C", "D"], size=1000, p=[0.4, 0.3, 0.2, 0.1]),
dtype="category",
),
})
plt.figure(figsize=(10, 6))
plt.scatter(data["variable1"], data["variable2"], alpha=0.5)
plt.title("Relationship between Variable 1 and Variable 2", fontsize=16)
plt.xlabel("Variable 1", fontsize=14)
plt.ylabel("Variable 2", fontsize=14)
plt.show()
plt.figure(figsize=(10, 6))
sns.countplot(x="category", data=data)
plt.title("Distribution of Categories", fontsize=16)
plt.xlabel("Category", fontsize=14)
plt.ylabel("Count", fontsize=14)
plt.xticks(rotation=45)
plt.show()
plt.figure(figsize=(10, 8))
numerical_cols = ["variable1", "variable2", "variable3"]
sns.heatmap(data[numerical_cols].corr(), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap", fontsize=16)
plt.show()
print("Title: Exploring the Relationship between Variable 1 and Variable 2")
print("\nThe scatter plot (Figure 1) shows the relationship between Variable 1 and Variable 2. "
"We can observe a positive correlation, indicating that as Variable 1 increases, Variable 2 tends to "
"increase as well. However, there is a considerable amount of scatter, suggesting that other factors "
"may influence this relationship.")
print("\nScatter Plot")
print("Figure 1: Scatter Plot of Variable 1 and Variable 2")
print("\nTo better understand the distribution of the categorical variable 'category', we created a "
"bar chart (Figure 2). The chart reveals that Category A has the highest frequency, followed by "
"Category B, Category C, and Category D. This information could be useful for further analysis "
"or decision-making processes.")
print("\nBar Chart")
print("Figure 2: Distribution of Categories")
print("\nAdditionally, we explored the correlation between numerical variables using a heatmap "
"(Figure 3). The heatmap shows that Variable 1 and Variable 2 have a strong positive correlation, "
"confirming the observation from the scatter plot. However, we can also see that Variable 3 has a "
"moderate negative correlation with both Variable 1 and Variable 2, suggesting that it may have "
"an opposing effect on the relationship between the first two variables.")
print("\nHeatmap")
print("Figure 3: Correlation Heatmap")
print("\nIn summary, the visualizations and analysis provide insights into the relationships "
"between variables, the distribution of categories, and the correlations between numerical "
"variables. These findings can be used to inform further analysis, decision-making, or to generate "
"new hypotheses for investigation.")