Skip to content

Commit 011dcfa

Browse files
authored
Add files via upload
1 parent 53832bc commit 011dcfa

File tree

6 files changed

+459
-0
lines changed

6 files changed

+459
-0
lines changed
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
import streamlit as st
2+
import pandas as pd
3+
import numpy as np
4+
import plotly.express as px
5+
import matplotlib.pyplot as plt
6+
import seaborn as sns
7+
from statsmodels.tsa.arima.model import ARIMA
8+
import datetime
9+
import plotly.graph_objects as go
10+
import networkx as nx
11+
12+
13+
# Load and preprocess data using st.cache
14+
@st.cache_data
def load_data():
    """Load the monthly question counts, indexed by month.

    Reads ``TotalQuestions.csv`` from the current working directory, parses
    the ``Month`` column as dates and uses it as the index.

    The function is wrapped with ``@st.cache_data`` so the CSV is read once
    and reused across Streamlit reruns. A bare ``st.cache_data(...)``
    statement (without ``@``) only builds a decorator and discards it, which
    leaves the function uncached. No ``hash_funcs`` are needed because the
    function takes no arguments to hash.

    Returns:
        pandas.DataFrame: the CSV contents with ``Month`` as the index.
    """
    df = pd.read_csv('TotalQuestions.csv', parse_dates=['Month'])
    return df.set_index('Month')
19+
20+
21+
# Sidebar navigation
22+
# Sidebar navigation: one entry per page of the app.
menu = st.sidebar.selectbox(
    'Navigation',
    [
        'Stack Overflow Question Forecast',
        'Graphical Analysis',
        'Timeline Visualization',
    ],
)

# The forecast page needs the dataset and the list of selectable languages
# (one language per CSV column).
if menu == 'Stack Overflow Question Forecast':
    df = load_data()
    languages = list(df.columns)
28+
29+
30+
def forecast_questions(df, language, future_month, future_year):
    """Forecast the question count of one language for a single future month.

    Args:
        df: DataFrame whose index holds the observation dates (the last index
            entry is taken as the most recent observation) and which has one
            column per language.
        language: name of the column to forecast.
        future_month: target month as an integer (formatted with ``:02d``).
        future_year: target year.

    Returns:
        The last value of the ARIMA(5, 1, 0) forecast, i.e. the prediction
        for the requested month.

    Raises:
        ValueError: if the requested month is not strictly after the last
            observation in ``df``.
    """
    last_date = df.index[-1]
    future_date = pd.to_datetime(f'{future_year}-{future_month:02d}-01')
    months_ahead = (future_date.year - last_date.year) * 12 + future_date.month - last_date.month

    # Validate the horizon before fitting: the fit is the expensive step and
    # is pointless when the request is going to be rejected anyway.
    if months_ahead <= 0:
        raise ValueError("Prediction must have end after start.")

    # Simple ARIMA model for demonstration.
    model_fit = ARIMA(df[language], order=(5, 1, 0)).fit()
    forecast = model_fit.forecast(steps=months_ahead)
    return forecast.iloc[-1]  # last step == the requested month
40+
41+
42+
def generate_forecasts(df, language, start_date, periods):
    """Forecast ``periods`` consecutive steps for one language.

    Args:
        df: DataFrame with one column per language.
        language: name of the column to forecast.
        start_date: start passed to ``pd.date_range`` when building the
            index of the result.
        periods: number of forecast steps (and rows in the result).

    Returns:
        pandas.DataFrame: a single column named ``language`` holding the
        ARIMA(5, 1, 0) forecasts, indexed by ``periods`` dates generated
        from ``start_date`` with frequency ``'M'``.
    """
    model_fit = ARIMA(df[language], order=(5, 1, 0)).fit()
    forecast = model_fit.forecast(steps=periods)
    future_dates = pd.date_range(start=start_date, periods=periods, freq='M')
    # Hand over the raw values, not the Series: a Series inside the dict is
    # aligned to `index` by label, so any label of the model's own forecast
    # index that is missing from `future_dates` would silently become NaN.
    return pd.DataFrame({language: np.asarray(forecast)}, index=future_dates)
49+
50+
51+
# Modify title style
52+
# Page: Stack Overflow Question Forecast.
# Uses `menu`, `df`, `languages` and `forecast_questions` defined earlier in
# this script. The page guard is restated here so this section is a complete
# statement on its own.
if menu == 'Stack Overflow Question Forecast':
    # Modify title style
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Stack Overflow Question Forecast</h1>",
        unsafe_allow_html=True
    )
    st.markdown("---", unsafe_allow_html=True)

    # The subheaders act as the visible captions; each widget still gets a
    # real (hidden) label because Streamlit warns about empty labels.
    st.subheader('Select Programming Language')
    selected_language = st.selectbox('Programming language', languages,
                                     label_visibility='collapsed')

    col1, col2 = st.columns(2)
    with col1:
        st.subheader('Select Future Month')
        future_month = st.selectbox('Future month', list(range(1, 13)),
                                    format_func=lambda x: datetime.date(1900, x, 1).strftime('%B'),
                                    label_visibility='collapsed')
    with col2:
        st.subheader('Select Future Year')
        current_year = datetime.datetime.now().year
        future_year = st.selectbox('Future year', list(range(current_year, current_year + 6)),
                                   label_visibility='collapsed')

    # Forecast for the selected month and year
    if st.button('Predict'):
        try:
            prediction = forecast_questions(df, selected_language, future_month, future_year)
            month_name = datetime.date(1900, future_month, 1).strftime('%B')
            st.markdown(
                f"<div style='background-color: green; color: white; padding: 10px; border-radius: 5px;'><strong>Predicted number of questions for {selected_language} in {month_name} {future_year}: <span style='color: red;'>{int(prediction)}</span></strong></div>",
                unsafe_allow_html=True)

            # Plot 1: Count plot of total questions for each month in the selected year
            months = pd.date_range(start=f'{future_year}-01-01', end=f'{future_year}-12-31', freq='M')
            month_forecasts = [forecast_questions(df, selected_language, month.month, month.year)
                               for month in months]
            month_forecast_df = pd.DataFrame({selected_language: month_forecasts}, index=months)

            fig1 = px.bar(month_forecast_df, x=month_forecast_df.index.strftime('%B'), y=selected_language,
                          title=f'Monthly Predictions for {future_year}')
            st.plotly_chart(fig1)

            # Plot 2: yearly totals from the current year up to four years
            # past the selected year. Years covered by the data use the
            # observed counts; later years sum the twelve monthly forecasts.
            last_data_year = df.index[-1].year
            future_years = list(range(datetime.datetime.now().year, future_year + 5))
            year_forecasts = []
            for year in future_years:
                if year <= last_data_year:
                    year_total = df[df.index.year == year][selected_language].sum()
                elif year == future_year:
                    # Already forecast month by month for Plot 1; reuse it
                    # instead of refitting the model twelve more times.
                    year_total = sum(month_forecasts)
                else:
                    year_months = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31', freq='M')
                    year_total = sum(forecast_questions(df, selected_language, m.month, m.year)
                                     for m in year_months)
                year_forecasts.append(year_total)
            year_forecast_df = pd.DataFrame({selected_language: year_forecasts}, index=future_years)

            fig2 = px.bar(year_forecast_df, x=year_forecast_df.index, y=selected_language,
                          title=f'Yearly Predictions for Next 5 Years for {selected_language}')
            st.plotly_chart(fig2)

            # Plot 3: Pie chart of percentage questions predicted for input year month-wise
            year_forecast_percent = month_forecast_df / month_forecast_df.sum() * 100
            fig3 = px.pie(year_forecast_percent, values=selected_language,
                          names=year_forecast_percent.index.strftime('%B'),
                          title=f'Percentage Question Distribution for {selected_language} in {future_year}')
            st.plotly_chart(fig3)

            # Plot 4: line plot of the monthly forecasts for the selected year
            fig4 = px.line(month_forecast_df, x=month_forecast_df.index, y=selected_language,
                           title=f'Monthly Trends for {selected_language}')
            fig4.update_traces(mode='lines+markers')
            fig4.update_layout(xaxis_title='Date', yaxis_title='Number of Questions',
                               plot_bgcolor='rgba(0, 0, 0, 0)')
            st.plotly_chart(fig4)

        except ValueError as e:
            # forecast_questions raises ValueError for months that are not
            # after the last observation; surface it in the UI.
            st.error(f"Error: {e}")
122+
123+
# Page: Graphical Analysis.
# Written as a standalone `if`: the menu values are mutually exclusive, so
# this is equivalent to the `elif` branch of the page dispatch.
if menu == 'Graphical Analysis':
    # Modify title style
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Graphical Analysis</h1>",
        unsafe_allow_html=True
    )
    st.markdown("---", unsafe_allow_html=True)

    # Load data
    df = load_data()

    # 1) Annual Line Chart
    df_annual = df.resample('A').sum()
    fig1 = px.line(df_annual, x=df_annual.index, y=df_annual.columns,
                   title='Timeline of the number of questions per category (2008-2024)')
    st.plotly_chart(fig1)

    # 2) Change in Question Counts Over Time
    df_change = df.diff()
    fig2 = px.line(df_change, x=df_change.index, y=df_change.columns,
                   title='Change in Question Counts for Each Programming Language Over Time')
    st.plotly_chart(fig2)

    # 4) Total Number of Questions by Programming Languages
    total_questions_by_language = df.sum().sort_values(ascending=False)
    fig4 = px.bar(x=total_questions_by_language.index, y=total_questions_by_language.values,
                  title='Total Number of Questions by Programming Languages')
    st.plotly_chart(fig4)

    # 5) Individual Temporal Series for Top 5 Languages
    top_5_languages = total_questions_by_language.head(5).index.tolist()
    df_top_5 = df[top_5_languages]
    fig5 = px.line(df_top_5, x=df_top_5.index, y=df_top_5.columns,
                   title='Individual Temporal Series for Top 5 Languages')
    st.plotly_chart(fig5)

    # 6) Total Number of Questions by Day of the Week
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    daily_total_questions = df.groupby(df.index.dayofweek).sum().sum(axis=1)
    # dayofweek is 0 (Monday) .. 6 (Sunday). Reindex so that a weekday with
    # no rows still gets a bar (0) and x/y always have the same length.
    daily_total_questions = daily_total_questions.reindex(range(7), fill_value=0)
    fig6 = px.bar(x=weekday_names, y=daily_total_questions.values,
                  title='Total Number of Questions by Day of the Week')
    st.plotly_chart(fig6)

    # 7) Heatmap of the Correlation Between Programming Languages
    correlation_matrix = df.corr()
    fig7 = px.imshow(correlation_matrix, color_continuous_scale='thermal',
                     title='Correlation Heatmap of Programming Languages')
    st.plotly_chart(fig7)

    # 8) Distribution of Questions for Top 10 Languages
    top_10_languages = total_questions_by_language.head(10).index.tolist()
    df_top_10 = df[top_10_languages]
    fig8 = px.box(df_top_10, y=df_top_10.columns,
                  title='Distribution of Questions for Top 10 Programming Languages')
    st.plotly_chart(fig8)

    # 9) Network plot built from the top-10 correlation matrix.
    # Node i of the graph corresponds to top_10_languages[i].
    corr_matrix = df_top_10.corr()
    G = nx.from_numpy_array(corr_matrix.values)
    node_labels = dict(enumerate(top_10_languages))

    # Scope the dark style to this figure so it applies to the whole figure
    # (it has to be active when the figure is created) and does not leak
    # into later reruns through matplotlib's global rcParams.
    with plt.style.context('dark_background'):
        network_fig, network_ax = plt.subplots(figsize=(12, 8))
        pos = nx.spring_layout(G, seed=42)  # positions for all nodes

        nx.draw_networkx_nodes(G, pos, ax=network_ax, node_size=1500,
                               node_color='skyblue', edgecolors='grey')
        nx.draw_networkx_edges(G, pos, ax=network_ax, edge_color='grey')
        nx.draw_networkx_labels(G, pos, ax=network_ax, labels=node_labels,
                                font_size=10, font_weight='bold')
        network_ax.set_title('Network Plot of Top 10 Programming Languages based on Correlation')

        # Hand Streamlit the explicit Figure instead of the pyplot module,
        # and do not call plt.show(): there is no GUI window in a
        # Streamlit app.
        st.pyplot(network_fig)

    # pyplot keeps every figure it creates alive until it is closed; close it
    # so figures do not pile up across reruns.
    plt.close(network_fig)
218+
219+
220+
# Page: Timeline Visualization.
# A single guard replaces the redundant `elif X:` / `if X:` pair; the menu
# values are mutually exclusive, so a standalone `if` is equivalent.
if menu == 'Timeline Visualization':
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Timeline Visualization</h1>",
        unsafe_allow_html=True
    )
    st.markdown("---", unsafe_allow_html=True)

    # JavaScript to attempt autoplay
    # NOTE(review): no element with id 'autoplay-video' is created in this
    # script, and scripts injected through st.markdown may not be executed
    # at all - confirm whether this has any effect.
    autoplay_js = """
    <script>
    document.addEventListener('DOMContentLoaded', function(event) {
        var video = document.getElementById('autoplay-video');
        video.play();
    });
    </script>
    """
    st.markdown(autoplay_js, unsafe_allow_html=True)

    # Display the MP4 video
    video_path = 'stack_overflow.mp4'  # Replace with your actual video file path
    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(video_path, 'rb') as video_file:
        video_bytes = video_file.read()
    st.video(video_bytes, start_time=0)
Binary file not shown.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
## Setup
3+
4+
1. **Clone the Repository**:
5+
```sh
6+
git clone url_to_this_repository
7+
```
8+
9+
2. **Install Dependencies**:
10+
```sh
11+
pip install -r requirements.txt
12+
```
13+
14+
3. **Run the Model**:
15+
```sh
16+
streamlit run app.py
17+
```
18+
19+
4. **Demo**:
20+
[Demo Video](demovideo.mp4)
21+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pandas
2+
numpy
3+
scikit-learn
4+
seaborn
5+
plotly
6+
matplotlib
7+
streamlit
8+
networkx
9+
statsmodels
10+
Binary file not shown.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy