Python Solution
Python Solution
In [1]:
import pandas as pd
In [2]:
# Read the dataset from 'internet.csv', a path relative to the notebook's working directory.
# NOTE(review): the outputs below list an 'Unnamed: 0' column, which looks like a saved
# index written into the CSV — confirm and handle it explicitly if so.
df = pd.read_csv('internet.csv')
In [3]:
df.head()
Out[3]:
No. of
Unnamed: Cellular Internet Broadband
Entity Code Year Internet
0 Subscription Users(%) Subscription
Users
In [4]:
df.tail()
Out[4]:
No. of
Unnamed: Cellular Internet Broadband
Entity Code Year Internet
0 Subscription Users(%) Subscription
Users
In [5]:
df.shape
Out[5]:
(8867, 8)
localhost:8888/notebooks/internet.ipynb 1/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [6]:
df.columns
Out[6]:
In [7]:
df.duplicated().sum()
Out[7]:
In [8]:
df.isnull().sum()
Out[8]:
Unnamed: 0 0
Entity 0
Code 0
Year 0
Cellular Subscription 0
Internet Users(%) 0
No. of Internet Users 0
Broadband Subscription 0
dtype: int64
In [9]:
In [10]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8867 entries, 0 to 8866
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Entity 8867 non-null object
1 Code 8867 non-null object
2 Year 8867 non-null int64
3 Cellular Subscription 8867 non-null float64
4 Internet Users(%) 8867 non-null float64
5 No. of Internet Users 8867 non-null int64
6 Broadband Subscription 8867 non-null float64
dtypes: float64(3), int64(2), object(2)
memory usage: 485.0+ KB
localhost:8888/notebooks/internet.ipynb 2/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [11]:
df.describe()
Out[11]:
In [12]:
df.nunique()
Out[12]:
Entity 229
Code 216
Year 41
Cellular Subscription 6344
Internet Users(%) 4702
No. of Internet Users 5058
Broadband Subscription 3858
dtype: int64
In [13]:
# Labels of the columns whose dtype is 'object'.
obj_cols = df.select_dtypes(include='object').columns
In [14]:
In [15]:
In [16]:
import numpy as np
localhost:8888/notebooks/internet.ipynb 3/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [17]:
import warnings
# Ignore every warning from this point on: no category, module or message filter is given.
# NOTE(review): this also hides deprecation notices raised by the plotting calls below —
# consider narrowing the filter.
warnings.filterwarnings('ignore')
localhost:8888/notebooks/internet.ipynb 4/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [18]:
df['Entity'].unique()
Out[18]:
localhost:8888/notebooks/internet.ipynb 5/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [19]:
df['Entity'].value_counts()
Out[19]:
Afghanistan 41
Mongolia 41
Mali 41
Malta 41
Mauritania 41
..
Palau 16
Curacao 12
South Sudan 11
Kosovo 6
Sint Maarten (Dutch part) 1
Name: Entity, Length: 229, dtype: int64
In [20]:
df['Code'].unique()
Out[20]:
localhost:8888/notebooks/internet.ipynb 6/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [21]:
df['Entity'].value_counts()
Out[21]:
Afghanistan 41
Mongolia 41
Mali 41
Malta 41
Mauritania 41
..
Palau 16
Curacao 12
South Sudan 11
Kosovo 6
Sint Maarten (Dutch part) 1
Name: Entity, Length: 229, dtype: int64
In [22]:
df['Year'].unique()
Out[22]:
array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], dtype=int64)
localhost:8888/notebooks/internet.ipynb 7/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [23]:
df['Year'].value_counts()
Out[23]:
2020 223
2019 223
2015 222
2012 222
2010 222
1990 222
2004 222
2003 221
2002 221
2014 220
2001 219
2017 219
2013 219
2009 219
2007 219
2005 219
2000 219
1999 218
2011 218
2016 217
1998 217
2008 217
2006 216
1996 216
1997 215
1980 215
1995 215
1992 213
1993 213
1994 213
1981 213
1982 213
1983 212
1991 211
1986 211
1984 211
1988 210
1985 210
1987 209
1989 208
2018 205
Name: Year, dtype: int64
localhost:8888/notebooks/internet.ipynb 8/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [24]:
# Draw one 20-bin histogram with a KDE curve for each column named in num_cols.
for col in num_cols:
    plt.figure(figsize=(15, 6))
    # NOTE(review): no `hue` is passed, so `palette` probably has no visible effect — confirm.
    sns.histplot(df[col], bins=20, kde=True, palette='hls')
    plt.xticks(rotation=90)  # vertical tick labels on the x axis
    plt.show()
localhost:8888/notebooks/internet.ipynb 9/45
4/24/23, 9:36 PM internet - Jupyter Notebook
localhost:8888/notebooks/internet.ipynb 10/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [25]:
# Draw one density-scaled, 20-bin histogram with a KDE overlay for each column in num_cols.
# sns.distplot is deprecated, so the same picture is produced with histplot:
# stat='density' reproduces distplot's normalised bars, and cut=3 lets the KDE curve
# extend past the data range the way distplot's did.
for col in num_cols:
    plt.figure(figsize=(15, 6))
    sns.histplot(df[col], kde=True, bins=20, stat='density', kde_kws={'cut': 3})
    plt.xticks(rotation=90)  # vertical tick labels on the x axis
    plt.show()
localhost:8888/notebooks/internet.ipynb 11/45
4/24/23, 9:36 PM internet - Jupyter Notebook
localhost:8888/notebooks/internet.ipynb 12/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [26]:
# Draw one horizontal box plot for each column named in num_cols.
for col in num_cols:
    plt.figure(figsize=(15, 6))
    # The column is named through `x=` instead of being passed positionally: a positional
    # Series next to `data=df` collides with the `data` parameter in seaborn releases
    # where the remaining arguments are keyword-only.
    # `palette` without `hue` is deprecated, so a single colour taken from the 'hls'
    # palette is passed through `color=` instead.
    sns.boxplot(data=df, x=col, color=sns.color_palette('hls', 1)[0])
    plt.xticks(rotation=90)  # vertical tick labels on the x axis
    plt.show()
localhost:8888/notebooks/internet.ipynb 13/45
4/24/23, 9:36 PM internet - Jupyter Notebook
localhost:8888/notebooks/internet.ipynb 14/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [27]:
# Draw one horizontal violin plot for each column named in num_cols.
for col in num_cols:
    plt.figure(figsize=(15, 6))
    # The column is named through `x=` instead of being passed positionally: a positional
    # Series next to `data=df` collides with the `data` parameter in seaborn releases
    # where the remaining arguments are keyword-only.
    # `palette` without `hue` is deprecated, so a single colour taken from the 'hls'
    # palette is passed through `color=` instead.
    sns.violinplot(data=df, x=col, color=sns.color_palette('hls', 1)[0])
    plt.xticks(rotation=90)  # vertical tick labels on the x axis
    plt.show()
localhost:8888/notebooks/internet.ipynb 15/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [28]:
import plotly.express as px

# Render an interactive 20-bin histogram for each column named in num_cols.
for col in num_cols:
    fig = px.histogram(df, nbins=20, x=col)
    fig.show()
1000
800
count
600
400
localhost:8888/notebooks/internet.ipynb 16/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [29]:
Year Boxplot
2020
2015
2010
2005
Year
2000
1995
In [30]:
Year Boxplot
2020
2010
Year
2000
localhost:8888/notebooks/internet.ipynb 17/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [31]:
# Bar chart of each column in num_cols against Year; the Year column itself is skipped.
for col in num_cols:
    if col == 'Year':
        continue
    plt.figure(figsize=(15, 6))
    # ci=None turns the error bars off.
    sns.barplot(data=df, x='Year', y=col, ci=None, palette='hls')
    plt.xticks(rotation=90)  # vertical tick labels on the x axis
    plt.show()
localhost:8888/notebooks/internet.ipynb 18/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [32]:
# Interactive bar chart of each column in num_cols against Year, coloured by Year;
# the Year column itself is skipped.
for col in num_cols:
    if col == 'Year':
        continue
    fig = px.bar(df, x='Year', y=col, color='Year')
    fig.show()
25k
20k
Cellular Subscription
15k
10k
localhost:8888/notebooks/internet.ipynb 19/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [33]:
# Line plot for every ordered pair of distinct columns in num_cols:
# the outer column goes on the x axis, the inner column on the y axis.
for x_col in num_cols:
    for y_col in num_cols:
        if x_col == y_col:
            continue
        plt.figure(figsize=(15, 6))
        # ci=None turns the confidence band off.
        sns.lineplot(data=df, x=x_col, y=y_col, ci=None, palette='hls')
        plt.xticks(rotation=90)  # vertical tick labels on the x axis
        plt.show()
In [34]:
# Keep only the rows whose Code is not the literal 'Region'.
temp = df[df['Code'] != 'Region']
In [35]:
# Sum the numeric columns for each (Entity, Year) pair, then turn the group keys back
# into ordinary columns.
# numeric_only=True states explicitly that the object-dtype 'Code' column stays out of
# the aggregation, rather than relying on pandas silently dropping it; newer pandas
# releases aggregate the strings instead.
temp1 = temp.groupby(['Entity', 'Year']).sum(numeric_only=True).reset_index()
In [36]:
df1=temp1[(temp1['Entity']=='India')|(temp1['Entity']=='China')|(temp1['Entity']=='Unite
localhost:8888/notebooks/internet.ipynb 20/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [37]:
df1
Out[37]:
United
7900 2016 122.594551 85.544418 279910069 32.727173
States
United
7901 2017 123.044838 87.274887 287824925 33.283627
States
United
7902 2018 106.464684 88.498901 293940279 33.860367
States
United
7903 2019 108.092651 89.430283 298983035 34.725369
States
United
7904 2020 106.185554 90.900002 305371298 36.608768
States
localhost:8888/notebooks/internet.ipynb 21/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [38]:
120
100
Cellular Subscription
80
60
40
20
localhost:8888/notebooks/internet.ipynb 22/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [39]:
120
100
80
Cellular Subscription
60
40
20
localhost:8888/notebooks/internet.ipynb 23/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [40]:
300
250
Cellular Subscription
200
150
100
localhost:8888/notebooks/internet.ipynb 24/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [41]:
100
Cellular Subscription per 100 people
80
60
40
20
localhost:8888/notebooks/internet.ipynb 25/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [42]:
120
100
Cellular Subscription
80
60
40
20
localhost:8888/notebooks/internet.ipynb 26/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [43]:
150
Cellular Subscription
100
50
localhost:8888/notebooks/internet.ipynb 27/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [44]:
35
30
Broadband Subscription
25
20
15
10
localhost:8888/notebooks/internet.ipynb 28/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [45]:
30
Broadband Subscription
20
10
localhost:8888/notebooks/internet.ipynb 29/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [46]:
70
60
Broadband Subscription
50
40
30
20
localhost:8888/notebooks/internet.ipynb 30/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [47]:
100
Broadband Subscription per 100 people
80
60
40
20
localhost:8888/notebooks/internet.ipynb 31/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [48]:
35
30
Broadband Subscription
25
20
15
10
localhost:8888/notebooks/internet.ipynb 32/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [49]:
50
40
Broadband Subscription
30
20
10
In [50]:
# Rows whose Code equals the literal 'Region'.
df2 = df.loc[df['Code'].eq('Region')]
In [51]:
df2['Entity'].unique()
Out[51]:
localhost:8888/notebooks/internet.ipynb 33/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [52]:
In [53]:
En
120
100
Cellular Subscription
80
60
40
20
localhost:8888/notebooks/internet.ipynb 34/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [54]:
150
En
100
Cellular Subscription
50
localhost:8888/notebooks/internet.ipynb 35/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [55]:
En
1400
1200
Cellular Subscription
1000
800
600
400
localhost:8888/notebooks/internet.ipynb 36/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [56]:
100
En
Cellular Subscription per 100 people
80
60
40
20
localhost:8888/notebooks/internet.ipynb 37/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [57]:
En
120
100
Cellular Subscription
80
60
40
20
localhost:8888/notebooks/internet.ipynb 38/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [58]:
En
150
Cellular Subscription
100
50
−50
localhost:8888/notebooks/internet.ipynb 39/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [59]:
En
35
30
Broadband Subscription
25
20
15
10
localhost:8888/notebooks/internet.ipynb 40/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [60]:
40
En
30
Broadband Subscription
20
10
localhost:8888/notebooks/internet.ipynb 41/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [61]:
250 En
200
Broadband Subscription
150
100
50
localhost:8888/notebooks/internet.ipynb 42/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [62]:
100
En
Landline Internet per 100 people
80
60
40
20
localhost:8888/notebooks/internet.ipynb 43/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [63]:
En
30
Broadband Subscription
20
10
localhost:8888/notebooks/internet.ipynb 44/45
4/24/23, 9:36 PM internet - Jupyter Notebook
In [64]:
En
50
40
Broadband Subscription
30
20
10
−10
localhost:8888/notebooks/internet.ipynb 45/45