Vous êtes sur la page 1 sur 15

3/22/24, 6:37 PM Pract1

In [1]: import numpy as np


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]: weather = pd.read_csv("weatherAUS.csv")

In [3]: weather.head(15)

Out[3]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir WindGus

0 2008-12-01 Albury 13.4 22.9 0.6 NaN NaN W
1 2008-12-02 Albury 7.4 25.1 0.0 NaN NaN WNW
2 2008-12-03 Albury 12.9 25.7 0.0 NaN NaN WSW
3 2008-12-04 Albury 9.2 28.0 0.0 NaN NaN NE
4 2008-12-05 Albury 17.5 32.3 1.0 NaN NaN W
5 2008-12-06 Albury 14.6 29.7 0.2 NaN NaN WNW
6 2008-12-07 Albury 14.3 25.0 0.0 NaN NaN W
7 2008-12-08 Albury 7.7 26.7 0.0 NaN NaN W
8 2008-12-09 Albury 9.7 31.9 0.0 NaN NaN NNW
9 2008-12-10 Albury 13.1 30.1 1.4 NaN NaN W
10 2008-12-11 Albury 13.4 30.4 0.0 NaN NaN N
11 2008-12-12 Albury 15.9 21.7 2.2 NaN NaN NNE
12 2008-12-13 Albury 15.9 18.6 15.6 NaN NaN W
13 2008-12-14 Albury 12.6 21.0 3.6 NaN NaN SW
14 2008-12-16 Albury 9.8 27.7 NaN NaN NaN WNW

15 rows × 24 columns

In [4]: weather.tail()

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 1/15


3/22/24, 6:37 PM Pract1

Out[4]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir Win

142188 2017-06-20 Uluru 3.5 21.8 0.0 NaN NaN E
142189 2017-06-21 Uluru 2.8 23.4 0.0 NaN NaN E
142190 2017-06-22 Uluru 3.6 25.3 0.0 NaN NaN NNW
142191 2017-06-23 Uluru 5.4 26.9 0.0 NaN NaN N
142192 2017-06-24 Uluru 7.8 27.0 0.0 NaN NaN SE

5 rows × 24 columns

In [5]: weather.columns

Out[5]: Index(['Date', 'Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation',
'Sunshine', 'WindGustDir', 'WindGustSpeed', 'WindDir9am', 'WindDir3pm',
'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am',
'Temp3pm', 'RainToday', 'RISK_MM', 'RainTomorrow'],
dtype='object')

In [6]: print(len(weather.columns))

24

In [7]: weather.index

Out[7]: RangeIndex(start=0, stop=142193, step=1)

In [8]: weather.shape

Out[8]: (142193, 24)

In [9]: weather.dtypes

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 2/15


3/22/24, 6:37 PM Pract1
Out[9]:
Date object
Location object
MinTemp float64
MaxTemp float64
Rainfall float64
Evaporation float64
Sunshine float64
WindGustDir object
WindGustSpeed float64
WindDir9am object
WindDir3pm object
WindSpeed9am float64
WindSpeed3pm float64
Humidity9am float64
Humidity3pm float64
Pressure9am float64
Pressure3pm float64
Cloud9am float64
Cloud3pm float64
Temp9am float64
Temp3pm float64
RainToday object
RISK_MM float64
RainTomorrow object
dtype: object

In [10]: weather.columns.values

Out[10]: array(['Date', 'Location', 'MinTemp', 'MaxTemp', 'Rainfall',
'Evaporation', 'Sunshine', 'WindGustDir', 'WindGustSpeed',
'WindDir9am', 'WindDir3pm', 'WindSpeed9am', 'WindSpeed3pm',
'Humidity9am', 'Humidity3pm', 'Pressure9am', 'Pressure3pm',
'Cloud9am', 'Cloud3pm', 'Temp9am', 'Temp3pm', 'RainToday',
'RISK_MM', 'RainTomorrow'], dtype=object)

In [11]: weather.describe(include='all')

Out[11]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshin

count 142193 142193 141556.000000 141871.000000 140787.000000 81350.000000 74377.00000

unique 3436 49 NaN NaN NaN NaN NaN

top 2013-12-01 Canberra NaN NaN NaN NaN NaN

freq 49 3418 NaN NaN NaN NaN NaN

mean NaN NaN 12.186400 23.226784 2.349974 5.469824 7.62485

std NaN NaN 6.403283 7.117618 8.465173 4.188537 3.78152

min NaN NaN -8.500000 -4.800000 0.000000 0.000000 0.00000

25% NaN NaN 7.600000 17.900000 0.000000 2.600000 4.90000

50% NaN NaN 12.000000 22.600000 0.000000 4.800000 8.50000

75% NaN NaN 16.800000 28.200000 0.800000 7.400000 10.60000

max NaN NaN 33.900000 48.100000 371.000000 145.000000 14.50000

11 rows × 24 columns

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 3/15


3/22/24, 6:37 PM Pract1

In [12]: weather['Date']

Out[12]:
0 2008-12-01
1 2008-12-02
2 2008-12-03
3 2008-12-04
4 2008-12-05
...
142188 2017-06-20
142189 2017-06-21
142190 2017-06-22
142191 2017-06-23
142192 2017-06-24
Name: Date, Length: 142193, dtype: object

In [13]: weather.sort_index(axis=1,ascending=False)

Out[13]: WindSpeed9am WindSpeed3pm WindGustSpeed WindGustDir WindDir9am WindDir3pm

0 20.0 24.0 44.0 W W WNW

1 4.0 22.0 44.0 WNW NNW WSW

2 19.0 26.0 46.0 WSW W WSW

3 11.0 9.0 24.0 NE SE

4 7.0 20.0 41.0 W ENE NW

... ... ... ... ... ...

142188 15.0 13.0 31.0 E ESE

142189 13.0 11.0 31.0 E SE EN

142190 13.0 9.0 22.0 NNW SE

142191 9.0 9.0 37.0 N SE WNW

142192 13.0 7.0 28.0 SE SSE

142193 rows × 24 columns

In [14]: weather.sort_index(axis=1,ascending=True)

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 4/15


3/22/24, 6:37 PM Pract1

Out[14]: Cloud3pm Cloud9am Date Evaporation Humidity3pm Humidity9am Location MaxTe

2008-
0 NaN 8.0 NaN 22.0 71.0 Albury 2
12-01

2008-
1 NaN NaN NaN 25.0 44.0 Albury 2
12-02

2008-
2 2.0 NaN NaN 30.0 38.0 Albury 2
12-03

2008-
3 NaN NaN NaN 16.0 45.0 Albury 2
12-04

2008-
4 8.0 7.0 NaN 33.0 82.0 Albury 3
12-05

... ... ... ... ... ... ... ...

2017-
142188 NaN NaN NaN 27.0 59.0 Uluru 2
06-20

2017-
142189 NaN NaN NaN 24.0 51.0 Uluru 2
06-21

2017-
142190 NaN NaN NaN 21.0 56.0 Uluru 2
06-22

2017-
142191 NaN NaN NaN 24.0 53.0 Uluru 2
06-23

2017-
142192 2.0 3.0 NaN 24.0 51.0 Uluru 2
06-24

142193 rows × 24 columns

In [15]: weather.sort_values(by='Date')

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 5/15


3/22/24, 6:37 PM Pract1

Out[15]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir

2007-
44351 Canberra 8.0 24.3 0.0 3.4 6.3 NW
11-01

2007-
44352 Canberra 14.0 26.9 3.6 4.4 9.7 ENE
11-02

2007-
44353 Canberra 13.7 23.4 3.6 5.8 3.3 NW
11-03

2007-
44354 Canberra 13.3 15.5 39.8 7.2 9.1 NW
11-04

2007-
44355 Canberra 7.6 16.1 2.8 5.6 10.6 SSE
11-05

... ... ... ... ... ... ... ... ...

2017-
38814 WaggaWagga -0.1 12.7 0.0 NaN NaN SSW
06-25

2017-
32833 Sydney 7.6 19.3 0.0 3.4 9.4 W
06-25

2017-
29496 Richmond 2.7 20.0 0.2 NaN NaN W
06-25

2017-
26545 Penrith 3.1 20.0 0.0 NaN NaN SW
06-25

2017-
35838 SydneyAirport 6.8 19.5 0.0 3.4 9.4 WSW
06-25

142193 rows × 24 columns

In [16]: weather.iloc[5]

Out[16]:
Date 2008-12-06
Location Albury
MinTemp 14.6
MaxTemp 29.7
Rainfall 0.2
Evaporation NaN
Sunshine NaN
WindGustDir WNW
WindGustSpeed 56.0
WindDir9am W
WindDir3pm W
WindSpeed9am 19.0
WindSpeed3pm 24.0
Humidity9am 55.0
Humidity3pm 23.0
Pressure9am 1009.2
Pressure3pm 1005.4
Cloud9am NaN
Cloud3pm NaN
Temp9am 20.6
Temp3pm 28.9
RainToday No
RISK_MM 0.0
RainTomorrow No
Name: 5, dtype: object

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 6/15


3/22/24, 6:37 PM Pract1

In [17]: weather[10:15]

Out[17]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir WindGus

10 2008-12-11 Albury 13.4 30.4 0.0 NaN NaN N
11 2008-12-12 Albury 15.9 21.7 2.2 NaN NaN NNE
12 2008-12-13 Albury 15.9 18.6 15.6 NaN NaN W
13 2008-12-14 Albury 12.6 21.0 3.6 NaN NaN SW
14 2008-12-16 Albury 9.8 27.7 NaN NaN NaN WNW

5 rows × 24 columns

In [18]: weather.loc[:,["Date"]]

Out[18]: Date

0 2008-12-01

1 2008-12-02

2 2008-12-03

3 2008-12-04

4 2008-12-05

... ...

142188 2017-06-20

142189 2017-06-21

142190 2017-06-22

142191 2017-06-23

142192 2017-06-24

142193 rows × 1 columns

In [19]: weather.iloc[:10, :]

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 7/15


3/22/24, 6:37 PM Pract1

Out[19]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir WindGust

2008-
0 Albury 13.4 22.9 0.6 NaN NaN W
12-01

2008-
1 Albury 7.4 25.1 0.0 NaN NaN WNW
12-02

2008-
2 Albury 12.9 25.7 0.0 NaN NaN WSW
12-03

2008-
3 Albury 9.2 28.0 0.0 NaN NaN NE
12-04

2008-
4 Albury 17.5 32.3 1.0 NaN NaN W
12-05

2008-
5 Albury 14.6 29.7 0.2 NaN NaN WNW
12-06

2008-
6 Albury 14.3 25.0 0.0 NaN NaN W
12-07

2008-
7 Albury 7.7 26.7 0.0 NaN NaN W
12-08

2008-
8 Albury 9.7 31.9 0.0 NaN NaN NNW
12-09

2008-
9 Albury 13.1 30.1 1.4 NaN NaN W
12-10

10 rows × 24 columns

In [20]: weather.iloc[:, :10]

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 8/15


3/22/24, 6:37 PM Pract1

Out[20]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir Win

2008-
0 Albury 13.4 22.9 0.6 NaN NaN W
12-01

2008-
1 Albury 7.4 25.1 0.0 NaN NaN WNW
12-02

2008-
2 Albury 12.9 25.7 0.0 NaN NaN WSW
12-03

2008-
3 Albury 9.2 28.0 0.0 NaN NaN NE
12-04

2008-
4 Albury 17.5 32.3 1.0 NaN NaN W
12-05

... ... ... ... ... ... ... ... ...

2017-
142188 Uluru 3.5 21.8 0.0 NaN NaN E
06-20

2017-
142189 Uluru 2.8 23.4 0.0 NaN NaN E
06-21

2017-
142190 Uluru 3.6 25.3 0.0 NaN NaN NNW
06-22

2017-
142191 Uluru 5.4 26.9 0.0 NaN NaN N
06-23

2017-
142192 Uluru 7.8 27.0 0.0 NaN NaN SE
06-24

142193 rows × 10 columns

In [21]: weather.iloc[:10, :20]

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 9/15


3/22/24, 6:37 PM Pract1

Out[21]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir WindGust

2008-
0 Albury 13.4 22.9 0.6 NaN NaN W
12-01

2008-
1 Albury 7.4 25.1 0.0 NaN NaN WNW
12-02

2008-
2 Albury 12.9 25.7 0.0 NaN NaN WSW
12-03

2008-
3 Albury 9.2 28.0 0.0 NaN NaN NE
12-04

2008-
4 Albury 17.5 32.3 1.0 NaN NaN W
12-05

2008-
5 Albury 14.6 29.7 0.2 NaN NaN WNW
12-06

2008-
6 Albury 14.3 25.0 0.0 NaN NaN W
12-07

2008-
7 Albury 7.7 26.7 0.0 NaN NaN W
12-08

2008-
8 Albury 9.7 31.9 0.0 NaN NaN NNW
12-09

2008-
9 Albury 13.1 30.1 1.4 NaN NaN W
12-10

In [22]: weather.isnull()

Out[22]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir Wind

0 False False False False False True True False

1 False False False False False True True False

2 False False False False False True True False

3 False False False False False True True False

4 False False False False False True True False

... ... ... ... ... ... ... ... ...

142188 False False False False False True True False

142189 False False False False False True True False

142190 False False False False False True True False

142191 False False False False False True True False

142192 False False False False False True True False

142193 rows × 24 columns

In [23]: weather.isna()

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 10/15


3/22/24, 6:37 PM Pract1

Out[23]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir Wind

0 False False False False False True True False

1 False False False False False True True False

2 False False False False False True True False

3 False False False False False True True False

4 False False False False False True True False

... ... ... ... ... ... ... ... ...

142188 False False False False False True True False

142189 False False False False False True True False

142190 False False False False False True True False

142191 False False False False False True True False

142192 False False False False False True True False

142193 rows × 24 columns

In [24]: weather.isnull().any()

Out[24]:
Date False
Location False
MinTemp True
MaxTemp True
Rainfall True
Evaporation True
Sunshine True
WindGustDir True
WindGustSpeed True
WindDir9am True
WindDir3pm True
WindSpeed9am True
WindSpeed3pm True
Humidity9am True
Humidity3pm True
Pressure9am True
Pressure3pm True
Cloud9am True
Cloud3pm True
Temp9am True
Temp3pm True
RainToday True
RISK_MM False
RainTomorrow False
dtype: bool

In [25]: weather.isnull().sum().sum()

Out[25]: 316559

In [26]: weather.isnull().sum(axis=1)

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 11/15


3/22/24, 6:37 PM Pract1
Out[26]:
0 3
1 4
2 3
3 4
4 2
..
142188 4
142189 4
142190 4
142191 4
142192 2
Length: 142193, dtype: int64

In [27]: weather.isna().sum()

Out[27]:
Date 0
Location 0
MinTemp 637
MaxTemp 322
Rainfall 1406
Evaporation 60843
Sunshine 67816
WindGustDir 9330
WindGustSpeed 9270
WindDir9am 10013
WindDir3pm 3778
WindSpeed9am 1348
WindSpeed3pm 2630
Humidity9am 1774
Humidity3pm 3610
Pressure9am 14014
Pressure3pm 13981
Cloud9am 53657
Cloud3pm 57094
Temp9am 904
Temp3pm 2726
RainToday 1406
RISK_MM 0
RainTomorrow 0
dtype: int64

In [28]: weather.Evaporation.isnull().sum()

Out[28]: 60843

In [29]: weather.groupby(['Location'])['Cloud9am'].apply(lambda x: x.isnull().sum())

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 12/15


3/22/24, 6:37 PM Pract1
Out[29]:
Location
Adelaide 3090
Albany 35
Albury 1729
AliceSprings 332
BadgerysCreek 2928
Ballarat 544
Bendigo 964
Brisbane 1
Cairns 185
Canberra 1066
Cobar 369
CoffsHarbour 766
Dartmoor 2943
Darwin 1
GoldCoast 2980
Hobart 1162
Katherine 37
Launceston 2308
Melbourne 406
MelbourneAirport 0
Mildura 84
Moree 483
MountGambier 106
MountGinini 2907
Newcastle 181
Nhil 1569
NorahHead 2929
NorfolkIsland 61
Nuriootpa 127
PearceRAAF 946
Penrith 2964
Perth 2
PerthAirport 2
Portland 409
Richmond 2348
Sale 355
SalmonGums 2955
Sydney 566
SydneyAirport 4
Townsville 172
Tuggeranong 2998
Uluru 1183
WaggaWagga 207
Walpole 2819
Watsonia 0
Williamtown 458
Witchcliffe 2952
Wollongong 1661
Woomera 363
Name: Cloud9am, dtype: int64

In [30]: weather.info()

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 13/15


3/22/24, 6:37 PM Pract1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142193 entries, 0 to 142192
Data columns (total 24 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 142193 non-null object
1 Location 142193 non-null object
2 MinTemp 141556 non-null float64
3 MaxTemp 141871 non-null float64
4 Rainfall 140787 non-null float64
5 Evaporation 81350 non-null float64
6 Sunshine 74377 non-null float64
7 WindGustDir 132863 non-null object
8 WindGustSpeed 132923 non-null float64
9 WindDir9am 132180 non-null object
10 WindDir3pm 138415 non-null object
11 WindSpeed9am 140845 non-null float64
12 WindSpeed3pm 139563 non-null float64
13 Humidity9am 140419 non-null float64
14 Humidity3pm 138583 non-null float64
15 Pressure9am 128179 non-null float64
16 Pressure3pm 128212 non-null float64
17 Cloud9am 88536 non-null float64
18 Cloud3pm 85099 non-null float64
19 Temp9am 141289 non-null float64
20 Temp3pm 139467 non-null float64
21 RainToday 140787 non-null object
22 RISK_MM 142193 non-null float64
23 RainTomorrow 142193 non-null object
dtypes: float64(17), object(7)
memory usage: 26.0+ MB

In [31]: print(weather.std())

MinTemp 6.403283
MaxTemp 7.117618
Rainfall 8.465173
Evaporation 4.188537
Sunshine 3.781525
WindGustSpeed 13.588801
WindSpeed9am 8.893337
WindSpeed3pm 8.803345
Humidity9am 19.051293
Humidity3pm 20.797772
Pressure9am 7.105476
Pressure3pm 7.036677
Cloud9am 2.887016
Cloud3pm 2.720633
Temp9am 6.492838
Temp3pm 6.937594
RISK_MM 8.477969
dtype: float64
/tmp/ipykernel_5332/3866207191.py:1: FutureWarning: The default value of numeric_only in DataFrame.std is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  print(weather.std())

In [32]: print(weather.median())

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 14/15


3/22/24, 6:37 PM Pract1
MinTemp 12.0
MaxTemp 22.6
Rainfall 0.0
Evaporation 4.8
Sunshine 8.5
WindGustSpeed 39.0
WindSpeed9am 13.0
WindSpeed3pm 19.0
Humidity9am 70.0
Humidity3pm 52.0
Pressure9am 1017.6
Pressure3pm 1015.2
Cloud9am 5.0
Cloud3pm 5.0
Temp9am 16.7
Temp3pm 21.1
RISK_MM 0.0
dtype: float64
/tmp/ipykernel_5332/2339860456.py:1: FutureWarning: The default value of numeric_only in DataFrame.median is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  print(weather.median())

In [ ]:

file:///C:/Users/Arbaz shaikh/AppData/Local/Microsoft/Windows/INetCache/IE/X2ZVULOZ/Pract1[1].html 15/15

Vous aimerez peut-être aussi