You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Show nr of rows containing a specific value.
# Comparing a column yields a boolean Series; summing it counts the True rows.
print((df['myFruitColumn'] == 'apple').sum())
print((df['myCountsColumn'] >= 11).sum())

# Count apples, oranges and bananas with groupby
# (size() gives the number of rows in each group)
print(df.groupby(['myFruitColumn']).size())
# Show nr of rows
print(len(df.index))
# OR (index 0 of the shape, which is a (rows, cols) tuple)
print(df.shape[0])
# OR
rows = len(df.axes[0])
# OR (slowest)
# NOTE: count() reports the number of non-NA values per column, so it only
# equals the row count for columns without missing values.
print(df.count())
# Print the top rows (head() returns the first 5 rows unless told otherwise).
# Wrapped in print() so the rows are also shown when run as a script, where
# the value of a bare expression is discarded.
print(myDF.head())
# Print the last rows
print(myDF.tail())
Time and date
# Convert 'Date' from string to datetime format
# (example 'Date': ['20200101', '20200201', '20200301', '20200401'])
data['Date'] = pd.to_datetime(data['Date'], format='%Y%m%d')

# Extracting year, month, and day from the 'Date' column (via the .dt accessor)
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day
Grouping and multiindex
# Group a DF: the result is indexed by (Year, Month) and holds the mean of
# each remaining column for that group
df_grp = data.groupby(["Year", "Month"]).mean(numeric_only=False)

# Get a subset based on the grouping index
# (the tuple supplies one key per index level: Year 2020, Month 3)
df_march = df_grp.loc[(2020, 3)]

# Take out one of the columns
mean_sweden = df_march["Sweden_Daily"]

# Loop through the groups; idx is the (Year, Month) tuple of the group.
# NOTE: this snippet expects the standard `datetime` module to be imported
# and `a` to be a list created beforehand -- neither is shown here.
for idx, df_select in df_grp.groupby(level=[0, 1]):
    # Convert the month number to name
    s = datetime.date(1900, idx[1], 1).strftime('%B')
    # Add the data to the array
    a.append([idx[0], s, round(df_select["Sweden_Daily"].mean(), 0)])

# More: https://pandas.pydata.org/docs/reference/groupby.html
Filter based on / Find data based on
# Create 2 new dataframes based on the content of a column
# (in this case the column AnimalId).
# The boolean mask is built from the same frame that is being indexed, so
# the mask and the rows are guaranteed to line up.
newDF1 = df.loc[df["AnimalId"] == 2998]
newDF2 = df.loc[df["AnimalId"] == 2850]
# Identify the peak: idxmax() returns the index label of the row holding the
# largest 'Sweden_Daily' value
idx_max_Sweden = df['Sweden_Daily'].idxmax()
# Look up the date of that row in the same frame the label came from
theDate = df.loc[idx_max_Sweden, "Date"].date()

# Get the sum of the Sweden_Daily values, when Date = March (month 3).
# Rows and column are selected in a single .loc call instead of chaining
# two indexing operations.
theSum = df.loc[df["Date"].dt.month == 3, "Sweden_Daily"].sum()