Pandas Essentials
What
Pandas provides the DataFrame — a labeled 2D table — and is the go-to tool for loading, cleaning, exploring, and transforming tabular data.
Key operations
Loading data
import pandas as pd
df = pd.read_csv("data.csv")
df = pd.read_json("data.json")
df = pd.read_parquet("data.parquet") # fast, compressed — prefer for large data
Exploring
df.shape # (rows, cols)
df.head() # first 5 rows
df.info() # column names, types, non-null counts
df.describe() # statistics for numeric columns
df.dtypes # column data types
df.isnull().sum() # missing values per column
df.value_counts("col") # frequency of each value
Selecting
df["col"] # single column (Series)
df[["col1", "col2"]] # multiple columns (DataFrame)
df.loc[0] # row by label
df.iloc[0] # row by position
df[df["age"] > 30] # boolean filter
df.query("age > 30") # same, cleaner syntax
Transforming
df["new_col"] = df["a"] + df["b"] # create column
df["col"] = df["col"].apply(lambda x: x*2) # apply function
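df.drop(columns=["col"]) # remove column (returns a new DataFrame; df is unchanged)
df.rename(columns={"old": "new"}) # rename (returns a new DataFrame)
df.sort_values("col", ascending=False) # sort descending (returns a new DataFrame)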
df.groupby("category")["value"].mean() # group and aggregate
Handling missing data
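df.dropna() # drop rows with any NaN (returns a new DataFrame)
df.fillna(0) # fill NaN with 0 (returns a new DataFrame)
df["col"].fillna(df["col"].median()) # fill with median (returns a new Series; assign it back to keep it)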