Skip to content

Commit 5c4085d

Browse files
committed
ok
1 parent f63d50b commit 5c4085d

File tree

4 files changed

+16483
-0
lines changed

4 files changed

+16483
-0
lines changed

pandas/mere_dost.csv

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
name,marks,city
2+
ajay,45,gorakhpur
3+
vijay,10,kushingar
4+
mohan,42,kasia
5+
jadish,45,lucknow
6+
krishna,36,odisha

pandas/new.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import pandas as pd
2+
3+
# Input/output locations and the column names the analysis relies on.
# Replace these with the values that match your own dataset.
DATA_PATH = 'population.csv'
CLEANED_PATH = 'cleaned_data.csv'
GROUP_COLUMN = 'Category'
VALUE_COLUMN = 'Sales'


def fill_missing_with_means(df):
    """Return a copy of ``df`` with NaNs in numeric columns set to the column mean.

    Non-numeric columns are left untouched: a mean is undefined for them, and
    pandas >= 2.0 raises ``TypeError`` if ``mean()`` is asked to include them.
    """
    return df.fillna(df.mean(numeric_only=True))


def mean_by_group(df, group_column=GROUP_COLUMN):
    """Return the per-group mean of every numeric column of ``df``.

    Raises:
        KeyError: if ``group_column`` is not a column of ``df``.
    """
    return df.groupby(group_column).mean(numeric_only=True)


def filter_above(df, column=VALUE_COLUMN, threshold=500):
    """Return the rows of ``df`` whose ``column`` value is strictly above ``threshold``."""
    return df[df[column] > threshold]


def numeric_correlation(df):
    """Return the pairwise correlation matrix of the numeric columns of ``df``."""
    # Select numeric columns explicitly so text columns cannot break corr().
    return df.select_dtypes(include='number').corr()


def plot_histogram(df, column=VALUE_COLUMN, bins=20):
    """Show a histogram of ``df[column]`` in a matplotlib window."""
    # Imported lazily so the analysis helpers stay usable without matplotlib.
    import matplotlib.pyplot as plt

    df[column].hist(bins=bins)
    plt.title(f'{column} Distribution')
    plt.xlabel(column)
    plt.ylabel('Frequency')
    plt.show()


def main():
    """Load the dataset, report on it, clean it, analyse it and save the result."""
    df = pd.read_csv(DATA_PATH)

    # 1. Inspect the Data
    print("First 5 rows of the dataset:")
    print(df.head())

    print("\nBasic Information:")
    # info() writes to stdout itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None").
    df.info()

    print("\nSummary Statistics:")
    print(df.describe())

    # 2. Check for Missing Data
    print("\nMissing Values per Column:")
    print(df.isnull().sum())

    # 3. Handle Missing Data (fill numeric gaps with the column mean).
    # Alternative: drop incomplete rows with df.dropna().
    df_cleaned = fill_missing_with_means(df)

    # 4. Data Analysis - Grouping and Aggregation
    grouped_data = mean_by_group(df_cleaned, GROUP_COLUMN)
    print("\nMean values by Category:")
    print(grouped_data)

    # 5. Filter Data
    filtered_data = filter_above(df_cleaned, VALUE_COLUMN, 500)
    print("\nFiltered Data (Sales > 500):")
    print(filtered_data)

    # 6. Correlation Analysis
    print("\nCorrelation between numerical columns:")
    print(numeric_correlation(df_cleaned))

    # 7. Save the cleaned data to a new CSV file
    df_cleaned.to_csv(CLEANED_PATH, index=False)

    # Optional: visualize the distribution of the value column.
    plot_histogram(df_cleaned, VALUE_COLUMN, bins=20)


if __name__ == "__main__":
    main()

pandas/pandfas.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
# Sample records, one entry per friend, laid out column by column.
# Marks are integers (not strings) so numeric summaries such as mean, std
# and quartiles can be computed; the CSV written below is unchanged by this.
friends = {
    "name": ['ajay', 'vijay', 'mohan', 'jadish', 'krishna'],
    "marks": [45, 10, 42, 45, 36],
    "city": ['gorakhpur', 'kushingar', 'kasia', 'lucknow', 'odisha '],
}

# Build the DataFrame from the column-oriented dictionary.
df = pd.DataFrame(friends)

# index=False keeps the auto-generated row index out of the CSV file.
# (Without it, to_csv() adds an unnamed leading index column.)
df.to_csv('mere_dost.csv', index=False)

# Bare expressions only echo in a notebook/REPL; in a script their results
# are discarded, so print them explicitly.
print(df.head(2))
print(df.tail(2))

# describe must be *called*; printing the attribute shows the bound method.
print(df.describe())

0 commit comments

Comments
 (0)