Data Collection and Splitting.py
from tensorflow.keras.preprocessing import image_dataset_from_directory
# Collecting Dataset from Path
dataset = image_dataset_from_directory(
    "Path",
    shuffle=True,
    batch_size=32,
    image_size=(299, 299),
)
# Class Information
class_names = dataset.class_names
print("Class Names:", class_names)
print("Number of Classes:", len(class_names))
# Inspect one batch from the dataset
for images, labels in dataset.take(1):
    print("Batch Size:", images.shape[0])
    print("Image Size:", images.shape[1:])
    # Check the data types and shapes
    print("Image Data Type:", images.dtype)
    print("Label Data Type:", labels.dtype)
    print("Label Shape:", labels.shape)
    print("Labels in Batch:", labels.numpy())
# Total number of batches in the dataset
total_batches = len(dataset)
print("Total Number of Batches:", total_batches)
# Approximate total number of images (the last batch may hold fewer than batch_size images)
total_images = total_batches * images.shape[0]
print("Total Number of Images:", total_images)
# Data Split: partition the batched dataset into train / validation / test subsets
def get_dataset_partitions_tf(ds, train_split=0.75, val_split=0.15, test_split=0.1, shuffle=True, shuffle_size=10000):
    if shuffle:
        ds = ds.shuffle(shuffle_size, seed=12)
    dataset_size = len(ds)
    train_size = int(train_split * dataset_size)
    val_size = int(val_split * dataset_size)
    train_ds = ds.take(train_size)
    remaining_ds = ds.skip(train_size)
    val_ds = remaining_ds.take(val_size)
    test_ds = remaining_ds.skip(val_size)  # test set is whatever remains after train and validation
    return train_ds, val_ds, test_ds

train_data, val_data, test_data = get_dataset_partitions_tf(dataset)
print("Train Data:", len(train_data), "Validation Data:", len(val_data), "Test Data:", len(test_data))