Save subcategory results to json #242

Merged: 6 commits, Oct 10, 2024
Changes from 4 commits
85 changes: 47 additions & 38 deletions benchmark.py
@@ -252,46 +252,55 @@ def calculate_throughput(self, final_calc=False):
with open(log_filepath, "r") as f:
loaded_logs.append(json.load(f))

measurements_counts = [(len(log["start_times"]), len(log["finish_times"]), len(log["workload_size"])) for log in
loaded_logs]
if not all(x[0] == x[1] == x[2] and x[0] >= MIN_MEASUREMENTS_IN_OVERLAP_COUNT for x in measurements_counts):
ask_for_patience("benchmark on-going, CPU util: {:>3.0f}%".format(psutil.cpu_percent(1)))
return None
latest_start = max(log["start_times"][0] for log in loaded_logs)
earliest_finish = min(log["finish_times"][-1] for log in loaded_logs)

measurements_completed_in_overlap_total = 0
throughput_total = 0.
for log in loaded_logs:
input_size_processed_per_process = 0
total_latency_per_process = 0.
measurements_completed_in_overlap = 0
for i in range(len(log["start_times"])):
start = log["start_times"][i]
finish = log["finish_times"][i]
if start >= latest_start and finish <= earliest_finish:
input_size_processed_per_process += log["workload_size"][i]
total_latency_per_process += finish - start
measurements_completed_in_overlap += 1
elif earliest_finish < finish:
break
if measurements_completed_in_overlap < MIN_MEASUREMENTS_IN_OVERLAP_COUNT:
results = {}
for subcategory in loaded_logs[0].keys():
current_logs = []
for log in loaded_logs:
current_logs.append(log[subcategory])

measurements_counts = [(len(log["start_times"]), len(log["finish_times"]), len(log["workload_size"]))
for log in current_logs]
if not all(x[0] == x[1] == x[2] and x[0] >= MIN_MEASUREMENTS_IN_OVERLAP_COUNT for x in measurements_counts):
ask_for_patience("benchmark on-going, CPU util: {:>3.0f}%".format(psutil.cpu_percent(1)))
return None
measurements_completed_in_overlap_total += measurements_completed_in_overlap
throughput_total += input_size_processed_per_process / total_latency_per_process

if self._prev_measurements_count is not None and \
measurements_completed_in_overlap_total > self._prev_measurements_count:
self.stable = abs((throughput_total / self._prev_throughput_total) - 1.) <= MAX_DEVIATION
self._prev_throughput_total = throughput_total
self._prev_measurements_count = measurements_completed_in_overlap_total

if not self.stable and not final_calc:
print("\r{}total throughput: {:.2f} ips, CPU util: {:>3.0f}%, stabilizing result ...".format(
INDENT, throughput_total, psutil.cpu_percent(1)), end='')

return throughput_total
latest_start = max(log["start_times"][0] for log in current_logs)
earliest_finish = min(log["finish_times"][-1] for log in current_logs)

measurements_completed_in_overlap_total = 0
throughput_total = 0.
for log in current_logs:
input_size_processed_per_process = 0
total_latency_per_process = 0.
measurements_completed_in_overlap = 0
for i in range(len(log["start_times"])):
start = log["start_times"][i]
finish = log["finish_times"][i]
if start >= latest_start and finish <= earliest_finish:
input_size_processed_per_process += log["workload_size"][i]
total_latency_per_process += finish - start
measurements_completed_in_overlap += 1
elif earliest_finish < finish:
break
if measurements_completed_in_overlap < MIN_MEASUREMENTS_IN_OVERLAP_COUNT:
ask_for_patience("benchmark on-going, CPU util: {:>3.0f}%".format(psutil.cpu_percent(1)))
return None
measurements_completed_in_overlap_total += measurements_completed_in_overlap
throughput_total += input_size_processed_per_process / total_latency_per_process

if subcategory == "overall" and self._prev_measurements_count is not None and \
measurements_completed_in_overlap_total > self._prev_measurements_count:
self.stable = abs((throughput_total / self._prev_throughput_total) - 1.) <= MAX_DEVIATION
self._prev_throughput_total = throughput_total
self._prev_measurements_count = measurements_completed_in_overlap_total

if not self.stable and not final_calc and subcategory == "overall":
print("\r{}total throughput: {:.2f} ips, CPU util: {:>3.0f}%, stabilizing result ...".format(
INDENT, throughput_total, psutil.cpu_percent(1)), end='')
results[subcategory] = {"throughput_total": throughput_total,
"start_timestamp": latest_start,
"finish_timestamp": earliest_finish}

return results["overall"]["throughput_total"]


def run_benchmark(model_script, numa_nodes, num_threads_node, num_proc_node, num_threads_per_proc, start_delay=0):
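For orientation, a minimal, self-contained sketch of the per-subcategory overlap-window aggregation that the reworked calculate_throughput performs; the two toy process logs and all numbers below are hypothetical, not taken from this PR.

    # Hypothetical per-process logs: parallel lists of start/finish timestamps and workload sizes.
    logs = [
        {"start_times": [0.0, 1.0, 2.0], "finish_times": [1.0, 2.0, 3.0], "workload_size": [8, 8, 8]},
        {"start_times": [0.5, 1.5, 2.5], "finish_times": [1.5, 2.5, 3.5], "workload_size": [8, 8, 8]},
    ]

    # Only measurements lying entirely inside the window where every process was busy are counted.
    latest_start = max(log["start_times"][0] for log in logs)
    earliest_finish = min(log["finish_times"][-1] for log in logs)

    throughput_total = 0.
    for log in logs:
        processed, latency = 0, 0.
        for start, finish, size in zip(log["start_times"], log["finish_times"], log["workload_size"]):
            if start >= latest_start and finish <= earliest_finish:
                processed += size
                latency += finish - start
        throughput_total += processed / latency  # per-process items per second, summed over processes

    print("total throughput: {:.2f} ips".format(throughput_total))  # 16.00 ips for these toy numbers

In the PR itself this aggregation now runs once per key of the loaded logs, and only the "overall" entry drives the stability check and the returned value.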
13 changes: 11 additions & 2 deletions utils/benchmark.py
@@ -101,12 +101,21 @@ def _dump_results(self):
with self._dump_filelock:
with open(self._dump_filepath, "w") as f:
times_invoked = self._times_invoked
json.dump({
json_data = {}
json_data["overall"] = {
"workload_size": self._workload_size[self.warm_up_runs:times_invoked],
"start_times": self._start_times[self.warm_up_runs:times_invoked],
"finish_times": self._finish_times[self.warm_up_runs:times_invoked],
"threads": os.environ.get("AIO_NUMA_CPUS")
}, f)
}
for subcategory in self._subcategories.keys():
json_data[subcategory] = {
"workload_size": self._subcategories[subcategory]["workload_size"][self.warm_up_runs:times_invoked], # noqa
"start_times": self._subcategories[subcategory]["start_times"][self.warm_up_runs:times_invoked],
Member:
Here might be the source of the issue I've mentioned on the mzd side. You cut the subcategories' runs based on times_invoked as seen from outside of the llama generative loop.

Collaborator (Author):
The problem was actually discarding warm-up runs when they were already ignored by start_subcategory_measurement and finish_subcategory_measurement, which resulted in not meeting the minimum measurements count.

"finish_times": self._subcategories[subcategory]["finish_times"][self.warm_up_runs:times_invoked], # noqa
"threads": os.environ.get("AIO_NUMA_CPUS")
}
json.dump(json_data, f)

def _dump_loop(self):
while self._do_dump:
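To make the new on-disk format concrete, a rough sketch of the json_data structure that _dump_results writes per process after this change: an "overall" entry plus one entry per subcategory, all sharing the same list layout. The subcategory names ("prompt_eval", "token_gen"), the AIO_NUMA_CPUS value and every number below are illustrative assumptions, not values from the PR.

    # Hypothetical shape of one process's dump file after this change (illustrative values only).
    json_data = {
        "overall": {
            "workload_size": [512, 512],
            "start_times": [1728550000.0, 1728550001.2],
            "finish_times": [1728550001.1, 1728550002.3],
            "threads": "0-15",
        },
        "prompt_eval": {
            "workload_size": [512, 512],
            "start_times": [1728550000.0, 1728550001.2],
            "finish_times": [1728550000.4, 1728550001.6],
            "threads": "0-15",
        },
        "token_gen": {
            "workload_size": [128, 128],
            "start_times": [1728550000.4, 1728550001.6],
            "finish_times": [1728550001.1, 1728550002.3],
            "threads": "0-15",
        },
    }

This layout is what lets calculate_throughput iterate over loaded_logs[0].keys() and report stability only for the "overall" entry. As the author's reply above notes, the subcategory lists already skip warm-up runs when they are recorded, so slicing them again with [self.warm_up_runs:times_invoked] in this revision could drop valid measurements and trip the minimum-measurements check.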