Skip to content

Commit 0f6a2c7

Browse files
authored
Add integrity check before extracting precompiled NIFs (#36)
This commit adds an integrity check for downloaded tar.gz files containing the NIFs we published. The idea is to avoid supply-chain attacks: if GitHub gets hacked, we don't load unsafe files onto our machines. The requirement is that, before publishing the package to Hex, we run `mix rustler.download ModuleName --all` to generate the checksum file for the native module.
1 parent c757ec3 commit 0f6a2c7

File tree

6 files changed

+391
-19
lines changed

6 files changed

+391
-19
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ erl_crash.dump
2222
/priv/native
2323

2424
/native/*/target
25+
26+
# The checksum files for precompiled NIFs
27+
checksum-*.exs

lib/html5ever/native.ex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ defmodule Html5ever.Native do
1515
else
1616
case Html5ever.Precompiled.download_or_reuse_nif_file(
1717
rustler_opts,
18+
nif_module: __MODULE__,
1819
base_url: "#{github_url}/releases/download/v#{version}",
1920
version: version
2021
) do

lib/html5ever/precompiled.ex

Lines changed: 281 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,57 @@ defmodule Html5ever.Precompiled do
1414
x86_64-pc-windows-gnu
1515
)
1616
@available_nif_versions ~w(2.14 2.15 2.16)
17+
@checksum_algo :sha256
18+
@checksum_algorithms [@checksum_algo]
19+
20+
@native_dir "priv/native"
21+
22+
def available_targets do
23+
for target_triple <- @available_targets, nif_version <- @available_nif_versions do
24+
"nif-#{nif_version}-#{target_triple}"
25+
end
26+
end
27+
28+
@doc """
29+
Returns URLs for NIFs based on its module name
30+
31+
The module name is the one that defined the NIF and this information
32+
is stored in a metadata file.
33+
"""
34+
def available_nif_urls(nif_module) when is_atom(nif_module) do
35+
metadata =
36+
nif_module
37+
|> metadata_file()
38+
|> read_map_from_file()
39+
40+
case metadata do
41+
%{base_url: base_url, basename: basename, version: version} ->
42+
for target <- available_targets() do
43+
# We need to build the name again because each target is different.
44+
lib_name = "#{lib_prefix(target)}#{basename}-v#{version}-#{target}"
45+
46+
tar_gz_file_url(base_url, lib_name_with_ext(target, lib_name))
47+
end
48+
49+
_ ->
50+
raise "metadata about current target for the module #{inspect(nif_module)} is not available. Please compile the project again with: `mix compile --force`"
51+
end
52+
end
53+
54+
def current_target_nif_url(nif_module) when is_atom(nif_module) do
55+
metadata =
56+
nif_module
57+
|> metadata_file()
58+
|> read_map_from_file()
59+
60+
case metadata do
61+
%{base_url: base_url, file_name: file_name} ->
62+
tar_gz_file_url(base_url, file_name)
63+
64+
_ ->
65+
raise "metadata about current target for the module #{inspect(nif_module)} is not available. Please compile the project again with: `mix compile --force`"
66+
end
67+
end
1768

1869
@doc """
1970
Returns the target triple for download or compile and load.
@@ -210,26 +261,48 @@ defmodule Html5ever.Precompiled do
210261
Enum.join(values, "-")
211262
end
212263

264+
@doc """
265+
Perform the download or load of the precompiled NIF
266+
267+
It will look in the "priv/native/otp_app" first, and if
268+
that file doesn't exist, it will try to fetch from cache.
269+
In case there is no valid cached file, then it will try
270+
to download the NIF from the provided base URL.
271+
"""
213272
def download_or_reuse_nif_file(rustler_opts, opts) do
214273
name = Keyword.fetch!(rustler_opts, :otp_app)
215274
version = Keyword.fetch!(opts, :version)
216275

217-
priv_dir = Application.app_dir(name, "priv")
276+
native_dir = Application.app_dir(name, @native_dir)
218277

219-
cache_opts = if System.get_env("MIX_XDG"), do: %{os: :linux}, else: %{}
220-
cache_dir = :filename.basedir(:user_cache, Atom.to_string(name), cache_opts)
278+
cache_dir = cache_dir("precompiled_nifs")
221279

222280
with {:ok, target} <- target() do
223-
nif_name = rustler_opts[:crate] || name
224-
lib_name = "#{lib_prefix(target)}#{nif_name}-v#{version}-#{target}"
281+
basename = rustler_opts[:crate] || name
282+
lib_name = "#{lib_prefix(target)}#{basename}-v#{version}-#{target}"
225283

226284
file_name = lib_name_with_ext(target, lib_name)
227285
cached_tar_gz = Path.join(cache_dir, "#{file_name}.tar.gz")
228286

229-
lib_file =
230-
priv_dir
231-
|> Path.join("native")
232-
|> Path.join(file_name)
287+
lib_file = Path.join(native_dir, file_name)
288+
289+
base_url = Keyword.fetch!(opts, :base_url)
290+
# TODO: once we move to Rustler, we probably don't need to fetch `:nif_module`
291+
nif_module = Keyword.fetch!(opts, :nif_module)
292+
293+
metadata = %{
294+
otp_app: name,
295+
crate: rustler_opts[:crate],
296+
cached_tar_gz: cached_tar_gz,
297+
base_url: base_url,
298+
basename: basename,
299+
lib_name: lib_name,
300+
file_name: file_name,
301+
target: target,
302+
version: version
303+
}
304+
305+
write_metadata(nif_module, metadata)
233306

234307
# Override Rustler opts so we load from the downloaded file.
235308
# See: https://hexdocs.pm/rustler/Rustler.html#module-configuration-options
@@ -238,24 +311,27 @@ defmodule Html5ever.Precompiled do
238311
|> Keyword.put(:skip_compilation?, true)
239312
|> Keyword.put(:load_from, {name, "priv/native/#{lib_name}"})
240313

314+
# TODO: add option to only write metadata
241315
cond do
242-
File.exists?(lib_file) ->
243-
Logger.debug("Using NIF from #{lib_file}")
244-
{:ok, new_opts}
245-
246316
File.exists?(cached_tar_gz) ->
247-
with :ok <- :erl_tar.extract(cached_tar_gz, [:compressed, cwd: Path.dirname(lib_file)]) do
317+
# Remove existing NIF file so we don't have processes using it.
318+
# See: https://github.com/rusterlium/rustler/blob/46494d261cbedd3c798f584459e42ab7ee6ea1f4/rustler_mix/lib/rustler/compiler.ex#L134
319+
File.rm(lib_file)
320+
321+
with :ok <- check_file_integrity(cached_tar_gz, nif_module),
322+
:ok <- :erl_tar.extract(cached_tar_gz, [:compressed, cwd: Path.dirname(lib_file)]) do
248323
Logger.debug("Copying NIF from cache and extracting to #{lib_file}")
249324
{:ok, new_opts}
250325
end
251326

252327
true ->
253-
base_url = Keyword.fetch!(opts, :base_url)
254328
dirname = Path.dirname(lib_file)
255329

256330
with :ok <- File.mkdir_p(cache_dir),
257331
:ok <- File.mkdir_p(dirname),
258332
{:ok, tar_gz} <- download_tar_gz(base_url, lib_name, cached_tar_gz),
333+
:ok <- File.write(cached_tar_gz, tar_gz),
334+
:ok <- check_file_integrity(cached_tar_gz, nif_module),
259335
:ok <-
260336
:erl_tar.extract({:binary, tar_gz}, [:compressed, cwd: Path.dirname(lib_file)]) do
261337
Logger.debug("NIF cached at #{cached_tar_gz} and extracted to #{lib_file}")
@@ -265,6 +341,75 @@ defmodule Html5ever.Precompiled do
265341
end
266342
end
267343

344+
defp checksum_map(nif_module) when is_atom(nif_module) do
345+
nif_module
346+
|> checksum_file()
347+
|> read_map_from_file()
348+
end
349+
350+
defp check_file_integrity(file_path, nif_module) when is_atom(nif_module) do
351+
nif_module
352+
|> checksum_map()
353+
|> check_integrity_from_map(file_path, nif_module)
354+
end
355+
356+
# It receives the map of %{ "filename" => "algo:checksum" } with the file path
357+
def check_integrity_from_map(checksum_map, file_path, nif_module) do
358+
with {:ok, {algo, hash}} <- find_checksum(checksum_map, file_path, nif_module),
359+
:ok <- validate_checksum_algo(algo),
360+
do: compare_checksum(file_path, algo, hash)
361+
end
362+
363+
defp find_checksum(checksum_map, file_path, nif_module) do
364+
basename = Path.basename(file_path)
365+
366+
case Map.fetch(checksum_map, basename) do
367+
{:ok, algo_with_hash} ->
368+
[algo, hash] = String.split(algo_with_hash, ":")
369+
algo = String.to_existing_atom(algo)
370+
371+
{:ok, {algo, hash}}
372+
373+
:error ->
374+
{:error,
375+
"the precompiled NIF file does not exist in the checksum file. Please consider run: `mix rustler.download #{inspect(nif_module)} --only-local` to generate the checksum file."}
376+
end
377+
end
378+
379+
defp validate_checksum_algo(algo) do
380+
if algo in @checksum_algorithms do
381+
:ok
382+
else
383+
{:error,
384+
"checksum algorithm is not supported: #{inspect(algo)}. The supported ones are:\n - #{Enum.join(@checksum_algorithms, "\n - ")}"}
385+
end
386+
end
387+
388+
defp compare_checksum(file_path, algo, expected_checksum) do
389+
case File.read(file_path) do
390+
{:ok, content} ->
391+
file_hash =
392+
algo
393+
|> :crypto.hash(content)
394+
|> Base.encode16(case: :lower)
395+
396+
if file_hash == expected_checksum do
397+
:ok
398+
else
399+
{:error, "the integrity check failed because the checksum of files does not match"}
400+
end
401+
402+
{:error, reason} ->
403+
{:error,
404+
"cannot read the file for checksum comparison: #{inspect(file_path)}. Reason: #{inspect(reason)}"}
405+
end
406+
end
407+
408+
defp cache_dir(sub_dir) do
409+
cache_opts = if System.get_env("MIX_XDG"), do: %{os: :linux}, else: %{}
410+
:filename.basedir(:user_cache, Path.join("rustler", sub_dir), cache_opts)
411+
end
412+
268413
defp lib_prefix(target) do
269414
if String.contains?(target, "windows") do
270415
""
@@ -284,18 +429,23 @@ defmodule Html5ever.Precompiled do
284429
"#{lib_name}.#{ext}"
285430
end
286431

287-
defp download_tar_gz(base_url, lib_name, target_name) do
432+
defp tar_gz_file_url(base_url, file_name) do
288433
uri = URI.parse(base_url)
289434

290435
uri =
291436
Map.update!(uri, :path, fn path ->
292-
"#{path}/#{lib_name_with_ext(target_name, lib_name)}.tar.gz"
437+
Path.join(path, "#{file_name}.tar.gz")
293438
end)
294439

295-
download_nif_artifact(to_string(uri))
440+
to_string(uri)
441+
end
442+
443+
defp download_tar_gz(base_url, lib_name, target_name) do
444+
base_url
445+
|> tar_gz_file_url(lib_name_with_ext(target_name, lib_name))
446+
|> download_nif_artifact()
296447
end
297448

298-
# Gets the NIF file from a given URL.
299449
defp download_nif_artifact(url) do
300450
url = String.to_charlist(url)
301451
Logger.debug("Downloading NIF from #{url}")
@@ -340,4 +490,116 @@ defmodule Html5ever.Precompiled do
340490
{:error, "couldn't fetch NIF from #{url}: #{inspect(other)}"}
341491
end
342492
end
493+
494+
@doc """
495+
Download a list of files from URLs and calculate its checksum.
496+
497+
Returns a list with details of the download and the checksum of each file.
498+
"""
499+
def download_nif_artifacts_with_checksums!(urls) do
500+
tasks =
501+
Task.async_stream(urls, fn url -> {url, download_nif_artifact(url)} end, timeout: :infinity)
502+
503+
cache_dir = cache_dir("precompiled_nifs")
504+
:ok = File.mkdir_p(cache_dir)
505+
506+
Enum.map(tasks, fn {:ok, result} ->
507+
with {:download, {url, download_result}} <- {:download, result},
508+
{:download_result, {:ok, body}} <- {:download_result, download_result},
509+
hash <- :crypto.hash(@checksum_algo, body),
510+
path <- Path.join(cache_dir, basename_from_url(url)),
511+
{:file, :ok} <- {:file, File.write(path, body)} do
512+
checksum = Base.encode16(hash, case: :lower)
513+
514+
Logger.debug(
515+
"NIF cached at #{path} with checksum #{inspect(checksum)} (#{@checksum_algo})"
516+
)
517+
518+
%{
519+
url: url,
520+
path: path,
521+
checksum: checksum,
522+
checksum_algo: @checksum_algo
523+
}
524+
else
525+
{context, result} ->
526+
raise "could not finish the download of NIF artifacts. Context: #{inspect(context)}. Reason: #{inspect(result)}"
527+
end
528+
end)
529+
end
530+
531+
defp basename_from_url(url) do
532+
uri = URI.parse(url)
533+
534+
uri.path
535+
|> String.split("/")
536+
|> List.last()
537+
end
538+
539+
def read_map_from_file(file) do
540+
with {:ok, contents} <- File.read(file),
541+
{%{} = contents, _} <- Code.eval_string(contents) do
542+
contents
543+
else
544+
_ -> %{}
545+
end
546+
end
547+
548+
defp write_metadata(nif_module, metadata) do
549+
metadata_file = metadata_file(nif_module)
550+
existing = read_map_from_file(metadata_file)
551+
552+
unless Map.equal?(metadata, existing) do
553+
dir = Path.dirname(metadata_file)
554+
:ok = File.mkdir_p(dir)
555+
556+
File.write!(metadata_file, inspect(metadata, limit: :infinity, pretty: true))
557+
end
558+
559+
:ok
560+
end
561+
562+
defp metadata_file(nif_module) when is_atom(nif_module) do
563+
rustler_cache = cache_dir("metadata")
564+
Path.join(rustler_cache, "metadata-#{nif_module}.exs")
565+
end
566+
567+
@doc """
568+
Write the checksum file with all NIFs available.
569+
570+
It receives the module name and checksums.
571+
"""
572+
def write_checksum!(nif_module, checksums) when is_atom(nif_module) do
573+
metadata =
574+
nif_module
575+
|> metadata_file()
576+
|> read_map_from_file()
577+
578+
case metadata do
579+
%{otp_app: _name} ->
580+
file = checksum_file(nif_module)
581+
582+
pairs =
583+
for %{path: path, checksum: checksum, checksum_algo: algo} <- checksums, into: %{} do
584+
basename = Path.basename(path)
585+
checksum = "#{algo}:#{checksum}"
586+
{basename, checksum}
587+
end
588+
589+
lines =
590+
for {filename, checksum} <- Enum.sort(pairs) do
591+
~s( "#{filename}" => #{inspect(checksum, limit: :infinity)},\n)
592+
end
593+
594+
File.write!(file, ["%{\n", lines, "}\n"])
595+
596+
_ ->
597+
raise "could not find the OTP app for #{inspect(nif_module)} in the metadata file. Please compile the project again with: `mix compile --force`."
598+
end
599+
end
600+
601+
defp checksum_file(nif_module) do
602+
# Saves the file in the project root.
603+
Path.join(File.cwd!(), "checksum-#{nif_module}.exs")
604+
end
343605
end

0 commit comments

Comments
 (0)