Skip to content

Commit c451aa1

Browse files
committed
clarifies stream seeking for querying and detection functions
1 parent 50c4ef3 commit c451aa1

File tree

4 files changed

+38
-18
lines changed

4 files changed

+38
-18
lines changed

README.md

+9-8
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ using FileIO
2525
obj = load(filename)
2626
```
2727
to read data from a formatted file. Likewise, saving might be as simple as
28-
```
28+
```jl
2929
save(filename, obj)
3030
```
3131

@@ -53,9 +53,11 @@ add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
5353
# have one of two possible file extensions
5454
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])
5555

56-
# A format whose magic bytes might not be at the beginning of the file,
57-
# necessitating a custom function `detecthdf5` to find them
58-
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"])
56+
# A format whose magic bytes are more complicated, necessitating a custom function
57+
# `detectwav` to find them. The function should assume that the stream is
58+
# positioned at the beginning of the file being detected, and the query
59+
# infrastructure will handle seeking to the correct position afterwards.
60+
add_format(format"WAV", detectwav, ".wav")
5961

6062
# A fictitious format that, unfortunately, provides no magic
6163
# bytes. Here we have to place our faith in the file extension.
@@ -103,7 +105,6 @@ using FileIO
103105
# See important note about scope below
104106
function load(f::File{format"PNG"})
105107
open(f) do s
106-
skipmagic(s) # skip over the magic bytes
107108
# You can just call the method below...
108109
ret = load(s)
109110
# ...or implement everything here instead
@@ -112,7 +113,7 @@ end
112113

113114
# You can support streams and add keywords:
114115
function load(s::Stream{format"PNG"}; keywords...)
115-
# s is already positioned after the magic bytes
116+
skipmagic(s) # skip over the magic bytes
116117
# Do the stuff to read a PNG file
117118
chunklength = read(s, UInt32)
118119
...
@@ -130,8 +131,8 @@ end
130131
Note that these are `load` and `save`, **not** `FileIO.load` and `FileIO.save`.
131132
Because a given format might have multiple packages that are capable of reading it,
132133
FileIO will dispatch to these using module-scoping, e.g., `SomePkg.load(args...)`.
133-
Consequently, **packages should define "private" `load` and `save` methods, and
134-
not extend (import) FileIO's**.
134+
Consequently, **packages should define "private" `load` and `save` methods, and
135+
not extend (import) FileIO's**.
135136

136137
`load(::File)` and `save(::File)` should close any streams
137138
they open. (If you use the `do` syntax, this happens for you

src/query.jl

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ For example:
7272
add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"])
7373
add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
7474
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])
75+
add_format(format"WAV", detectwav, [".wav", ".WAV"])
7576
7677
Note that extensions, magic numbers, and format-identifiers are case-sensitive.
7778
"""

src/registry.jl

+6-7
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz])
88

99
# test for RD?2 magic sequence at the beginning of R data input stream
1010
function detect_rdata(io)
11-
seekstart(io)
1211
read(io, UInt8) == UInt8('R') &&
1312
read(io, UInt8) == UInt8('D') &&
1413
read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
@@ -19,10 +18,8 @@ end
1918
add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [:RData, LOAD])
2019

2120
function detect_rdata_single(io)
22-
seekstart(io)
2321
res = read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
2422
(c = read(io, UInt8); c == UInt8('\n') || (c == UInt8('\r') && read(io, UInt8) == UInt8('\n')))
25-
seekstart(io)
2623
return res
2724
end
2825

@@ -145,10 +142,9 @@ add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO])
145142

146143
### Audio formats
147144
function detectwav(io)
148-
seekstart(io)
149145
magic = read!(io, Vector{UInt8}(4))
150146
magic == b"RIFF" || return false
151-
seek(io, 8)
147+
skip(io, 4)
152148
submagic = read!(io, Vector{UInt8}(4))
153149

154150
submagic == b"WAVE"
@@ -200,10 +196,9 @@ skipmagic(io, ::typeof(detect_noometiff)) = seek(io, 4)
200196

201197
# AVI is a subtype of RIFF, as is WAV
202198
function detectavi(io)
203-
seekstart(io)
204199
magic = read!(io, Vector{UInt8}(4))
205200
magic == b"RIFF" || return false
206-
seek(io, 8)
201+
skip(io, 4)
207202
submagic = read!(io, Vector{UInt8}(4))
208203

209204
submagic == b"AVI "
@@ -212,6 +207,8 @@ add_format(format"AVI", detectavi, ".avi", [:ImageMagick])
212207

213208
# HDF5: the complication is that the magic bytes may start at
214209
# 0, 512, 1024, 2048, or any successive doubling thereafter
210+
# this detection function assumes that the stream start and end match the
211+
# file start and end, which is true if it's just a file on disk
215212
h5magic = (0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a)
216213
function detecthdf5(io)
217214
position(io) == 0 || return false
@@ -234,6 +231,8 @@ function detecthdf5(io)
234231
end
235232
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5])
236233

234+
# the STL detection functions assume that the stream start and end match the
235+
# file start and end, which is true if it's just a file on disk
237236
function detect_stlascii(io)
238237
try
239238
position(io) != 0 && (seekstart(io); return false)

test/query.jl

+22-3
Original file line numberDiff line numberDiff line change
@@ -336,13 +336,32 @@ end
336336
q = query(joinpath(file_dir, "minimal_ascii.rds"))
337337
@test typeof(q) == File{format"RDataSingle"}
338338
open(q) do io
339-
@test position(io) == 0
340339
@test FileIO.detect_rdata_single(io)
341-
# need to seek to beginning of file where data structure starts
342-
@test position(io) == 0
343340
end
344341
end
345342
@testset "Format with function for magic bytes" begin
346343
add_format(format"FUNCTION_FOR_MAGIC_BYTES", x -> 0x00, ".wav", [:WAV])
347344
del_format(format"FUNCTION_FOR_MAGIC_BYTES")
348345
end
346+
347+
function detect_position_test(io)
348+
return read(io, 3) == b"DET"
349+
end
350+
351+
@testset "Detection function called with properly-positioned stream" begin
352+
add_format(format"DET", detect_position_test, ".det")
353+
# we need extra junk to work around issue #176
354+
junk = rand(UInt8, 35)
355+
io = IOBuffer()
356+
write(io, "DET")
357+
write(io, junk)
358+
seek(io, 0)
359+
@test query(io) isa Formatted{format"DET"}
360+
@test position(io) == 0
361+
write(io, "junkDET")
362+
write(io, junk)
363+
seek(io, 4)
364+
@test query(io) isa Formatted{format"DET"}
365+
@test position(io) == 4
366+
del_format(format"DET")
367+
end

0 commit comments

Comments
 (0)