Update docs #951


Merged
merged 12 commits into from
Apr 29, 2025
3 changes: 3 additions & 0 deletions docs/Project.toml
@@ -5,3 +5,6 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"

[compat]
Documenter = "1"

[sources.StatsBase]
path = ".."
2 changes: 2 additions & 0 deletions docs/make.jl
@@ -5,6 +5,8 @@ if Base.HOME_PROJECT[] !== nothing
Base.HOME_PROJECT[] = abspath(Base.HOME_PROJECT[])
end

DocMeta.setdocmeta!(StatsBase, :DocTestSetup, :(using StatsBase))

makedocs(
sitename = "StatsBase.jl",
modules = [StatsBase, StatsAPI],
2 changes: 1 addition & 1 deletion docs/src/index.md
@@ -19,7 +19,7 @@ Pkg.add("StatsBase")
```

To load the package, use the command:
```
```julia
using StatsBase
```

40 changes: 23 additions & 17 deletions docs/src/weights.md
@@ -1,3 +1,8 @@
```@meta
DocTestSetup = quote
using StatsBase
end
```
# Weight Vectors

In statistical applications, it is not uncommon to assign weights to samples. To facilitate the use of weight vectors, we introduce the abstract type `AbstractWeights` for the purpose of representing weight vectors, which has two advantages:
@@ -68,40 +73,42 @@ weights to past observations.

If `t` is a vector of temporal indices then for each index `i` we compute the weight as:

``λ (1 - λ)^{1 - i}``
```math
λ (1 - λ)^{1 - i}
```

``λ`` is a smoothing factor or rate parameter such that ``0 < λ ≤ 1``.
As this value approaches 0, the resulting weights will be almost equal,
while values closer to 1 will put greater weight on the tail elements of the vector.
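As a quick check of the formula above, the unscaled weights can be computed directly; this is a standalone sketch of the definition (`ew` is an illustrative name, not the package implementation):

```julia
# Unscaled exponential weights, straight from the formula λ (1 - λ)^(1 - i);
# a sketch of the definition, not StatsBase's eweights.
ew(t, λ) = [λ * (1 - λ)^(1 - i) for i in t]

ew(1:3, 0.3)
# i = 1 gives 0.3, i = 2 gives 0.3/0.7 ≈ 0.4286, i = 3 gives 0.3/0.49 ≈ 0.6122
```

These values match the first three entries of the `eweights(1:10, 0.3)` example below.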

For example, the following call generates exponential weights for ten observations with ``λ = 0.3``.
```julia-repl
```jldoctest
julia> eweights(1:10, 0.3)
10-element Weights{Float64,Float64,Array{Float64,1}}:
10-element Weights{Float64, Float64, Vector{Float64}}:
0.3
0.42857142857142855
0.6122448979591837
0.8746355685131197
1.249479383590171
1.7849705479859588
2.549957925694227
2.5499579256942266
3.642797036706039
5.203995766722913
7.434279666747019
```

Simply passing the number of observations `n` is equivalent to passing in `1:n`.

```julia-repl
```jldoctest
julia> eweights(10, 0.3)
10-element Weights{Float64,Float64,Array{Float64,1}}:
10-element Weights{Float64, Float64, Vector{Float64}}:
0.3
0.42857142857142855
0.6122448979591837
0.8746355685131197
1.249479383590171
1.7849705479859588
2.549957925694227
2.5499579256942266
3.642797036706039
5.203995766722913
7.434279666747019
@@ -117,25 +124,24 @@ julia> r
2019-01-01T01:00:00:1 hour:2019-01-02T01:00:00

julia> eweights(t, r, 0.3)
3-element Weights{Float64,Float64,Array{Float64,1}}:
3-element Weights{Float64, Float64, Vector{Float64}}:
0.3
0.6122448979591837
1.249479383590171
```

NOTE: This is equivalent to `eweights(something.(indexin(t, r)), 0.3)`, which is saying that for each value in `t` return the corresponding index for that value in `r`.
Since `indexin` returns `nothing` if there is no corresponding value from `t` in `r` we use `something` to eliminate that possibility.
!!! note
This is equivalent to `eweights(something.(indexin(t, r)), 0.3)`, which is saying that for each value in `t` return the corresponding index for that value in `r`.
Since `indexin` returns `nothing` if there is no corresponding value from `t` in `r` we use `something` to eliminate that possibility.
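To make that equivalence concrete, here is a minimal illustration (with toy values) of what `indexin` and `something` do in that expression:

```julia
# indexin returns, for each element of t, its index in r (or nothing if absent);
# broadcasting something.() unwraps the non-nothing results.
r = 1:2:9                          # 1, 3, 5, 7, 9
t = [3, 9]
idx = something.(indexin(t, r))    # [2, 5]: 3 is r[2], 9 is r[5]
```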

## Methods

`AbstractWeights` implements the following methods:
```
eltype
length
isempty
values
sum
```
- `eltype`
- `length`
- `isempty`
- `values`
- `sum`

The following constructors are provided:
```@docs
9 changes: 4 additions & 5 deletions src/cov.jl
@@ -188,7 +188,8 @@ cov(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector) =
error("cov is not defined for $(typeof(ce)), $(typeof(x)) and $(typeof(y))")

"""
cov(ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights]; mean=nothing, dims::Int=1)
cov(ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights];
mean=nothing, dims::Int=1)

Compute the covariance matrix of the matrix `X` along dimension `dims`
using estimator `ce`. A weighting vector `w` can be specified.
@@ -238,10 +239,8 @@ function cor(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector)
end

"""
cor(
ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights];
mean=nothing, dims::Int=1
)
cor(ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights];
mean=nothing, dims::Int=1)

Compute the correlation matrix of the matrix `X` along dimension `dims`
using estimator `ce`. A weighting vector `w` can be specified.
2 changes: 1 addition & 1 deletion src/deviation.jl
@@ -90,7 +90,7 @@ end
Linfdist(a, b)

Compute the L∞ distance, also called the Chebyshev distance, between
two arrays: ``\\max_{i\\in1:n} |a_i - b_i|``.
two arrays: ``\\max_{1≤i≤n} |a_i - b_i|``.
Efficient equivalent of `maxabs(a - b)`.
"""
function Linfdist(a::AbstractArray{T}, b::AbstractArray{T}) where T<:Number
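The L∞ distance in the docstring above admits a one-line sketch (assuming equal-length numeric arrays; this is not the package's optimized version):

```julia
# L∞ (Chebyshev) distance: the largest elementwise absolute difference.
linf(a, b) = maximum(abs(x - y) for (x, y) in zip(a, b))

linf([1, 5, 2], [4, 5, 0])   # max(3, 0, 2) = 3
```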
9 changes: 5 additions & 4 deletions src/hist.jl
@@ -155,15 +155,15 @@ closed: right
isdensity: false
```
## Example illustrating `isdensity`
```julia
```jldoctest
julia> using StatsBase, LinearAlgebra

julia> bins = [0,1,7]; # a small and a large bin

julia> obs = [0.5, 1.5, 1.5, 2.5]; # one observation in the small bin and three in the large

julia> h = fit(Histogram, obs, bins)
Histogram{Int64,1,Tuple{Array{Int64,1}}}
Histogram{Int64, 1, Tuple{Vector{Int64}}}
edges:
[0, 1, 7]
weights: [1, 3]
@@ -173,7 +173,7 @@ isdensity: false
julia> # observe isdensity = false and the weights field records the number of observations in each bin

julia> normalize(h, mode=:density)
Histogram{Float64,1,Tuple{Array{Int64,1}}}
Histogram{Float64, 1, Tuple{Vector{Int64}}}
edges:
[0, 1, 7]
weights: [1.0, 0.5]
@@ -459,7 +459,8 @@ float(h::Histogram{T,N}) where {T,N} = Histogram(h.edges, float(h.weights), h.cl


"""
normalize!(h::Histogram{T,N}, aux_weights::Array{T,N}...; mode::Symbol=:pdf) where {T<:AbstractFloat,N}
normalize!(h::Histogram{T,N}, aux_weights::Array{T,N}...;
mode::Symbol=:pdf) where {T<:AbstractFloat,N}

Normalize the histogram `h` and optionally scale one or more auxiliary weight
arrays appropriately. See description of `normalize` for details. Returns `h`.
6 changes: 3 additions & 3 deletions src/reliability.jl
@@ -19,11 +19,11 @@ Calculate Cronbach's alpha (1951) from a covariance matrix `covmatrix` according
the [formula](https://en.wikipedia.org/wiki/Cronbach%27s_alpha):

```math
\\rho = \\frac{k}{k-1} (1 - \\frac{\\sum^k_{i=1} \\sigma^2_i}{\\sum_{i=1}^k \\sum_{j=1}^k \\sigma_{ij}})
ρ = \\frac{k}{k-1} \\left(1 - \\frac{\\sum^k_{i=1} σ^2_i}{\\sum_{i=1}^k \\sum_{j=1}^k σ_{ij}}\\right)
```

where ``k`` is the number of items, i.e. columns, ``\\sigma_i^2`` the item variance,
and ``\\sigma_{ij}`` the inter-item covariance.
where ``k`` is the number of items, i.e. columns, ``σ_i^2`` the item variance,
and ``σ_{ij}`` the inter-item covariance.

Returns a `CronbachAlpha` object that holds:

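The Cronbach's alpha formula in the docstring above can be checked against a standalone sketch (`cronbach_sketch` is an illustrative name, not the package API):

```julia
using LinearAlgebra

# Cronbach's alpha straight from the formula: k items (columns), item
# variances on the diagonal, total covariance in the denominator.
function cronbach_sketch(C::AbstractMatrix)
    k = size(C, 1)
    k / (k - 1) * (1 - tr(C) / sum(C))
end

cronbach_sketch([1.0 0.5; 0.5 1.0])   # 2 * (1 - 2/3) ≈ 0.667
```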
8 changes: 4 additions & 4 deletions src/robust.jl
@@ -41,9 +41,9 @@ To compute the trimmed mean of `x` use `mean(trim(x))`;
to compute the variance use `trimvar(x)` (see [`trimvar`](@ref)).

# Example
```julia
```jldoctest
julia> collect(trim([5,2,4,3,1], prop=0.2))
3-element Array{Int64,1}:
3-element Vector{Int64}:
2
4
3
@@ -80,9 +80,9 @@ elements equal the lower or upper bound.
To compute the Winsorized mean of `x` use `mean(winsor(x))`.

# Example
```julia
```jldoctest
julia> collect(winsor([5,2,3,4,1], prop=0.2))
5-element Array{Int64,1}:
5-element Vector{Int64}:
4
2
3
2 changes: 1 addition & 1 deletion src/sampling.jl
@@ -188,7 +188,7 @@ knuths_sample!(a::AbstractArray, x::AbstractArray; initshuffle::Bool=true) =
Fisher-Yates shuffling (with early termination).

Pseudo-code:
```
```julia
n = length(a)
k = length(x)

30 changes: 15 additions & 15 deletions src/scalarstats.jl
@@ -240,30 +240,30 @@ Let `count_less` be the number of elements of `itr` that are less than `value`,
Then `method` supports the following definitions:

- `:inc` (default): Return a value in the range 0 to 1 inclusive.
Return `count_less / (n - 1)` if `value ∈ itr`, otherwise apply interpolation based on
definition 7 of quantile in Hyndman and Fan (1996)
(equivalent to Excel `PERCENTRANK` and `PERCENTRANK.INC`).
This definition corresponds to the lower semi-continuous inverse of
[`quantile`](@ref) with its default parameters.
Return `count_less / (n - 1)` if `value ∈ itr`, otherwise apply interpolation based on
definition 7 of quantile in Hyndman and Fan (1996)
(equivalent to Excel `PERCENTRANK` and `PERCENTRANK.INC`).
This definition corresponds to the lower semi-continuous inverse of
[`quantile`](@ref) with its default parameters.

- `:exc`: Return a value in the range 0 to 1 exclusive.
Return `(count_less + 1) / (n + 1)` if `value ∈ itr` otherwise apply interpolation
based on definition 6 of quantile in Hyndman and Fan (1996)
(equivalent to Excel `PERCENTRANK.EXC`).
Return `(count_less + 1) / (n + 1)` if `value ∈ itr` otherwise apply interpolation
based on definition 6 of quantile in Hyndman and Fan (1996)
(equivalent to Excel `PERCENTRANK.EXC`).

- `:compete`: Return `count_less / (n - 1)` if `value ∈ itr`, otherwise
return `(count_less - 1) / (n - 1)`, without interpolation
(equivalent to MariaDB `PERCENT_RANK`, dplyr `percent_rank`).
return `(count_less - 1) / (n - 1)`, without interpolation
(equivalent to MariaDB `PERCENT_RANK`, dplyr `percent_rank`).

- `:tied`: Return `(count_less + count_equal/2) / n`, without interpolation.
Based on the definition in Roscoe, J. T. (1975)
(equivalent to `"mean"` kind of SciPy `percentileofscore`).
Based on the definition in Roscoe, J. T. (1975)
(equivalent to `"mean"` kind of SciPy `percentileofscore`).

- `:strict`: Return `count_less / n`, without interpolation
(equivalent to `"strict"` kind of SciPy `percentileofscore`).
(equivalent to `"strict"` kind of SciPy `percentileofscore`).

- `:weak`: Return `(count_less + count_equal) / n`, without interpolation
(equivalent to `"weak"` kind of SciPy `percentileofscore`).
(equivalent to `"weak"` kind of SciPy `percentileofscore`).

!!! note
An `ArgumentError` is thrown if `itr` contains `NaN` or `missing` values
@@ -279,7 +279,7 @@ Hyndman, R.J and Fan, Y. (1996) "[Sample Quantiles in Statistical Packages]
*The American Statistician*, Vol. 50, No. 4, pp. 361-365.

# Examples
```julia
```julia-repl
julia> using StatsBase

julia> v1 = [1, 1, 1, 2, 3, 4, 8, 11, 12, 13];
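The non-interpolating `method` definitions in the docstring above (`:tied`, `:strict`, `:weak`) follow directly from `count_less` and `count_equal`; this sketch illustrates them and is not the package implementation:

```julia
# Percentile-rank definitions without interpolation, per the docstring.
function percentrank_sketch(itr, value; method::Symbol=:tied)
    n = length(itr)
    count_less  = count(<(value), itr)
    count_equal = count(==(value), itr)
    method === :tied   ? (count_less + count_equal / 2) / n :
    method === :strict ? count_less / n :
    method === :weak   ? (count_less + count_equal) / n :
    throw(ArgumentError("only :tied, :strict and :weak are sketched here"))
end

percentrank_sketch([1, 2, 3, 4], 2)                 # (1 + 0.5) / 4 = 0.375
percentrank_sketch([1, 2, 3, 4], 2; method=:weak)   # (1 + 1) / 4 = 0.5
```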
10 changes: 7 additions & 3 deletions src/transformations.jl
@@ -47,6 +47,8 @@ reconstruct(t::AbstractDataTransform, y::AbstractVector{<:Real}) =
vec(reconstruct(t, reshape(y, :, 1)))

"""
ZScoreTransform <: AbstractDataTransform

Standardization (Z-score transformation)
"""
struct ZScoreTransform{T<:Real, U<:AbstractVector{T}} <: AbstractDataTransform
@@ -201,6 +203,8 @@ end
end

"""
UnitRangeTransform <: AbstractDataTransform

Unit range normalization
"""
struct UnitRangeTransform{T<:Real, U<:AbstractVector} <: AbstractDataTransform
@@ -237,7 +241,7 @@ and return a `UnitRangeTransform` transformation object.
# Keyword arguments

* `dims`: if `1` fit standardization parameters in column-wise fashion;
if `2` fit in row-wise fashion. The default is `nothing`.
if `2` fit in row-wise fashion. The default is `nothing`.

* `unit`: if `true` (the default) shift the minimum data to zero.

@@ -341,8 +345,8 @@ end
"""
standardize(DT, X; dims=nothing, kwargs...)

Return a standardized copy of vector or matrix `X` along dimensions `dims`
using transformation `DT` which is a subtype of `AbstractDataTransform`:
Return a standardized copy of vector or matrix `X` along dimensions `dims`
using transformation `DT` which is a subtype of `AbstractDataTransform`:

- `ZScoreTransform`
- `UnitRangeTransform`
23 changes: 14 additions & 9 deletions src/weights.jl
@@ -230,11 +230,15 @@ If `n` is explicitly passed instead of `t`, `t` defaults to `1:n`.

If `scale` is `true` then for each element `i` in `t` the weight value is computed as:

``(1 - λ)^{n - i}``
```math
(1 - λ)^{n - i}
```

If `scale` is `false` then each value is computed as:

``λ (1 - λ)^{1 - i}``
```math
λ (1 - λ)^{1 - i}
```

# Arguments

@@ -250,9 +254,9 @@
- `scale::Bool`: Return the weights scaled to between 0 and 1 (default: false)

# Examples
```julia-repl
```jldoctest
julia> eweights(1:10, 0.3; scale=true)
10-element Weights{Float64,Float64,Array{Float64,1}}:
10-element Weights{Float64, Float64, Vector{Float64}}:
0.04035360699999998
0.05764800999999997
0.08235429999999996
Expand All @@ -265,8 +269,8 @@ julia> eweights(1:10, 0.3; scale=true)
1.0
```
# Links
- https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
- https://en.wikipedia.org/wiki/Exponential_smoothing
- <https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average>
- <https://en.wikipedia.org/wiki/Exponential_smoothing>
"""
function eweights(t::AbstractArray{<:Integer}, λ::Real; kwargs...)
isempty(t) && return Weights(copy(t), 0)
Expand Down Expand Up @@ -594,6 +598,7 @@ wsumtype(::Type{T}, ::Type{T}) where {T<:BlasReal} = T
wsum!(R::AbstractArray, A::AbstractArray,
w::AbstractVector, dim::Int;
init::Bool=true)

Compute the weighted sum of `A` with weights `w` over the dimension `dim` and store
the result in `R`. If `init=false`, the sum is added to `R` rather than starting
from zero.
Expand Down Expand Up @@ -705,11 +710,11 @@ With [`FrequencyWeights`](@ref), the function returns the same result as
`quantile` for a vector with repeated values. Weights must be integers.

With non `FrequencyWeights`, denote ``N`` the length of the vector, ``w`` the vector of weights,
``h = p (\\sum_{i \\leq N} w_i - w_1) + w_1`` the cumulative weight corresponding to the
``h = p (\\sum_{i \\leq N} w_i - w_1) + w_1`` the cumulative weight corresponding to the
probability ``p`` and ``S_k = \\sum_{i \\leq k} w_i`` the cumulative weight for each
observation, define ``v_{k+1}`` the smallest element of `v` such that ``S_{k+1}``
is strictly superior to ``h``. The weighted ``p`` quantile is given by ``v_k + \\gamma (v_{k+1} - v_k)``
with ``\\gamma = (h - S_k)/(S_{k+1} - S_k)``. In particular, when all weights are equal,
is strictly superior to ``h``. The weighted ``p`` quantile is given by ``v_k + γ (v_{k+1} - v_k)``
with ``γ = (h - S_k)/(S_{k+1} - S_k)``. In particular, when all weights are equal,
the function returns the same result as the unweighted `quantile`.
"""
function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector{<:Real}) where {V,W<:Real}
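The non-`FrequencyWeights` quantile rule described in the docstring above can be sketched as follows (a simplified illustration that omits the package's argument checking and edge-case handling):

```julia
# Weighted p-quantile per the definition above: h interpolates the total
# weight, S holds cumulative weights, and the result interpolates v.
function wquantile_sketch(v::AbstractVector, w::AbstractVector, p::Real)
    perm = sortperm(v)
    vs, ws = v[perm], w[perm]
    S = cumsum(ws)
    h = p * (S[end] - ws[1]) + ws[1]
    k = searchsortedfirst(S, h)          # first index with S_k ≥ h
    k == 1 && return float(vs[1])
    γ = (h - S[k - 1]) / (S[k] - S[k - 1])
    vs[k - 1] + γ * (vs[k] - vs[k - 1])
end

wquantile_sketch([1, 2, 3, 4], [1, 1, 1, 1], 0.5)   # 2.5, as for unweighted quantile
```

With equal weights this reduces to the standard definition 7 interpolation, consistent with the claim that it matches the unweighted `quantile`.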