%% From af084519ee9e1c46fd3af47a2a0af644eb7e4072 Mon Sep 17 00:00:00 2001
%% From: Martin Sumner
%% Date: Thu, 26 Mar 2026 15:45:27 +0000
%% Subject: [PATCH 01/53] Initial Commit WIP.
%%
%% A framework of modules and functions for SliverMachine.
%% ---
%%  src/riak_api_web.erl          |  46 ++-
%%  src/riak_api_web_acceptor.erl | 535 ++++++++++++++++++++++++++++++++
%%  src/riak_api_web_body.erl     | 192 ++++++++++++
%%  src/riak_api_web_headers.erl  | 557 ++++++++++++++++++++++++++++++++++
%%  src/riak_api_web_security.erl |  90 +++---
%%  src/riak_api_web_socket.erl   | 475 +++++++++++++++++++++++++++++
%%  6 files changed, 1859 insertions(+), 36 deletions(-)
%%  create mode 100644 src/riak_api_web_acceptor.erl
%%  create mode 100644 src/riak_api_web_body.erl
%%  create mode 100644 src/riak_api_web_headers.erl
%%  create mode 100644 src/riak_api_web_socket.erl
%%
%% diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl
%% index 8b609f6..18b853f 100644
%% --- a/src/riak_api_web.erl
%% +++ b/src/riak_api_web.erl
%% @@ -25,11 +25,53 @@
%% (post-patch content of the hunk; the export list below replaces the
%% previous one that exported only get_listeners/0 and binding_config/2)

-module(riak_api_web).

-export(
    [
        get_listeners/0,
        binding_config/2,
        add_routes/1,
        get_route/2
    ]
).

-include_lib("kernel/include/logger.hrl").

%% Key under which the ordered route table is held in persistent_term.
-define(ROUTE_KEY, {?MODULE, web_routes}).

%% A route is a priority paired with the callback module to be consulted.
-type route() :: {1..100, module()}.


%% @doc
%% Append the given routes to the stored route table and store the result
%% sorted on the first tuple element (the priority).
-spec add_routes(list(route())) -> ok.
add_routes(Routes) ->
    Existing = persistent_term:get(?ROUTE_KEY, []),
    persistent_term:put(?ROUTE_KEY, lists:keysort(1, Existing ++ Routes)).

%% @doc
%% Find the first stored route whose callback module matches the method and
%% path.  The stored routes are consulted in their sorted order; a halt
%% response is returned when no module matches.
-spec get_route(
    riak_api_web_acceptor:method(),
    unicode:chardata()
) ->
    {
        ok,
        module(),
        any(),
        {pos_integer(), pos_integer(), pos_integer()}
    } |
    riak_api_web_acceptor:halt_response().
get_route(Method, Path) ->
    get_route(persistent_term:get(?ROUTE_KEY, []), Method, Path).

%% Walk the route list in order, asking each callback module whether it
%% matches.  The first module that does not answer `no_match` wins; an
%% exhausted list yields a 404 halt response with an empty body.
get_route([], _Method, _Path) ->
    {halt, 404, none, <<>>, []};
get_route([{_Priority, CallbackMod} | Rest], Method, Path) ->
    case CallbackMod:match_route(Method, Path) of
        no_match ->
            get_route(Rest, Method, Path);
        {Context, {MaxHdrCount, MaxHdrSize, MaxBodySize}} ->
            {ok, CallbackMod, Context, {MaxHdrCount, MaxHdrSize, MaxBodySize}}
    end.

get_listeners() ->
    get_listeners(http) ++ get_listeners(https).

%% diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl
%% new file mode 100644
%% index 0000000..43debef
%% --- /dev/null
%% +++ b/src/riak_api_web_acceptor.erl
%% @@ -0,0 +1,535 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2026 Martin Sumner
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% @doc Handler for a HTTP connection, where the connection will be associated
%% with a module implementing the riak_api_web_rest behaviour

-module(riak_api_web_acceptor).

-if(?OTP_RELEASE == 26).
-feature(maybe_expr, enable).
-endif.

-export([start_link/1, init/2]).

-export([extend_buffer/4]).

-include_lib("kernel/include/logger.hrl").

-define(ACCEPT_TIMEOUT, 10000).
-define(RECEIVE_TIMEOUT, 60000).
%% Interim response sent ahead of the body when the client sent
%% `Expect: 100-continue`.  This must be a complete HTTP message: a status
%% line using the `HTTP/1.1` version token, terminated by CRLF, followed by the
%% empty line that ends the (empty) header block.
-define(CONTINUE_RESPONSE, <<"HTTP/1.1 100 Continue\r\n\r\n">>).

%% Any HTTP status code.  The acceptor itself emits 400, 404, 405, 431 and 505
%% as halt responses, and callback modules choose the code for successful
%% requests.
-type response_code() :: 100..599.

%% Request methods accepted on the request line.
-type method() ::
    'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE' | 'OPTIONS' | 'TRACE'.

%% Supported protocol versions, as {Major, Minor}.
-type http_version() ::
    {1, 0} | {1, 1}.

%% Early-exit value produced by any stage of request handling:
%% {halt, Code, Headers, FormatText, FormatArgs}.
-type halt_response() ::
    {
        halt,
        response_code(),
        riak_api_web_headers:headers()|none,
        binary(),
        list()
    }.

%% A halt once the error body has been formatted and the socket attached.
-type halt_result() ::
    {
        halt,
        response_code(),
        riak_api_web_headers:header_list(),
        binary(),
        riak_api_web_socket:socket()
    }.

%% A completed request:
%% {finish, KeepAlive, Code, Headers, Body, {CallbackMod, Context}, Socket,
%%  UnreadBuffer, StartTime}.
-type good_result() ::
    {
        finish,
        boolean(),
        response_code(),
        riak_api_web_headers:headers(),
        {stream, stream_fun()} | binary(),
        {module(), any()},
        riak_api_web_socket:socket(),
        binary(),
        pos_integer()
    }.

-type stream_fun() :: fun(() -> {ok, binary()}|done).

-export_type([halt_response/0, method/0]).

%%%============================================================================
%%% API
%%%============================================================================

%% @doc
%% Spawn an acceptor process linked to the caller; the caller's pid is handed
%% to init/2 as the server to be notified once a connection is accepted.
-spec start_link(riak_api_web_socket:socket()) -> pid().
start_link(Socket) ->
    Server = self(),
    spawn_link(?MODULE, init, [Server, Socket]).

%% @doc
%% Wait for a connection on the socket.  Accept timeouts and TLS alerts cause
%% the wait to be restarted, a closed socket ends the process normally, and any
%% other error exits the process with that error.  Once accepted, the server is
%% informed and the request loop is entered with an empty buffer.
%% NOTE(review): the success clause matches the accept result against the
%% already-bound Socket, so it assumes accept/2 returns the same term it was
%% given - confirm against riak_api_web_socket.
-spec init(pid(), riak_api_web_socket:socket()) -> ok.
init(Server, Socket) ->
    case riak_api_web_socket:accept(Socket, ?ACCEPT_TIMEOUT) of
        {error, closed} ->
            ok;
        {error, timeout} ->
            init(Server, Socket);
        {error, {tls_alert, Alert}} ->
            ?LOG_WARNING("TLS Alert received ~0p", [Alert]),
            init(Server, Socket);
        {error, Other} ->
            exit({error, Other});
        {ok, Socket} ->
            ok = riak_api_web_socket:acceptor_accepted(Server),
            loop(Socket, <<>>)
    end.

%%%============================================================================
%%% Primary Loop
%%%============================================================================

-spec loop(riak_api_web_socket:socket(), binary()) -> ok.
%% Handle requests on the socket one at a time until a request indicates the
%% connection should not be kept alive, then close the socket.
loop(Socket, InitBuffer) ->
    %% In the keepalive loop, the send buffer is assumed to be empty
    %% And so pipelining of requests (in parallel) is explicitly not supported
    case handle_request(Socket, InitBuffer) of
        {true, Buffer} ->
            loop(Socket, Buffer);
        _Close ->
            riak_api_web_socket:close(Socket),
            ok
    end.

%% Read, route and process a single request, then hand the outcome to
%% handle_response/1.
%%
%% The stages run in order inside a `maybe`: request line, path split, route
%% lookup, header block, then the callback module's permission check, query
%% parameter parsing, header parsing and request processing.  Any stage may
%% short-circuit with a halt_response(), which is turned into an error
%% response.  A stage returning `{error, Reason}` (send_continue/2 is declared
%% to do so when the interim response cannot be written) abandons the
%% connection in the same way as a failed read in extend_buffer/4: log, close
%% and exit normally.
handle_request(Socket, InitBuffer) ->
    StartTime = os:system_time(microsecond),
    reset_version(),
    RequestResult =
        maybe
            {ok, Peer} = riak_api_web_socket:get_peer(Socket),
            {ok, {Method, RawPath, Version, HdrBuffer}} ?=
                get_request_line(Socket, InitBuffer),
            set_version(Version),
            {ok, {Path, QueryParams}} ?= split_path(RawPath),
            {
                ok,
                CallbackMod,
                InitModCtx,
                {MaxHdrCount, MaxHdrSize, MaxBodySize}
            } ?=
                riak_api_web:get_route(Method, Path),
            {ok, ReqHeaders, BdyBuffer} ?=
                get_request_headers(
                    HdrBuffer,
                    Socket,
                    {MaxHdrCount, MaxHdrSize}
                ),
            {ok, ModCtx1} ?=
                CallbackMod:check_permissions(
                    InitModCtx,
                    ReqHeaders,
                    element(1, Socket),
                    Peer
                ),
            {ok, ModCtx2} ?=
                CallbackMod:parse_query_params(ModCtx1, QueryParams),
            {ok, ModCtx3} ?=
                CallbackMod:parse_request_headers(ModCtx2, ReqHeaders),
            {ok, {CLorChunk, UseGzip}} ?= expect_body(ReqHeaders),
            {ok, InitReqBdy} ?=
                riak_api_web_body:initiate_body(
                    extend_buffer_fun(Socket),
                    BdyBuffer,
                    CLorChunk,
                    UseGzip,
                    MaxBodySize
                ),
            ok ?= send_continue(Socket, ReqHeaders),
            {ok, Code, RspHeaders, RspBody, KeepAliveOK, ReqBdy1, ModCtx4} ?=
                CallbackMod:process_request(
                    ModCtx3,
                    InitReqBdy
                ),
            Keepalive =
                request_prefers_keepalive(Version, ReqHeaders) andalso
                    KeepAliveOK,
            MergedRspHeaders =
                riak_api_web_headers:enter_from_list(
                    RspHeaders,
                    default_response_headers(Keepalive)
                ),
            {
                finish,
                Keepalive,
                Code,
                MergedRspHeaders,
                RspBody,
                {CallbackMod, ModCtx4},
                Socket,
                riak_api_web_body:get_buffer(ReqBdy1),
                StartTime
            }
        else
            {halt, HaltRspCode, HaltRspHeaders, HaltRspText, HaltRspSubs} ->
                HaltRspBody = generate_error_body(HaltRspText, HaltRspSubs),
                {
                    halt,
                    HaltRspCode,
                    halt_header_list(HaltRspHeaders),
                    HaltRspBody,
                    Socket
                };
            {error, Reason} ->
                ?LOG_WARNING(
                    "Request abandoned due to error ~0p for socket ~0p",
                    [Reason, Socket]
                ),
                riak_api_web_socket:close(Socket),
                exit(normal)
        end,
    handle_response(RequestResult).

%% Halt responses raised within this file carry `none` in the header position,
%% but the headers are later merged as a list of {Key, Value} pairs - so map
%% `none` to the empty list, and pass anything else through untouched.
halt_header_list(none) ->
    [];
halt_header_list(Headers) ->
    Headers.

%%%============================================================================
%%% Manage Version on Process dictionary
%%%============================================================================


-define(VERSION_KEY, {?MODULE, http_version}).

%% Record the version tag for the request being handled.  The tag is the
%% HTTP-version token as it appears at the start of a status line.
set_version({1, 0}) ->
    put(?VERSION_KEY, <<"HTTP/1.0">>);
set_version({1, 1}) ->
    put(?VERSION_KEY, <<"HTTP/1.1">>).

%% Fetch the version tag for the current request, defaulting to HTTP/1.0 when
%% none has been recorded (e.g. the request line could not be parsed).
get_version() ->
    case get(?VERSION_KEY) of
        undefined ->
            <<"HTTP/1.0">>;
        Tag ->
            Tag
    end.

%% Forget the version recorded for the previous request on this connection.
reset_version() ->
    put(?VERSION_KEY, undefined).


%%%============================================================================
%%% Internal request handling functions
%%%============================================================================

%% Build a 400 halt response from a format string and its arguments.
-spec bad_request(binary(), list()) -> halt_response().
bad_request(Error, Subs) ->
    {halt, 400, none, Error, Subs}.

%% Normalize the raw request target and split it into the path and the list of
%% query parameters.  A target that cannot be normalized, or a query string
%% that cannot be dissected, results in a 400 halt response.
-spec split_path(
    iodata()
) ->
    {
        ok,
        {unicode:chardata(), [{unicode:chardata(), unicode:chardata() | true}]}
    } |
    halt_response().
split_path(URIPath) ->
    case uri_string:normalize(URIPath, [return_map]) of
        URIMap when is_map(URIMap) ->
            Path = maps:get(path, URIMap, <<"">>),
            Query = maps:get(query, URIMap, <<"">>),
            case uri_string:dissect_query(Query) of
                QueryParams when is_list(QueryParams) ->
                    {ok, {Path, QueryParams}};
                {error, QTerm, QReason} ->
                    bad_request(
                        <<"Query parameters not parsed ~w - ~0p">>,
                        [QTerm, QReason]
                    )
            end;
        {error, NTerm, NReason} ->
            bad_request(
                <<"Path cannot be normalized ~w - ~0p">>,
                [NTerm, NReason]
            )
    end.

-spec extend_buffer(
    riak_api_web_socket:socket(),
    binary(),
    non_neg_integer(),
    non_neg_integer()|infinity|undefined
) ->
    binary().
%% @doc
%% Receive more data from the socket and return it appended to Buffer.
%% Needed is passed straight to the socket's recv as the length requested, and
%% Timeout is resolved through get_timeout/1.  A receive error is logged, the
%% socket is closed and the calling process exits normally.
extend_buffer(Socket, Buffer, Needed, Timeout) ->
    case riak_api_web_socket:recv(Socket, Needed, get_timeout(Timeout)) of
        {ok, Data} when is_binary(Data) ->
            <<Buffer/binary, Data/binary>>;
        {error, Reason} ->
            ?LOG_WARNING(
                "Unexpected failure to read data from client "
                "~w for socket ~0p",
                [Reason, Socket]
            ),
            riak_api_web_socket:close(Socket),
            exit(normal)
    end.

%% Capture the socket in a fun with the shape required by riak_api_web_body.
-spec extend_buffer_fun(
    riak_api_web_socket:socket()
) ->
    riak_api_web_body:buffer_fun().
extend_buffer_fun(Socket) ->
    fun(Buffer, Needed, Timeout) ->
        extend_buffer(Socket, Buffer, Needed, Timeout)
    end.

%% Determine from the request headers how the body (if any) is framed.
%% Returns the content length (or `chunked`) and whether gzip was declared.
%%
%% A request carrying neither Content-Length nor Transfer-Encoding has no body
%% (RFC 7230 section 3.3.3) and is reported as a zero-length body rather than
%% being rejected - otherwise every body-less GET would receive a 400.
-spec expect_body(
    riak_api_web_headers:headers()
) ->
    {ok, {non_neg_integer() | chunked , boolean()}} | halt_response().
expect_body(Headers) ->
    ContentLengthH =
        riak_api_web_headers:get_unique_value('Content-Length', Headers),
    Encoding =
        case riak_api_web_headers:get_value('Transfer-Encoding', Headers) of
            MultipleValues when is_list(MultipleValues) ->
                %% sorted so that the match below is order-independent
                lists:sort(MultipleValues);
            SingleValue ->
                SingleValue
        end,
    case {ContentLengthH, Encoding} of
        {ValBin, _} when is_binary(ValBin) ->
            %% Only the conversion is protected, so a failure elsewhere is
            %% not misreported as a malformed length
            try binary_to_integer(ValBin) of
                ContentLength ->
                    sized_body(ContentLength, Encoding)
            catch
                error:badarg ->
                    bad_request(<<"Non-integer content length ~0p">>, [ValBin])
            end;
        {undefined, undefined} ->
            {ok, {0, false}};
        {undefined, <<"chunked">>} ->
            {ok, {chunked, false}};
        {undefined, [<<"chunked">>, <<"gzip">>]} ->
            {ok, {chunked, true}};
        {undefined, UnexpectedEncoding} ->
            UEWarn = <<"Received encoding ~0p without content length">>,
            bad_request(UEWarn, [UnexpectedEncoding]);
        {{error, multiple_values}, _} ->
            bad_request(<<"Content has non-unique length">>, [])
    end.

%% Validate a parsed Content-Length against the declared transfer encoding.
%% Negative lengths are rejected whatever the encoding.
sized_body(ContentLength, _Encoding) when ContentLength < 0 ->
    bad_request(<<"Negative content length ~0p">>, [ContentLength]);
sized_body(ContentLength, undefined) ->
    {ok, {ContentLength, false}};
sized_body(ContentLength, <<"gzip">>) ->
    {ok, {ContentLength, true}};
sized_body(_ContentLength, UnsupportedEncoding) ->
    bad_request(
        <<
            "Content length provided with unsupported "
            "transfer encoding ~0p"
        >>,
        [UnsupportedEncoding]
    ).

-spec generate_error_body(binary(), list(any())) -> binary().
%% Format the text of a halt response into the binary body to be returned.
generate_error_body(ErrorText, Subs) ->
    iolist_to_binary(
        io_lib:format(ErrorText, Subs)
    ).

%% Decode the request line from the buffer, reading more from the socket for
%% as long as the line is incomplete.  On success returns the method, the raw
%% path, the version and the undecoded remainder of the buffer.
%%
%% Only abs_path request targets are accepted.  An unsupported version yields
%% a 505 and an unsupported method a 405; anything the decoder rejects
%% (including an `{error, Reason}` return) yields a 400.
%% NOTE(review): the buffer is extended without an upper bound while the line
%% is incomplete - consider whether a maximum request-line size is required.
-spec get_request_line(
    riak_api_web_socket:socket(),
    binary()
) ->
    {ok, {method(), binary(), http_version(), binary()}} |
    halt_response().
get_request_line(Socket, Buffer) ->
    case erlang:decode_packet(http_bin, Buffer, []) of
        {more, _} ->
            get_request_line(
                Socket,
                extend_buffer(Socket, Buffer, 0, undefined)
            );
        {ok, {http_request, Method, {abs_path, Path}, Version}, Rest}
                when is_binary(Path) ->
            check_request_line(Method, Path, Version, Rest);
        {ok, {http_error, Error}, _} ->
            bad_request(<<"HTTP error on inbound request ~0p">>, [Error]);
        {ok, Unexpected, _} ->
            bad_request(
                <<"Unexpected error on inbound request ~0p">>,
                [Unexpected]
            );
        {error, Reason} ->
            bad_request(<<"Invalid request line ~0p">>, [Reason])
    end.

%% Check the version first and then the method, halting on the first that is
%% not supported.
check_request_line(Method, Path, Version, Rest)
        when Version == {1, 0}; Version == {1, 1} ->
    SupportedMethods =
        ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'],
    case lists:member(Method, SupportedMethods) of
        true ->
            {ok, {Method, Path, Version, Rest}};
        false ->
            {halt, 405, none, <<>>, []}
    end;
check_request_line(_Method, _Path, _UnsupportedVersion, _Rest) ->
    {halt, 505, none, <<"Only HTTP 1.0 and 1.1 supported">>, []}.

%% Parse the block of request headers, using the socket to fetch more data
%% when the block is incomplete, subject to the route's header limits.
-spec get_request_headers(
    binary(),
    riak_api_web_socket:socket(),
    {pos_integer(), pos_integer()}
) ->
    {ok, riak_api_web_headers:headers(), binary()} |
    riak_api_web_acceptor:halt_response().
get_request_headers(Buffer, Socket, {MaxCount, MaxSize}) ->
    riak_api_web_headers:parse_request_block(
        Buffer,
        fun(Prev) when is_binary(Prev) ->
            extend_buffer(Socket, Prev, 0, ?RECEIVE_TIMEOUT)
        end,
        {MaxCount, MaxSize}
    ).

-spec request_prefers_keepalive(
    http_version(),
    riak_api_web_headers:headers()
) ->
    boolean().
%% Does the client want the connection kept open after this request?
%%
%% https://www.rfc-editor.org/rfc/rfc7230#section-6.1
%% Note that connection options are case insensitive, and that the Connection
%% header is a list - so every option present is considered, not just the
%% case where a single option was sent.
%%
%% HTTP/1.0 connections close unless keep-alive is requested; HTTP/1.1
%% connections persist unless close is requested.  A `close` option always
%% wins.
request_prefers_keepalive({1, 0}, ReqHeaders) ->
    Options = connection_options(ReqHeaders),
    lists:member(<<"keep-alive">>, Options) andalso
        not lists:member(<<"close">>, Options);
request_prefers_keepalive({1, 1}, ReqHeaders) ->
    not lists:member(<<"close">>, connection_options(ReqHeaders)).

%% The case-folded connection options of the request, as a list.  The header
%% lookup returns a bare value when there is one option and a list when there
%% are several.
connection_options(ReqHeaders) ->
    case riak_api_web_headers:get_value('Connection', ReqHeaders) of
        Option when is_binary(Option) ->
            [string:casefold(Option)];
        Options when is_list(Options) ->
            [string:casefold(O) || O <- Options, is_binary(O)];
        _ ->
            []
    end.

%% Resolve the timeout to use on a receive: `undefined` selects the default
%% receive timeout, anything else is used as given.
-spec get_timeout(
    undefined|infinity|non_neg_integer()
) ->
    non_neg_integer()|infinity.
get_timeout(undefined) ->
    ?RECEIVE_TIMEOUT;
get_timeout(infinity) ->
    infinity;
get_timeout(Timeout) when is_integer(Timeout), Timeout >= 0 ->
    Timeout.

%%%============================================================================
%%% Internal response handling functions
%%%============================================================================

-spec handle_response(
    good_result() | halt_result()
) ->
    {boolean(), binary()} | close.
%% Send the response for a handled request and report whether the connection
%% may be reused.
%%
%% For a finished request the response is streamed or sent whole depending on
%% the body, the callback module is given the start, request-complete and
%% response-complete times to record, and {Keepalive, UnreadBuffer} is
%% returned.  For a halt the error response is sent with default headers
%% merged in and `close` is returned.
handle_response(
    {
        finish,
        Keepalive,
        RspCode,
        RspHeaders,
        {stream, StreamFun},
        {CallbackMod, Context},
        Socket,
        BufferIn,
        StartTime
    }
) ->
    RequestCompleteTime = os:system_time(microsecond),
    stream_response(RspCode, RspHeaders, StreamFun, Socket),
    ResponseCompleteTime = os:system_time(microsecond),
    CallbackMod:record_request(
        Context,
        StartTime,
        RequestCompleteTime,
        ResponseCompleteTime,
        stream_complete
    ),
    {Keepalive, BufferIn};
handle_response(
    {
        finish,
        Keepalive,
        RspCode,
        RspHeaders,
        RspBody,
        {CallbackMod, Context},
        Socket,
        BufferIn,
        StartTime
    }
) when is_binary(RspBody) ->
    RequestCompleteTime = os:system_time(microsecond),
    send_response(RspCode, RspHeaders, RspBody, Socket),
    ResponseCompleteTime = os:system_time(microsecond),
    CallbackMod:record_request(
        Context,
        StartTime,
        RequestCompleteTime,
        ResponseCompleteTime,
        send_complete
    ),
    {Keepalive, BufferIn};
handle_response({halt, RspCode, RspHeaders, RspBody, Socket}) ->
    MergedRspHeaders =
        riak_api_web_headers:enter_from_list(
            RspHeaders,
            default_response_headers(false)
        ),
    send_response(RspCode, MergedRspHeaders, RspBody, Socket),
    close.

%% Send the interim 100 response when the request carries a single Expect
%% value of 100-continue.  The Expect field-value is case-insensitive
%% (RFC 7231 section 5.1.1), so it is case-folded before comparison.
%% Requests without that expectation need no action.
-spec send_continue(
    riak_api_web_socket:socket(),
    riak_api_web_headers:headers()
) ->
    ok | {error, term()}.
send_continue(Socket, ReqHeaders) ->
    case riak_api_web_headers:lookup(<<"expect">>, ReqHeaders, true) of
        {_Key, [Expectation]} when is_binary(Expectation) ->
            case string:casefold(Expectation) of
                <<"100-continue">> ->
                    riak_api_web_socket:send(Socket, ?CONTINUE_RESPONSE);
                _Unsupported ->
                    ok
            end;
        _Other ->
            ok
    end.

%% Placeholder: streaming of a response body is not yet implemented, nothing
%% is written to the socket.
-spec stream_response(
    response_code(),
    riak_api_web_headers:headers(),
    stream_fun(),
    riak_api_web_socket:socket()
) ->
    ok.
stream_response(_RspCode, _RspHeaders, _StreamFun, _Socket) ->
    ok.

%% Placeholder: sending of a complete response is not yet implemented, nothing
%% is written to the socket.
-spec send_response(
    response_code(),
    riak_api_web_headers:headers(),
    binary(),
    riak_api_web_socket:socket()
) ->
    ok.
send_response(_RspCode, _RspHeaders, _RspBody, _Socket) ->
    _Version = get_version(),
    ok.

%% The headers every response starts from.  Currently an empty set of response
%% headers, whatever the keepalive preference.
-spec default_response_headers(boolean()) -> riak_api_web_headers:headers().
default_response_headers(_Keepalive) ->
    NoHeaders = [],
    riak_api_web_headers:make_rsp_header(NoHeaders).

%% diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl
%% new file mode 100644
%% index 0000000..ca50346
%% --- /dev/null
%% +++ b/src/riak_api_web_body.erl
%% @@ -0,0 +1,192 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2007-2009 Basho Technologies
%% Copyright (c) 2026 Martin Sumner
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% @doc Handling functions for receiving and sending object bodies over HTTP
%%
%% Handling of chunked requests, and some other parts inspired by webmachine.

-module(riak_api_web_body).

-export([get_buffer/1, initiate_body/5, get_body/3]).

%% State of a request body as it is read:
%%   buffer         - bytes received but not yet returned to the caller
%%   content_length - declared body length, or `chunked`
%%   gzip           - whether gzip was declared for the body
%%   acc_size       - bytes of body returned to the caller so far
%%   max_size       - largest body permitted
%%   buffer_fun     - fun used to extend the buffer
%% The TEST build adds one extra field for use by tests.
-ifdef(TEST).
-record(req_body,
    {
        buffer :: binary(),
        content_length :: non_neg_integer() | chunked,
        gzip :: boolean(),
        acc_size :: non_neg_integer(),
        max_size :: pos_integer(),
        buffer_fun :: buffer_fun(),
        test_only = undefined :: any()| undefined
            % to be used in tests to mimic scenarios
    }
).
-else.
-record(req_body,
    {
        buffer :: binary(),
        content_length :: non_neg_integer() | chunked,
        gzip :: boolean(),
        acc_size :: non_neg_integer(),
        max_size :: pos_integer(),
        buffer_fun :: buffer_fun()
    }
).
-endif.

-type req_body() :: #req_body{}.

%% fun(Buffer, BytesNeeded, Timeout) -> ExtendedBuffer
-type buffer_fun() ::
    fun((binary(), pos_integer(), non_neg_integer()|undefined) -> binary()).

-export_type([req_body/0, buffer_fun/0]).

%%%============================================================================
%%% API
%%%============================================================================

%% @doc
%% The bytes received but not yet returned as part of the body.
-spec get_buffer(req_body()) -> binary().
get_buffer(ReqBody) ->
    ReqBody#req_body.buffer.

%% @doc
%% Create the state for reading a request body, starting from whatever was
%% left in the buffer after the headers were parsed.
-spec initiate_body(
    buffer_fun(),
    binary(),
    chunked | non_neg_integer(),
    boolean(),
    pos_integer()
) ->
    {ok, req_body()}.
initiate_body(BufferFun, BdyBuffer, CLorChunk, UseGzip, MaxBodySize) ->
    {
        ok,
        #req_body{
            buffer = BdyBuffer,
            content_length = CLorChunk,
            gzip = UseGzip,
            acc_size = 0,
            max_size = MaxBodySize,
            buffer_fun = BufferFun
        }
    }.

%% @doc
%% Return the next part of the body: all that remains when asked for `all`,
%% or up to the given number of bytes when asked for a slice.  The buffer is
%% extended as needed to satisfy the request.
%%
%% Returns `done` in place of data once the whole declared length has been
%% returned, and `{error, content_too_large}` when the declared length exceeds
%% the maximum size.
%% NOTE(review): for a chunked body the only clause present is the size check,
%% so reading a chunked body within the limit is not yet supported here.
-spec get_body(
    req_body(), all|pos_integer(), pos_integer()|undefined
) ->
    {binary()|done, req_body()} | {error, content_too_large}.
get_body(#req_body{content_length = CL, max_size = MS}, _SL, _TO)
        when is_integer(CL), CL > MS ->
    {error, content_too_large};
get_body(#req_body{content_length = CL, acc_size = AS} = RqBdy, _SL, _TO)
        when is_integer(CL), CL == AS ->
    {done, RqBdy};
get_body(
    #req_body{content_length = CL, acc_size = AccSize, buffer = Bin} = RqBdy,
    all,
    TO
) when is_integer(CL) ->
    Remaining = CL - AccSize,
    case byte_size(Bin) of
        BS when BS >= Remaining ->
            %% The buffer already holds the rest of the body (possibly
            %% exactly), so no further read is required.  Requesting zero
            %% more bytes here would block waiting on the socket.
            <<ReqBody:Remaining/binary, Rest/binary>> = Bin,
            {
                ReqBody,
                RqBdy#req_body{
                    buffer = Rest,
                    acc_size = CL
                }
            };
        BS ->
            get_body(
                extend_buffer(RqBdy, Remaining - BS, TO),
                all,
                TO
            )
    end;
get_body(
    #req_body{content_length = CL, acc_size = AccSize, buffer = Bin} = RqBdy,
    SL,
    TO
) when is_integer(CL), is_integer(SL) ->
    case CL - AccSize of
        Remaining when Remaining =< SL ->
            %% The slice would reach the end of the body, so this is the
            %% same as asking for everything that remains
            get_body(RqBdy, all, TO);
        _Remaining ->
            case byte_size(Bin) of
                BS when BS >= SL ->
                    <<SliceBody:SL/binary, Rest/binary>> = Bin,
                    {
                        SliceBody,
                        RqBdy#req_body{
                            buffer = Rest,
                            acc_size = AccSize + SL
                        }
                    };
                BS ->
                    get_body(
                        extend_buffer(RqBdy, SL - BS, TO),
                        SL,
                        TO
                    )
            end
    end;
get_body(
    #req_body{content_length = CL, max_size = MS, acc_size = AS},
    _SL,
    _TO
) when CL == chunked, AS > MS ->
    {error, content_too_large}.

%% Ask the buffer fun for Size more bytes and store the extended buffer.
-spec extend_buffer(
    req_body(),
    pos_integer(),
    non_neg_integer()|undefined
) ->
    req_body().
extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) ->
    ReqBody#req_body{
        buffer =
            BufferFun(ReqBody#req_body.buffer, Size, Timeout)
    }.

%%%============================================================================
%%% Eunit tests
%%%============================================================================

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").

-endif.
%% \ No newline at end of file

%% diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl
%% new file mode 100644
%% index 0000000..4062255
%% --- /dev/null
%% +++ b/src/riak_api_web_headers.erl
%% @@ -0,0 +1,557 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2007 Mochi Media, Inc
%% Copyright (c) 2026 Martin Sumner
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% @doc Case preserving (but case insensitive) HTTP Header dictionary.
%%
%% The headers are stored in a map, and the header keys will be an atom if
%% in the standard list of headers decoded by Erlang/OTP - and otherwise a
%% binary().
%%
%% The values will always be binaries, comma(-and-space)-separated for values
%% with multiple items
%%
%% The module was initially a refactoring of the mochiweb_headers module.

-module(riak_api_web_headers).

-export(
    [
        %% construction
        make/1,
        make_rsp_header/1,
        %% modification of response headers
        enter_from_list/2,
        default_from_list/2,
        enter/3,
        %% lookup
        get_value/2,
        get_unique_value/2,
        lookup/3,
        prefix_fold/3,
        parse_primary_header_value/1,
        %% wire format
        output_response_block/1,
        parse_request_block/3
    ]
).

%% Separators used on the wire: between key and value, between the items of a
%% multi-item value, and between lines.
-define(KV_SEPARATOR, <<": ">>).
-define(V_SEPARATOR, <<", ">>).
-define(L_SEPARATOR, <<"\r\n">>).

-record(headers,
    {
        type = request :: request|response,
            %% response headers do not support the lookup of non-standard
            %% header keys - and hence avoid the need to lower case those
            %% keys for comparison
        header_map = #{} :: header_map()
    }
).

-type standard_header_key() ::
    'Cache-Control' |
    'Connection' |
    'Date' |
    'Pragma' |
    'Transfer-Encoding' |
    'Upgrade' |
    'Via' |
    'Accept' |
    'Accept-Charset' | 'Accept-Encoding' | 'Accept-Language' |
    'Authorization' |
    'Proxy-Authorization' | 'Proxy-Authenticate' | 'Www-Authenticate' |
    'From' |
    'Host' |
    'If-Modified-Since' | 'If-Match' | 'If-None-Match' |
    'If-Range' | 'If-Unmodified-Since' |
    'Max-Forwards' |
    'Range' |
    'Referer' |
    'User-Agent' |
    'Age' |
    'Location' |
    'Public' |
    'Retry-After' |
    'Server' |
    'Vary' |
    'Warning' |
    'Allow' |
    'Content-Base' | 'Content-Encoding' | 'Content-Language' |
    'Content-Length' | 'Content-Location' | 'Content-Md5' |
    'Content-Range' | 'Content-Type' |
    'Etag' |
    'Expires' |
    'Last-Modified' |
    'Accept-Ranges' |
    'Set-Cookie' |
    'Set-Cookie2' |
    'X-Forwarded-For' |
    'Cookie' |
    'Keep-Alive' |
    'Proxy-Connection'.
    % This list is controlled by Erlang/OTP - i.e. there may be further atoms
    % added in the future, but it has been stable since OTP 13.
-type binary_header_key() :: unicode:chardata() | binary().
-type header_key() :: standard_header_key()|binary_header_key().
%% {OriginalKey, Values} - the key as first received, with its list of values
-type header_value() :: {binary(), list(binary())}.
-type header_map() :: #{header_key() => header_value()}.
-type header_list() :: [{header_key(), binary()}].
-type headers() :: #headers{}.
-type buffer_fun() :: fun((binary()) -> binary()).

-export_type([headers/0, header_list/0]).

%%%============================================================================
%%% API
%%%============================================================================

%% @doc
%% Build a headers() of the request type from a list of {Key, Value} pairs as
%% received in a request.
-spec make([{header_key(), binary()}]) -> headers().
make(HeaderList) when is_list(HeaderList) ->
    #headers{header_map = from_list(HeaderList, true)}.

%% @doc
%% Specific constructor when forming response headers.
%% With response headers it is not possible to look up non-standard header
%% keys, and the value may be a list of elements - which will be joined into a
%% single comma-separated value when the response header block is output.
-spec make_rsp_header([{header_key(), list(binary())|binary()}]
) ->
    headers().
make_rsp_header(HeaderList) ->
    #headers{
        type = response,
        header_map = from_list(HeaderList, false)
    }.

%% @doc
%% Add the pairs to the response headers; where a key is already present its
%% existing entry is replaced.
%% Specifically used in response headers when setting ranges into existing
%% headers.
-spec enter_from_list([{header_key(), binary()}], headers()
) ->
    headers().
enter_from_list(HeaderList, #headers{type = response, header_map = HM}) ->
    Incoming = from_list(HeaderList, false),
    #headers{
        type = response,
        header_map = maps:merge(HM, Incoming)
    }.

%% @doc
%% Add the pairs to the response headers only for keys that are not already
%% present; existing entries win.
-spec default_from_list([{header_key(), binary()}], headers()
) ->
    headers().
default_from_list(HeaderList, #headers{type = response, header_map = HM}) ->
    Defaults = from_list(HeaderList, false),
    #headers{
        type = response,
        header_map = maps:merge(Defaults, HM)
    }.

%% @doc
%% Set a single key to a single value in the response headers, replacing any
%% entry already held for that key.
-spec enter(header_key(), binary(), headers()
) ->
    headers().
enter(HeaderKey, Value, #headers{type = response, header_map = HM}) ->
    {HK, HV} = normalize_header({HeaderKey, Value}, false),
    #headers{
        type = response,
        header_map = HM#{HK => HV}
    }.

%% @doc
%% Return the value of the given standard header key.  `undefined` will be
%% returned for keys that are not present.
%% For non-standard (binary) keys use lookup/3.
%% If the value was a comma-separated list, or multiple headers have been
%% folded together - then a list rather than a single value is returned.
-spec get_value(standard_header_key(), headers()
) ->
    unicode:chardata() | list(unicode:chardata()) | undefined.
get_value(K, H) when is_atom(K) ->
    case maps:find(K, H#headers.header_map) of
        error ->
            undefined;
        {ok, {_OrigKey, [Single]}} ->
            Single;
        {ok, {_OrigKey, Values}} when is_list(Values) ->
            Values
    end.

%% @doc
%% For a field that may be repeated but whose values must not differ (e.g.
%% content-length): return the value when every occurrence agrees, and an
%% error when more than one distinct value is present.
-spec get_unique_value(standard_header_key(), headers()
) ->
    unicode:chardata() | undefined | {error, multiple_values}.
get_unique_value(K, H) ->
    case maps:find(K, H#headers.header_map) of
        error ->
            undefined;
        {ok, {_OrigKey, Values}} when is_list(Values) ->
            %% usort removes duplicates, so exactly one element remains
            %% only when all of the values were the same
            case lists:usort(Values) of
                [Unique] ->
                    Unique;
                _ ->
                    {error, multiple_values}
            end
    end.

%% @doc
%% Some header values consist of primary information supported by secondary
%% information.  The primary information is presented before a ';', and the
%% secondary information is a `;` separated list.  Returns the primary
%% information with surrounding whitespace removed.
-spec parse_primary_header_value(binary()) -> unicode:chardata().
parse_primary_header_value(HeaderValue) ->
    [Primary | _Secondary] = string:split(HeaderValue, ";"),
    string:trim(Primary).

%% @doc
%% Fetch the {original key, values} for a binary (non-standard) header key.
%% The boolean flag states whether the key passed has already been subject to
%% casefold; when it has not, it is case-folded before the lookup.
-spec lookup(binary_header_key(), headers(), boolean()
) ->
    {binary(), list(unicode:chardata())} | undefined.
lookup(RawKey, Headers, false) when is_binary(RawKey) ->
    lookup(normalize_key(RawKey), Headers, true);
lookup(CaseFoldedKey, H, true) when is_binary(CaseFoldedKey) ->
    maps:get(CaseFoldedKey, H#headers.header_map, undefined).

%% @doc
%% Fetch a list of non-standard headers with a given prefix.
%% The list is a list of {K, [V]} where K is the remainder of the original key
%% once the original prefix has been stripped.
%% The boolean flag states whether the prefix passed has already been subject
%% to casefold.
-spec prefix_fold(binary_header_key(), headers(), boolean()
) ->
    list({unicode:chardata(), list(unicode:chardata())}).
prefix_fold(RawPrefix, Headers, false) ->
    prefix_fold(normalize_key(RawPrefix), Headers, true);
prefix_fold(CaseFoldPrefix, Headers, true) when is_binary(CaseFoldPrefix) ->
    HeaderMap = Headers#headers.header_map,
    filter_headers(
        maps:keys(HeaderMap),
        CaseFoldPrefix,
        byte_size(CaseFoldPrefix),
        HeaderMap,
        []
    ).

%% @doc
%% Output a binary representing the block of response headers to be pushed to
%% the socket.  Includes trailing line feed at end of last line, but not a
%% separating line feed to the response body
-spec output_response_block(headers()) -> binary().
output_response_block(#headers{type = response, header_map = HM}) ->
    iolist_to_binary(
        [
            <<
                BK/binary,
                (?KV_SEPARATOR)/binary,
                (join_values(VL))/binary,
                (?L_SEPARATOR)/binary
            >>
        ||
            {BK, VL} <- maps:values(HM)
        ]
    ).

-define(COUNT_EXCEEDED, <<"Header count exceed ~w">>).
-define(SIZE_EXCEEDED, <<"Header exceeded maximum size of ~w">>).

%% @doc
%% Parse a binary holding the start of a block of request headers, calling the
%% buffer function to obtain more whenever the block is incomplete.  The
%% limits are the maximum number of headers and the maximum size of a header
%% value.  On success returns the headers and the unparsed remainder.
-spec parse_request_block(
    binary(),
    buffer_fun(),
    {pos_integer(), pos_integer()}
) ->
    {ok, headers(), binary()} | riak_api_web_acceptor:halt_response().
parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}) ->
    NoHeaders = {[], 0},
    parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}, NoHeaders).

%% Decode one header line at a time, accumulating {Key, Value} pairs and a
%% count, until the end of the header block is reached.
%%
%% Halts with 431 when the count or a value size exceeds its limit, and with
%% 400 when the decoder reports the data as invalid.  Standard headers are
%% keyed by the atom produced by the decoder, other headers by the key as
%% originally received.
%% NOTE(review): lines the decoder flags as `http_error` are skipped rather
%% than rejected - confirm this leniency is intended.
parse_request_block(_B, _BFun, {MaxCount, _MS}, {_H, C}) when C > MaxCount ->
    {halt, 431, none, ?COUNT_EXCEEDED, [MaxCount]};
parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}, {HeaderAcc, C}) ->
    case erlang:decode_packet(httph_bin, Buffer, []) of
        {ok, {http_header, _, _, _, V}, _} when byte_size(V) > MaxSize ->
            {halt, 431, none, ?SIZE_EXCEEDED, [MaxSize]};
        {ok, {http_header, _, Key, _OrigKey, Value}, Rest} when is_atom(Key) ->
            parse_request_block(
                Rest,
                BufferFun,
                {MaxCount, MaxSize},
                {[{Key, Value}|HeaderAcc], C + 1}
            );
        {ok, {http_header, _, _Key, OrigKey, Value}, Rest} ->
            parse_request_block(
                Rest,
                BufferFun,
                {MaxCount, MaxSize},
                {[{OrigKey, Value}|HeaderAcc], C + 1}
            );
        {ok, http_eoh, Rest} ->
            {ok, make(HeaderAcc), Rest};
        {ok, {http_error, _}, Rest} ->
            parse_request_block(
                Rest,
                BufferFun,
                {MaxCount, MaxSize},
                {HeaderAcc, C}
            );
        {more, _} ->
            parse_request_block(
                BufferFun(Buffer),
                BufferFun,
                {MaxCount, MaxSize},
                {HeaderAcc, C}
            );
        {error, Reason} ->
            %% decode_packet may reject the data outright; answer as a bad
            %% request rather than failing with a case_clause
            {halt, 400, none, <<"Invalid request headers ~0p">>, [Reason]}
    end.

%%%============================================================================
%%% Internal Functions
%%%============================================================================

%% Join the items of a multi-item value with the value separator.
-spec join_values(list(unicode:chardata())) -> binary().
-if(?OTP_RELEASE >= 28).
join_values(VL) ->
    binary:join(VL, ?V_SEPARATOR).
-else.
join_values(VL) ->
    iolist_to_binary(lists:join(?V_SEPARATOR, VL)).
-endif.

%% Collect the entries whose (case-folded, binary) key starts with Prefix.
%% The key of each result is taken from the original key as stored with the
%% values, with the first PL bytes removed.  Atom keys never match.
-spec filter_headers(
    list(header_key()),
    unicode:chardata(),
    pos_integer(),
    header_map(),
    list(header_value())
) ->
    list(header_value()).
filter_headers([], _Prefix, _PL, _HMap, Acc) ->
    Acc;
filter_headers([Key|RestKeys], Prefix, PL, HMap, Acc) ->
    case Key of
        <<Prefix:PL/binary, _/binary>> ->
            {<<_Ignore:PL/binary, Suffix/binary>>, Values} =
                maps:get(Key, HMap),
            filter_headers(RestKeys, Prefix, PL, HMap, [{Suffix, Values}|Acc]);
        _ ->
            filter_headers(RestKeys, Prefix, PL, HMap, Acc)
    end.
+
+%% Build a header map from a list of {Key, Value | [Value]} pairs.  Entries
+%% whose normalized keys collide are merged: the raw key already stored is
+%% kept, and the values of the later entry are placed in front of those stored.
+-spec from_list([{header_key(), binary()|list(binary())}], boolean()) ->
+    header_map().
+from_list(HeaderList, IsReqHeader) ->
+    Merge =
+        fun(Header, MapAcc) ->
+            {NormKey, {_RawKey, NewValues} = Fresh} =
+                normalize_header(Header, IsReqHeader),
+            case MapAcc of
+                #{NormKey := {StoredRawKey, StoredValues}} ->
+                    MapAcc#{
+                        NormKey := {StoredRawKey, NewValues ++ StoredValues}
+                    };
+                #{} ->
+                    MapAcc#{NormKey => Fresh}
+            end
+        end,
+    lists:foldl(Merge, #{}, HeaderList).
+
+%% Produce the {MapKey, {RawKey, Values}} entry for one header.  Atom keys are
+%% used as-is; binary keys are case-folded only when handling request headers.
+-spec normalize_header(
+    {header_key(), binary()|list(binary())}, boolean()
+) ->
+    {header_key(), header_value()}.
+normalize_header({AtomKey, RawValue}, _IsReqHeader) when is_atom(AtomKey) ->
+    {AtomKey, {atom_to_binary(AtomKey), normalize_value(RawValue)}};
+normalize_header({BinKey, RawValue}, true) when is_binary(BinKey) ->
+    {string:casefold(BinKey), {BinKey, normalize_value(RawValue)}};
+normalize_header({BinKey, RawValue}, false) when is_binary(BinKey) ->
+    {BinKey, {BinKey, normalize_value(RawValue)}}.
+
+%% Map a lookup key to the form used as a map key: atoms unchanged, binaries
+%% case-folded
+-spec normalize_key(standard_header_key()) -> standard_header_key();
+                   (binary_header_key()) -> binary_header_key().
+normalize_key(AtomKey) when is_atom(AtomKey) ->
+    AtomKey;
+normalize_key(BinKey) when is_binary(BinKey) ->
+    string:casefold(BinKey).
+
+%% Turn a header value into a list of values.  A list input keeps only its
+%% binary members; a binary input is split on ?V_SEPARATOR and each part is
+%% trimmed of surrounding whitespace.
+-spec normalize_value(binary()|list(binary())) -> list(binary()).
+normalize_value(MultipleValues) when is_list(MultipleValues) ->
+    [Value || Value <- MultipleValues, is_binary(Value)];
+normalize_value(FieldValue) when is_binary(FieldValue) ->
+    [
+        string:trim(Part, both)
+     || Part <- string:split(FieldValue, ?V_SEPARATOR, all)
+    ].
+
+%%%============================================================================
+%%% Eunit tests
+%%%============================================================================
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+%% Headers split across two buffers on a line boundary
+parse_block_test() ->
+    RequestHeader1 =
+        <<
+            "content-length: 1024\r\n"
+            "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n"
+            "x-riak-index-Field1_bin: NAME3|DOB1 \r\n"
+            "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n"
+        >>,
+    RequestHeader2 =
+        <<
+            "x-riak-index-field2_bin: POSTCODE2|DOB1\r\n"
+            "\r\n"
+        >>,
+    parse_block_tester(RequestHeader1, RequestHeader2).
+
+%% Headers split across two buffers in the middle of a header value
+parse_splitblock_test() ->
+    RequestHeader1 =
+        <<
+            "content-length: 1024\r\n"
+            "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n"
+            "x-riak-index-Field1_bin: NAME3|DOB1 \r\n"
+            "X-Riak-Index-field2_bin: POSTCODE1"
+        >>,
+    RequestHeader2 =
+        <<
+            "|DOB1\r\nx-riak-index-field2_bin: POSTCODE2|DOB1\r\n"
+            "\r\n"
+        >>,
+    parse_block_tester(RequestHeader1, RequestHeader2).
+
+%% Parse RequestHeader1, with a buffer fun that appends RequestHeader2 when the
+%% parser asks for more data, then check lookups by atom key, by case-folded
+%% binary key and by raw binary key
+parse_block_tester(RequestHeader1, RequestHeader2) ->
+    BufferFun = fun(B) -> <<B/binary, RequestHeader2/binary>> end,
+    {ok, Headers, <<>>} =
+        parse_request_block(RequestHeader1, BufferFun, {1024, 2048}),
+    ?assertMatch(
+        <<"1024">>,
+        get_value('Content-Length', Headers)
+    ),
+    ?assertMatch(
+        {
+            <<"x-riak-index-Field1_bin">>,
+            [<<"NAME1|DOB1">>, <<"NAME2|DOB1">>, <<"NAME3|DOB1">>]
+        },
+        lookup(<<"x-riak-index-field1_bin">>, Headers, true)
+    ),
+    ?assertMatch(
+        {
+            <<"x-riak-index-field2_bin">>,
+            [<<"POSTCODE1|DOB1">>, <<"POSTCODE2|DOB1">>]
+        },
+        lookup(<<"x-riak-index-Field2_bin">>, Headers, false)
+    ),
+    ?assertMatch(
+        <<"1024">>,
+        get_unique_value('Content-Length', Headers)
+    ).
+
+%% prefix_fold/3 returns the index and metadata headers keyed by the suffix of
+%% the raw header name, for both pre-folded and raw prefixes
+riak_metadata_test() ->
+    RequestHeader1 =
+        <<
+            "content-length: 1024\r\n"
+            "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n"
+            "x-riak-index-Field1_bin: NAME3|DOB1 \r\n"
+            "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n"
+        >>,
+    RequestHeader2 =
+        <<
+            "x-riak-index-field2_bin: POSTCODE2|DOB1\r\n"
+            "x-riak-meta-key1: METAVALUE1\r\n"
+            "x-riak-meta-key2: METAVALUE2\r\n"
+            "\r\n"
+        >>,
+    BufferFun = fun(B) -> <<B/binary, RequestHeader2/binary>> end,
+    {ok, Headers, <<>>} =
+        parse_request_block(RequestHeader1, BufferFun, {1024, 2048}),
+    IndexList = prefix_fold(<<"x-riak-index-">>, Headers, true),
+    ?assertMatch(
+        {
+            <<"Field1_bin">>,
+            [<<"NAME1|DOB1">>, <<"NAME2|DOB1">>, <<"NAME3|DOB1">>]
+        },
+        lists:keyfind(<<"Field1_bin">>, 1, IndexList)
+    ),
+    MetaList = prefix_fold(<<"X-Riak-Meta-">>, Headers, false),
+    ?assertMatch(
+        {<<"key1">>, [<<"METAVALUE1">>]},
+        lists:keyfind(<<"key1">>, 1, MetaList)
+    ).
+
+%% A repeated Content-Length must not resolve to a single value
+content_smuggling_test() ->
+    RequestHeader1 =
+        <<
+            "content-length: 1024\r\n"
+            "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n"
+            "x-riak-index-Field1_bin: NAME3|DOB1 \r\n"
+            "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n"
+            "content-length: 16384\r\n"
+            "\r\n"
+        >>,
+    %% The block is complete, so the parser must never ask for more data; the
+    %% fun has the arity the parser uses and fails loudly if it is called
+    BufferFun = fun(_Buffer) -> error(unexpected_buffer_request) end,
+    {ok, Headers, <<>>} =
+        parse_request_block(RequestHeader1, BufferFun, {1024, 2048}),
+    ?assertMatch(
+        {error, multiple_values},
+        get_unique_value('Content-Length', Headers)
+    ).
+
+%% Build response headers from a list, apply defaults (which must not replace
+%% the 'Server' value already present), enter overrides (which must replace
+%% 'Etag'), and check the rendered block.
+%% NOTE(review): ExpectedResponse pins the order in which the header lines are
+%% rendered - confirm that order is stable across OTP releases.
+response_header_test() ->
+    InitHeaders =
+        [
+            {'Server', <<"Riak Web API">>},
+            {'Content-Length', <<"1024">>},
+            {'Etag', <<"sometag">>},
+            {<<"X-Riak-Index-field1_bin">>, [<<"NAME1|DOB1">>, <<"NAME2|DOB1">>]},
+            {<<"X-Riak-Index-field1_bin">>, <<"NAME3|DOB1 ">>},
+            {<<"X-Riak-Index-field2_bin">>, <<"POSTCODE1|DOB1">>}
+        ],
+    RespHeaders1 = make_rsp_header(InitHeaders),
+    DefaultList =
+        [
+            {'Server', <<"Riak Web API 1.0">>},
+            {'Date', <<"Mon, 15 Apr 2025 10:06:15 GMT">>}
+        ],
+    RespHeaders2 = default_from_list(DefaultList, RespHeaders1),
+    EntryList =
+        [
+            {'Etag', <<"some_md5_tag">>},
+            {'Vary', <<"*">>}
+        ],
+    RespHeaders3 = enter_from_list(EntryList, RespHeaders2),
+    Response = output_response_block(RespHeaders3),
+    ExpectedResponse =
+        <<
+            "Date: Mon, 15 Apr 2025 10:06:15 GMT\r\n"
+            "Server: Riak Web API\r\n"
+            "Vary: *\r\n"
+            "Content-Length: 1024\r\n"
+            "Etag: some_md5_tag\r\n"
+            "X-Riak-Index-field1_bin: NAME3|DOB1, NAME1|DOB1, NAME2|DOB1\r\n"
+            "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n"
+        >>,
+    ?assertMatch(ExpectedResponse, Response)
+    .
+
+-endif.
diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl
index e719df1..919f0cc 100644
--- a/src/riak_api_web_security.erl
+++ b/src/riak_api_web_security.erl
@@ -1,39 +1,61 @@
 %% @doc Some security helper functions for Riak API endpoints
 -module(riak_api_web_security).
+-include_lib("kernel/include/logger.hrl").
 
--export([is_authorized/1]).
+-export([is_authorised/4]).
 
-%% @doc Check if the user is authorized
--spec is_authorized(any()) -> {true, any()} | false | insecure.
-is_authorized(ReqData) ->
-    case riak_core_security:is_enabled() of
-        true ->
-            Scheme = wrq:scheme(ReqData),
-            case Scheme == https of
-                true ->
-                    case wrq:get_req_header("Authorization", ReqData) of
-                        "Basic " ++ Base64 ->
-                            UserPass = base64:decode_to_string(Base64),
-                            [User, Pass] = [list_to_binary(X) || X <-
-                                string:tokens(UserPass, ":")],
-                            {ok, Peer} = inet_parse:address(wrq:peer(ReqData)),
-                            case riak_core_security:authenticate(User, Pass,
-                                    [{ip, Peer}])
-                                of
-                                {ok, Sec} ->
-                                    {true, Sec};
-                                {error, _} ->
-                                    false
-                            end;
-                        _ ->
-                            false
-                    end;
-                false ->
-                    %% security is enabled, but they're connecting over HTTP.
-                    %% which means if they authed, the credentials would be in
-                    %% plaintext
-                    insecure
-            end;
-        false ->
-            {true, undefined} %% no security context
-    end.
+-define(AUTH_PREFIX, "Basic ").
+
+%% @doc
+%% Decide whether a request may proceed.
+%% - Security disabled: {ok, undefined} (there is no security context).
+%% - Security enabled over https: the Authorization header must carry Basic
+%%   credentials, which are passed to riak_core_security:authenticate/3 with
+%%   the peer address.  Returns {ok, Context} on success, a 401 halt response
+%%   when authentication is refused, and a 400 halt response when no usable
+%%   credentials can be decoded from the header.
+%% - Security enabled over http: a 426 halt response, as credentials would be
+%%   sent in plaintext.
+-spec is_authorised(
+    boolean(),
+    http|https,
+    riak_api_web_headers:headers(),
+    {ip, inet:ip_address()}
+) ->
+    {ok, riak_core_security:context() | undefined} |
+        riak_api_web_acceptor:halt_response().
+is_authorised(Enabled, Scheme, ReqHeaders, Peer) ->
+    is_authorised(
+        Enabled,
+        Scheme,
+        ReqHeaders,
+        Peer,
+        fun(User, Pass, {ip, Pip}) ->
+            riak_core_security:authenticate(User, Pass, [{ip, Pip}])
+        end
+    ).
+
+%% As is_authorised/4, but with the authentication function passed in so that
+%% it can be replaced.  AuthFun is called as AuthFun(User, Pass, Peer) with
+%% Peer in the {ip, Address} form received by is_authorised/4.
+is_authorised(true, https, ReqHeaders, Peer, AuthFun) ->
+    case riak_api_web_headers:get_unique_value('Authorization', ReqHeaders) of
+        << ?AUTH_PREFIX, Base64UP/binary>> ->
+            authenticate(decode_credentials(Base64UP), Peer, AuthFun);
+        _Unexpected ->
+            %% The header value is deliberately not logged, as it may contain
+            %% credentials
+            ?LOG_WARNING(
+                "Error decoding credentials - Authorization header is "
+                "missing, repeated or not Basic",
+                []
+            ),
+            {halt, 400, none, <<"Error decoding credentials">>, []}
+    end;
+is_authorised(true, http, _ReqHeaders, _Peer, _AuthFun) ->
+    {halt, 426, none, <<"Upgrade required to https">>, []};
+is_authorised(false, _, _ReqHeaders, _Peer, _AuthFun) ->
+    {ok, undefined}.
+
+%% Run AuthFun on decoded credentials, mapping the outcome to the result of
+%% is_authorised/5.  Exceptions raised by AuthFun are not caught here.
+authenticate({ok, User, Pass}, Peer, AuthFun) ->
+    case AuthFun(User, Pass, Peer) of
+        {ok, SecContext} ->
+            {ok, SecContext};
+        {error, Error} ->
+            {halt, 401, none, <<"~0p">>, [Error]}
+    end;
+authenticate(error, _Peer, _AuthFun) ->
+    %% Neither the exception nor the decoded value is logged, as either may
+    %% contain the credentials
+    ?LOG_WARNING("Error decoding credentials", []),
+    {halt, 400, none, <<"Error decoding credentials">>, []}.
+
+%% Decode the base64 part of a Basic Authorization header into user and
+%% password.  The split is made at the first colon only, so the password may
+%% itself contain colons and may be empty.
+-spec decode_credentials(binary()) -> {ok, binary(), binary()} | error.
+decode_credentials(Base64UP) ->
+    try binary:split(base64:decode(Base64UP), <<":">>) of
+        [User, Pass] ->
+            {ok, User, Pass};
+        _NoSeparator ->
+            error
+    catch
+        error:_ ->
+            error
+    end.
+
+%%%============================================================================
+%%% Eunit tests
+%%%============================================================================
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+-endif.
diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl
new file mode 100644
index 0000000..77a4076
--- /dev/null
+++ b/src/riak_api_web_socket.erl
@@ -0,0 +1,475 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2007 Mochi Media, Inc
+%% Copyright (c) 2026 Martin Sumner
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License.  You may obtain
+%% a copy of the License at
+%%
+%%   http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied.  See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+%% @doc Socket and acceptor pool management for web requests
+%%
+%% Socket manager intended to abstract away from choice of SSL, and also
+%% maintain a pool of accept processes that are ready to accept new connection
+%% requests
+%%
+%% Each acceptor is an `riak_api_web_acceptor` - and as each acceptor accepts
+%% a connection, it will prompt this socket server to launch a new acceptor.
+%% When a linked acceptor closes (along with the connection), the close message
+%% is handled and the closed acceptor is removed from the pool.
+%%
+%% The intention is that there should always be at least the pool size of
+%% acceptors waiting for a connection - unless the max size is reached, and no
+%% new acceptors will be started.  This means that no more connections can be
+%% handled concurrently than the max pool size.
+%%
+%% The module was initially based on the:
+%% - mochiweb_socket_server
+%% - mochiweb_socket
+%% - mochiweb_acceptor
+%%
+%% Patterns used in these modules have been compared with the Elli web server
+%% for validation - https://github.com/elli-lib/elli.
+
+-module(riak_api_web_socket).
+
+-behaviour(gen_server).
+
+%% Server start and pool size management
+-export(
+    [
+        start_link/1,
+        get_max_pool_size/1,
+        set_max_pool_size/2,
+        get_active_pool_size/1
+    ]
+).
+
+%% gen_server callbacks
+-export(
+    [
+        init/1,
+        handle_call/3,
+        handle_cast/2,
+        handle_info/2
+    ]
+).
+
+%% Socket operations that hide the http/https distinction, and the
+%% notification sent by an acceptor
+-export(
+    [
+        get_scheme/1,
+        accept/2,
+        recv/3,
+        send/2,
+        close/1,
+        get_peer/1,
+        acceptor_accepted/1
+    ]
+).
+
+-include_lib("kernel/include/logger.hrl").
+
+-define(POOL_SIZE_DEFAULT, 16).
+-define(POOL_SIZE_MAX_DEFAULT, 2048).
+
+%% pool_size is incremented when an acceptor is started in response to an
+%% `accepted` cast and decremented when an 'EXIT' message is handled;
+%% acceptor_pool holds the acceptor processes added and removed at the same
+%% points.
+-record(socket_state,
+    {
+        port :: inet:port_number(),
+        listener :: socket(),
+        pool_size = ?POOL_SIZE_DEFAULT :: pos_integer(),
+        max_pool_size = ?POOL_SIZE_MAX_DEFAULT :: pos_integer(),
+        acceptor_pool = sets:new([{version, 2}]) :: sets:set()
+    }
+).
+
+-type socket_option() ::
+    {ip, inet:ip_address()} |
+    binary |
+    {reuseaddr, boolean()} |
+        %% Assumed necessary to allow for rapid restart of supervised
+        %% process - e.g. allow for next process to listen on socket even
+        %% when the previous process has not completed the close
+    {packet, raw} |
+    {active, boolean()}
+        %% After a connection is accepted the socket is manually read to be
+        %% decoded
+    .
+
+-type buffer_option() ::
+    {recbuf, pos_integer()} |
+    {sndbuf, pos_integer()} |
+    {buffer, pos_integer()}
+        % The size of the user-level buffer used by the driver.
+        % Not to be confused with options sndbuf and recbuf, which correspond
+        % to the Kernel socket buffers. For TCP it is recommended to have
+        % val(buffer) >= val(recbuf) to avoid performance issues because
+        % of unnecessary copying
+    .
+
+-type server_name() :: binary().
+    % Name of the root part of the address i.e.
+    % <<"Protocol://Host:Port">>
+
+-type option() ::
+    {acceptor_pool_start_size, pos_integer()} |
+        % The number of acceptors to be ready to accept a new connection.
+        % This pool size is not a limit, it is the starting size.  As an
+        % acceptor picks up a new connection request it will prompt for a new
+        % acceptor to be spawned (and will not return to the pool once it is
+        % complete).
+    {acceptor_pool_max_size, pos_integer()} |
+        % The maximum number of acceptors in the pool - the total number of
+        % concurrent requests that can be supported on this port
+    {ssl, boolean()} |
+    {ssl_opts, [ssl:tls_server_option()]} |
+    {ip, inet:ip_address()} |
+    {port, inet:port_number()} |
+    {name, server_name()}.
+
+-type scheme() :: http|https.
+
+-type web_options() :: list(option()).
+
+-type socket() :: {http, gen_tcp:socket()}|{https, ssl:sslsocket()}.
+
+-type tcp_error() :: closed | timeout | system_limit | inet:posix().
+-type tls_error() :: term().
+
+-export_type([socket/0]).
+
+%%%============================================================================
+%%% API
+%%%============================================================================
+
+%% @doc
+%% Start the socket server, registered locally under the atom form of the
+%% binary given in the {name, Name} option.  The option is required: there is
+%% no clause for a missing or non-binary name.
+-spec start_link(web_options()) -> {ok, pid()}.
+start_link(Options) ->
+    ServerName =
+        case lists:keyfind(name, 1, Options) of
+            {name, Name} when is_binary(Name) ->
+                {local, binary_to_atom(Name)}
+        end,
+    {ok, Pid} = gen_server:start_link(ServerName, ?MODULE, Options, []),
+    {ok, Pid}.
+
+%% @doc
+%% Return the configured maximum number of acceptors for the named server
+-spec get_max_pool_size(server_name()) -> pos_integer().
+get_max_pool_size(ServerName) ->
+    gen_server:call(
+        binary_to_existing_atom(ServerName),
+        get_max_pool_size,
+        infinity
+    ).
+
+%% @doc
+%% Return the number of acceptor processes currently held in the pool of the
+%% named server
+-spec get_active_pool_size(server_name()) -> non_neg_integer().
+get_active_pool_size(ServerName) ->
+    gen_server:call(
+        binary_to_existing_atom(ServerName),
+        get_active_pool_size,
+        infinity
+    ).
+
+%% @doc
+%% Request a change to the maximum pool size.  The request is a cast: the
+%% server ignores (and logs) a value below its current pool size.
+-spec set_max_pool_size(server_name(), pos_integer()) -> ok.
+set_max_pool_size(ServerName, MaxPoolSize) when is_integer(MaxPoolSize) ->
+    gen_server:cast(
+        binary_to_existing_atom(ServerName),
+        {set_max_pool_size, MaxPoolSize}
+    ).
+
+%% @doc
+%% Called by an acceptor to tell the socket server (Pid) that it has accepted
+%% a connection
+-spec acceptor_accepted(pid()) -> ok.
+acceptor_accepted(Pid) ->
+    gen_server:cast(Pid, accepted).
+
+%%%============================================================================
+%%% gen_server callbacks
+%%%============================================================================
+
+%% Open the listen socket described by Options and start the initial pool of
+%% acceptors.  The ip and port options are required; ssl_opts is required
+%% when {ssl, true} is present.
+init(Options) ->
+    %% Acceptors are started with start_link, and their exits are accounted
+    %% for in handle_info/2.  Exits must be trapped for those 'EXIT' messages
+    %% to be delivered - otherwise a normal exit is never seen, and an
+    %% abnormal exit of an acceptor would terminate this server.
+    process_flag(trap_exit, true),
+    BufferOpts =
+        case get_tcp_buffer_options() of
+            [] ->
+                [];
+            NonDefaultOpts ->
+                ?LOG_INFO(
+                    "Non-default TCP buffer options configured for web ~0p",
+                    [NonDefaultOpts]
+                ),
+                NonDefaultOpts
+        end,
+    {ip, IP} = lists:keyfind(ip, 1, Options),
+    {port, Port} = lists:keyfind(port, 1, Options),
+    {Protocol, SSLOpts} =
+        case lists:keyfind(ssl, 1, Options) of
+            {ssl, true} ->
+                {ssl_opts, SSLOptsIn} = lists:keyfind(ssl_opts, 1, Options),
+                {https, SSLOptsIn};
+            _ ->
+                {http, none}
+        end,
+    SocketOpts = default_socket_options(IP),
+    {ok, Listener} = listen(Protocol, Port, SocketOpts, BufferOpts, SSLOpts),
+    {AcceptorPool, StartSize, MaxSize} = get_acceptor_pool(Listener, Options),
+    ?LOG_INFO(
+        "Acceptor pool for web started on IP ~0p port ~w of size ~w",
+        [IP, Port, StartSize]
+    ),
+    {
+        ok,
+        #socket_state{
+            listener = Listener,
+            port = Port,
+            pool_size = StartSize,
+            max_pool_size = MaxSize,
+            acceptor_pool = sets:from_list(AcceptorPool, [{version, 2}])
+        }
+    }.
+
+handle_call(get_max_pool_size, _From, State) ->
+    {reply, State#socket_state.max_pool_size, State};
+handle_call(get_active_pool_size, _From, State) ->
+    {reply, sets:size(State#socket_state.acceptor_pool), State}.
+
+handle_cast({set_max_pool_size, MPS}, State) ->
+    %% The new maximum is only accepted when it is not below the current
+    %% pool size
+    case State#socket_state.pool_size of
+        PS when PS =< MPS ->
+            {noreply, State#socket_state{max_pool_size = MPS}};
+        PS ->
+            ?LOG_WARNING(
+                "Ignoring change to max pool size ~w to smaller value than "
+                "starting pool ~w",
+                [MPS, PS]
+            ),
+            {noreply, State}
+    end;
+handle_cast(accepted, State) ->
+    %% An acceptor has picked up a connection: start another acceptor in its
+    %% place, unless the pool is already at its maximum size
+    case State#socket_state.pool_size of
+        PS when PS < State#socket_state.max_pool_size ->
+            P = riak_api_web_acceptor:start_link(State#socket_state.listener),
+            {
+                noreply,
+                State#socket_state{
+                    acceptor_pool =
+                        sets:add_element(P, State#socket_state.acceptor_pool),
+                    pool_size = PS + 1
+                }
+            };
+        _ ->
+            %% NOTE(review): no acceptor is started here, and none is started
+            %% when an acceptor later exits, so the number of acceptors
+            %% waiting for a connection looks to fall by one each time the
+            %% limit is hit - confirm that the pool is replenished elsewhere
+            ?LOG_WARNING(
+                "Web connection pool reached limit of ~w",
+                [State#socket_state.pool_size]
+            ),
+            {noreply, State}
+    end.
+
+handle_info({'EXIT', Pid, normal}, State) ->
+    {
+        noreply,
+        State#socket_state{
+            pool_size = State#socket_state.pool_size - 1,
+            acceptor_pool =
+                sets:del_element(Pid, State#socket_state.acceptor_pool)
+        }
+    };
+handle_info({'EXIT', Pid, Reason}, State) ->
+    ?LOG_ERROR("Acceptor ~p unexpectedly crashed: ~0p", [Pid, Reason]),
+    handle_info({'EXIT', Pid, normal}, State);
+handle_info(Unexpected, State) ->
+    %% Drain anything else, so that a stray message cannot crash the server
+    %% that owns the listen socket
+    ?LOG_WARNING(
+        "Unexpected message received by web socket server ~0p",
+        [Unexpected]
+    ),
+    {noreply, State}.
+
+
+%%%============================================================================
+%%% Internal Functions
+%%%============================================================================
+
+%% The options always passed when opening the listen socket: bind to IPAddr,
+%% deliver binaries, allow address reuse, apply no packet framing, and leave
+%% the socket passive
+-spec default_socket_options(inet:ip_address()) -> [socket_option()].
+default_socket_options(IPAddr) ->
+    [
+        {ip, IPAddr},
+        binary,
+        {reuseaddr, true},
+        {packet, raw},
+        {active, false}
+    ].
+
+-spec get_acceptor_pool(socket(), list(option())
+) ->
+    {list(pid()), pos_integer(), pos_integer()}.
+get_acceptor_pool(Listener, Options) ->
+    %% Start the initial acceptors and return {Pids, StartSize, MaxSize}.
+    %% Sizes are taken from the acceptor_pool_start_size and
+    %% acceptor_pool_max_size options, then from the riak_api environment
+    %% values of the same names, then from the compiled defaults.  If the
+    %% result is not a positive integer start size with an integer maximum
+    %% at least as large, the problem is logged and the defaults are used.
+    StartSize =
+        pool_size_option(
+            acceptor_pool_start_size,
+            Options,
+            ?POOL_SIZE_DEFAULT
+        ),
+    MaxSize =
+        pool_size_option(
+            acceptor_pool_max_size,
+            Options,
+            ?POOL_SIZE_MAX_DEFAULT
+        ),
+    case {StartSize, MaxSize} of
+        {SS, MS} when is_integer(SS), SS > 0, is_integer(MS), MS >= SS ->
+            {start_acceptor_pool(Listener, SS), SS, MS};
+        InvalidConfig ->
+            ?LOG_ERROR(
+                "Invalid configuration of acceptor pool ~0p - "
+                "starting with defaults",
+                [InvalidConfig]
+            ),
+            {
+                start_acceptor_pool(Listener, ?POOL_SIZE_DEFAULT),
+                ?POOL_SIZE_DEFAULT,
+                ?POOL_SIZE_MAX_DEFAULT
+            }
+    end.
+
+%% Read one pool size setting: the value in Options when the key is present,
+%% otherwise the riak_api application environment value of the same name,
+%% otherwise Default.  The value is returned unvalidated.
+-spec pool_size_option(atom(), list(option()), pos_integer()) -> term().
+pool_size_option(Key, Options, Default) ->
+    case lists:keyfind(Key, 1, Options) of
+        {Key, Size} ->
+            Size;
+        false ->
+            application:get_env(riak_api, Key, Default)
+    end.
+
+%% Start Size acceptors on the listen socket, returning their pids
+-spec start_acceptor_pool(socket(), pos_integer()) -> list(pid()).
+start_acceptor_pool(Listener, Size) ->
+    lists:map(
+        fun(_I) ->
+            P = riak_api_web_acceptor:start_link(Listener),
+            true = is_pid(P),
+            P
+        end,
+        lists:seq(1, Size)
+    ).
+
+%% Return the socket buffer options that have an integer value set in the
+%% riak_api application environment; options that are unset, or set to
+%% something other than an integer, are left out
+-spec get_tcp_buffer_options() -> list(buffer_option()).
+get_tcp_buffer_options() ->
+    get_tcp_buffer_options(
+        [
+            {buffer, web_kernel_buffer},
+            {recbuf, web_receive_buffer},
+            {sndbuf, web_send_buffer}
+        ],
+        []
+    ).
+
+get_tcp_buffer_options([], BufferOptions) ->
+    BufferOptions;
+get_tcp_buffer_options([{Name, EnVar}|Rest], BufferOptions) ->
+    case application:get_env(riak_api, EnVar) of
+        {ok, BSize} when is_integer(BSize) ->
+            get_tcp_buffer_options(Rest, [{Name, BSize}|BufferOptions]);
+        _ ->
+            get_tcp_buffer_options(Rest, BufferOptions)
+    end.
+
+%% The scheme (http or https) a socket was opened with
+-spec get_scheme(socket()) -> scheme().
+get_scheme({Scheme, _Socket}) ->
+    Scheme.
+
+%% Open a listen socket for the scheme.  SSLOpts must be `none` for http, and
+%% a list of TLS server options for https.
+-spec listen(
+    scheme(),
+    inet:port_number(),
+    list(socket_option()),
+    list(buffer_option()),
+    none | list(ssl:tls_server_option())
+) ->
+    {ok, socket()} | {error, any()}.
+listen(http, Port, SocketOpts, BufferOpts, none) ->
+    tag_socket(http, gen_tcp:listen(Port, SocketOpts ++ BufferOpts));
+listen(https, Port, SocketOpts, BufferOpts, SSLOpts) when SSLOpts =/= none ->
+    tag_socket(https, ssl:listen(Port, SocketOpts ++ BufferOpts ++ SSLOpts)).
+
+%% Wait up to Timeout for a connection on a listen socket.  For https the
+%% same Timeout is then applied to the TLS handshake.
+-spec accept(
+    socket(),
+    pos_integer()
+) ->
+    {ok, socket()} |{error, tcp_error()|tls_error()}.
+accept({http, Listener}, Timeout) ->
+    tag_socket(http, gen_tcp:accept(Listener, Timeout));
+accept({https, Listener}, Timeout) ->
+    case ssl:transport_accept(Listener, Timeout) of
+        {ok, Transport} ->
+            tag_socket(https, ssl:handshake(Transport, Timeout));
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+%% Pair a successfully opened socket with its scheme; errors pass through
+tag_socket(Scheme, {ok, RawSocket}) ->
+    {ok, {Scheme, RawSocket}};
+tag_socket(_Scheme, {error, Reason}) ->
+    {error, Reason}.
+
+%% Receive from the socket, with Size and Timeout passed through to the
+%% underlying recv call
+-spec recv(
+    socket(),
+    non_neg_integer(),
+    non_neg_integer() | infinity
+) ->
+    {ok, binary()} | {error, any()}.
+recv({http, Socket}, Size, Timeout) ->
+    binary_result(gen_tcp:recv(Socket, Size, Timeout));
+recv({https, Socket}, Size, Timeout) ->
+    binary_result(ssl:recv(Socket, Size, Timeout)).
+
+%% Only binary data is an acceptable successful receive
+binary_result({ok, Data}) when is_binary(Data) ->
+    {ok, Data};
+binary_result({error, Error}) ->
+    {error, Error}.
+
+%% Send Data on the socket
+-spec send(socket(), binary()) -> ok | {error, any()}.
+send({http, TcpSocket}, Data) ->
+    gen_tcp:send(TcpSocket, Data);
+send({https, TlsSocket}, Data) ->
+    ssl:send(TlsSocket, Data).
+
+%% Close the socket
+-spec close(socket()) -> ok | {error, any()}.
+close({http, TcpSocket}) ->
+    gen_tcp:close(TcpSocket);
+close({https, TlsSocket}) ->
+    ssl:close(TlsSocket).
+ +-spec get_peer(socket()) -> {ok, inet:ip_address()} | {error, any()}. +get_peer({http, Socket}) -> + case inet:peername(Socket) of + {ok, {Addr, _Port}} when is_tuple(Addr) -> + {ok, Addr}; + {error, Error} -> + {error, Error} + end; +get_peer({https, Socket}) -> + case ssl:peername(Socket) of + {ok, {Addr, _Port}} when is_tuple(Addr) -> + {ok, Addr}; + {error, Error} -> + {error, Error} + end. \ No newline at end of file From fd5c2b4abb5b7f78aa9d68988ab39301ea382961 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 26 Mar 2026 20:00:18 +0000 Subject: [PATCH 02/53] Initial test of get_body --- src/riak_api_web_body.erl | 112 ++++++++++++++++++++++++++++++++------ 1 file changed, 94 insertions(+), 18 deletions(-) diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index ca50346..2392a3b 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -26,31 +26,18 @@ -export([get_buffer/1, initiate_body/5, get_body/3]). --ifdef(TEST). -record(req_body, { buffer :: binary(), content_length :: non_neg_integer() | chunked, gzip :: boolean(), - acc_size :: non_neg_integer(), + acc_size = 0 :: non_neg_integer(), max_size :: pos_integer(), buffer_fun :: buffer_fun(), - test_only = undefined :: any()| undefined - % to be used in tests to mimic scenarios - } -). --else. --record(req_body, - { - buffer :: binary(), - content_length :: non_neg_integer() | chunked, - gzip :: boolean(), - acc_size :: non_neg_integer(), - max_size :: pos_integer(), - buffer_fun :: buffer_fun() + test_packets = [] :: list(binary()) + % only used in tests } ). --endif. -type req_body() :: #req_body{}. 
@@ -82,7 +69,6 @@ initiate_body(BufferFun, BdyBuffer, CLorChunk, UseGzip, MaxBodySize) -> buffer = BdyBuffer, content_length = CLorChunk, gzip = UseGzip, - acc_size = 0, max_size = MaxBodySize, buffer_fun = BufferFun } @@ -104,7 +90,7 @@ get_body( TO ) when is_integer(CL) -> case byte_size(Bin) + AccSize of - AccSize0 when AccSize0 > CL -> + AccSize0 when AccSize0 >= CL -> <> = Bin, { ReqBody, @@ -170,6 +156,19 @@ get_body( ) when CL == chunked, AS > MS -> {error, content_too_large}. +-ifdef(TEST). +extend_buffer(ReqBody, Size, _Timeout) -> + {NextBin, RestPackets} = + accrue_packets( + ReqBody#req_body.test_packets, + Size, + ReqBody#req_body.buffer + ), + ReqBody#req_body{ + buffer = NextBin, + test_packets = RestPackets + }. +-else. -spec extend_buffer( req_body(), pos_integer(), @@ -181,6 +180,7 @@ extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) -> buffer = BufferFun(ReqBody#req_body.buffer, Size, Timeout) }. +-endif. %%%============================================================================ %%% Eunit tests @@ -189,4 +189,80 @@ extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). 
+slicing_fixed_length_test() -> + %% Receive a 11KB body in 1KB packets + %% Slicing into 2 4KB portions, and 1 3KB + Body = crypto:strong_rand_bytes(11 * 1024), + Packets = packet_testbin(Body, []), + RqBdyInit = + #req_body{ + buffer = <<>>, + content_length = 11 * 1024, + max_size = 1024 * 1024, + test_packets = Packets + }, + {Slice1, RqBdy1} = get_body(RqBdyInit, 4 * 1024, 60 * 1000), + {Slice2, RqBdy2} = get_body(RqBdy1, 4 * 1024, 60 * 1000), + {Slice3, RqBdy3} = get_body(RqBdy2, 4 * 1024, 60 * 1000), + ?assertMatch(4096, byte_size(Slice1)), + ?assertMatch(4096, byte_size(Slice2)), + ?assertMatch(3072, byte_size(Slice3)), + CompleteResult = <>, + ?assertMatch(Body, CompleteResult), + ?assertMatch(<<>>, get_buffer(RqBdy3)), + ?assertMatch(done, element(1, get_body(RqBdy3, 4 * 1024, 60 * 1000))), + + %% Request the full content-length in one shot + {AllBin, RqBody4} = get_body(RqBdyInit, all, 60 * 1000), + ?assertMatch(AllBin, Body), + ?assertMatch(<<>>, get_buffer(RqBody4)), + + % Start with some of the first packet on the buffer, and end with + % some of a pipelined request in the buffer + [FirstPacket|RestPackets] = Packets, + <> = FirstPacket, + DummyRequest = crypto:strong_rand_bytes(64), + RqBdyAlt0 = + #req_body{ + buffer = OnBuffer, + content_length = 11 * 1024, + max_size = 1024 * 1024, + test_packets = [OnSocket|RestPackets] ++ [DummyRequest] + }, + {SliceAlt1, RqBdyAlt1} = get_body(RqBdyAlt0, 4 * 1024, 60 * 1000), + {SliceAlt2, RqBdyAlt2} = get_body(RqBdyAlt1, 4 * 1024, 60 * 1000), + {SliceAlt3, RqBdyAlt3} = get_body(RqBdyAlt2, 4 * 1024, 60 * 1000), + ?assertMatch(4096, byte_size(SliceAlt1)), + ?assertMatch(4096, byte_size(SliceAlt2)), + ?assertMatch(3072, byte_size(SliceAlt3)), + CompleteResult = <>, + ?assertMatch( + Body, + <> + ), + SocketBin = iolist_to_binary(RqBdyAlt3#req_body.test_packets), + Remainder = <<(RqBdyAlt3#req_body.buffer)/binary, SocketBin/binary>>, + ?assertMatch(DummyRequest, Remainder) + . 
+ +packet_testbin(<<>>, Acc) -> + lists:reverse(Acc); +packet_testbin(<>, Acc) -> + packet_testbin(Rest, [Bin|Acc]). + +accrue_packets(Rest, 0, Buffer) -> + {Buffer, Rest}; +accrue_packets([NextPacket|Rest], Size, Buffer) -> + case Size of + Needed when Needed < byte_size(NextPacket) -> + <> = NextPacket, + {<>, [RestPacket|Rest]}; + Needed -> + accrue_packets( + Rest, + Needed - byte_size(NextPacket), + <> + ) + end. + -endif. \ No newline at end of file From 256151c0438036780c8140c6bc31688da19b5a89 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 26 Mar 2026 20:06:20 +0000 Subject: [PATCH 03/53] Fix formatting of new files Apply only to new files, or heavily altered files so that broader change history is maintained. --- .github/workflows/erlang.yml | 2 + rebar.config | 48 ++++++-- src/riak_api_web.erl | 117 ++++++++++++-------- src/riak_api_web_acceptor.erl | 120 +++++++++++--------- src/riak_api_web_body.erl | 67 ++++++----- src/riak_api_web_headers.erl | 202 +++++++++++++++++----------------- src/riak_api_web_security.erl | 14 +-- src/riak_api_web_socket.erl | 139 ++++++++++++----------- 8 files changed, 391 insertions(+), 318 deletions(-) diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml index c142f59..6b174f3 100644 --- a/.github/workflows/erlang.yml +++ b/.github/workflows/erlang.yml @@ -34,3 +34,5 @@ jobs: run: ./rebar3 do xref, dialyzer - name: Run eunit run: ./rebar3 as gha do eunit + - name: Check format + run: ./rebar3 fmt --check diff --git a/rebar.config b/rebar.config index d3f77f6..c022a27 100644 --- a/rebar.config +++ b/rebar.config @@ -4,20 +4,54 @@ {erl_opts, [warnings_as_errors]}. +{erlfmt, [ + write, + {print_width, 80}, + {files, [ + "src/riak_api_web_acceptor.erl", + "src/riak_api_web_body.erl", + "src/riak_api_web_headers.erl", + "src/riak_api_web_security.erl", + "src/riak_api_web_socket.erl", + "src/riak_api_web.erl", + "test/end_to_end/*.erl", + "rebar.config" + ]}, + {exclude_files, []} +]}. 
+ +{project_plugins, [ + {erlfmt, {git, "https://github.com/OpenRiak/erlfmt.git", {branch, "main"}}} +]}. + {eunit_opts, [verbose]}. {deps, [ - {riak_pb, {git, "https://github.com/OpenRiak/riak_pb.git", {branch, "openriak-3.4"}}}, - {webmachine, {git, "https://github.com/OpenRiak/webmachine.git", {branch, "openriak-3.4"}}}, - {mochiweb, {git, "https://github.com/OpenRiak/mochiweb.git", {branch, "openriak-3.4"}}}, - {riak_core, {git, "https://github.com/OpenRiak/riak_core.git", {branch, "openriak-4.0"}}} - ]}. + {riak_pb, + {git, "https://github.com/OpenRiak/riak_pb.git", + {branch, "openriak-3.4"}}}, + {webmachine, + {git, "https://github.com/OpenRiak/webmachine.git", + {branch, "openriak-3.4"}}}, + {mochiweb, + {git, "https://github.com/OpenRiak/mochiweb.git", + {branch, "openriak-3.4"}}}, + {riak_core, + {git, "https://github.com/OpenRiak/riak_core.git", + {branch, "openriak-4.0"}}} +]}. {profiles, [ - {test, [{deps, [{meck, {git, "https://github.com/OpenRiak/meck.git", {branch, "openriak-3.4"}}}]}]}, + {test, [ + {deps, [ + {meck, + {git, "https://github.com/OpenRiak/meck.git", + {branch, "openriak-3.4"}}} + ]} + ]}, {gha, [{erl_opts, [{d, 'GITHUBEXCLUDE'}]}]} ]}. {dialyzer, [{plt_apps, all_deps}]}. -{xref_checks,[undefined_function_calls,undefined_functions,locals_not_used]}. +{xref_checks, [undefined_function_calls, undefined_functions, locals_not_used]}. diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index 18b853f..569558f 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -24,7 +24,6 @@ %% of Riak. -module(riak_api_web). - -export( [ get_listeners/0, @@ -40,7 +39,6 @@ -type route() :: {1..100, module()}. - -spec add_routes(list(route())) -> ok. 
add_routes(Routes) -> CurrentRoutes = persistent_term:get(?ROUTE_KEY, []), @@ -50,21 +48,21 @@ add_routes(Routes) -> -spec get_route( riak_api_web_acceptor:method(), unicode:chardata() -) -> +) -> { ok, module(), any(), {pos_integer(), pos_integer(), pos_integer()} - } | - riak_api_web_acceptor:halt_response(). + } + | riak_api_web_acceptor:halt_response(). get_route(Method, Path) -> CurrentRoutes = persistent_term:get(?ROUTE_KEY, []), get_route(CurrentRoutes, Method, Path). get_route([], _Method, _Path) -> {halt, 404, none, <<>>, []}; -get_route([{_P, CallbackMod}|Rest], Method, Path) -> +get_route([{_P, CallbackMod} | Rest], Method, Path) -> case CallbackMod:match_route(Method, Path) of no_match -> get_route(Rest, Method, Path); @@ -76,66 +74,93 @@ get_listeners() -> get_listeners(http) ++ get_listeners(https). get_listeners(Scheme) -> - Listeners = case app_helper:try_envs([{riak_api, Scheme}, - {riak_core, Scheme}], []) of - {riak_api, Scheme, List} when is_list(List) -> - List; - {riak_core, Scheme, List} when is_list(List) -> - ?LOG_WARNING("Setting riak_core/~s is deprecated, please use riak_api/~s", [Scheme, Scheme]), - List; - _ -> - [] - end, - lists:usort([ {Scheme, Binding} || Binding <- Listeners ]). + Listeners = + case + app_helper:try_envs( + [ + {riak_api, Scheme}, + {riak_core, Scheme} + ], + [] + ) + of + {riak_api, Scheme, List} when is_list(List) -> + List; + {riak_core, Scheme, List} when is_list(List) -> + ?LOG_WARNING( + "Setting riak_core/~s is deprecated, please use riak_api/~s", + [Scheme, Scheme] + ), + List; + _ -> + [] + end, + lists:usort([{Scheme, Binding} || Binding <- Listeners]). binding_config(Scheme, Binding) -> {Ip, Port} = Binding, Name = spec_name(Scheme, Ip, Port), Config = spec_from_binding(Scheme, Name, Binding), - {Name, - {webmachine_mochiweb, start, [Config]}, - permanent, 5000, worker, [mochiweb_socket_server]}. + {Name, {webmachine_mochiweb, start, [Config]}, permanent, 5000, worker, [ + mochiweb_socket_server + ]}. 
spec_from_binding(http, Name, {Ip, Port}) ->
-    Options =
-        lists:flatten([{name, Name},
-                  {ip, Ip},
-                  {port, Port},
-                  {nodelay, true}],
-                  common_config()),
+    Options =
+        lists:flatten(
+            [
+                {name, Name},
+                {ip, Ip},
+                {port, Port},
+                {nodelay, true}
+            ],
+            common_config()
+        ),
     add_recbuf(Options);
 spec_from_binding(https, Name, {Ip, Port}) ->
-    Options =
-        lists:flatten([{name, Name},
-                  {ip, Ip},
-                  {port, Port},
-                  {ssl, true},
-                  {ssl_opts, riak_api_ssl:options()},
-                  {nodelay, true}],
-                  common_config()),
+    Options =
+        lists:flatten(
+            [
+                {name, Name},
+                {ip, Ip},
+                {port, Port},
+                {ssl, true},
+                {ssl_opts, riak_api_ssl:options()},
+                {nodelay, true}
+            ],
+            common_config()
+        ),
     add_recbuf(Options).
 
 add_recbuf(Options) ->
     case application:get_env(webmachine, recbuf) of
         {ok, RecBuf} ->
-            [{recbuf, RecBuf}|Options];
+            [{recbuf, RecBuf} | Options];
         _ ->
             Options
     end.
 
 spec_name(Scheme, Ip, Port) ->
-    FormattedIP = if is_tuple(Ip); tuple_size(Ip) == 4 ->
-                          inet_parse:ntoa(Ip);
-                     is_tuple(Ip); tuple_size(Ip) == 8 ->
-                          [$[, inet_parse:ntoa(Ip), $]];
-                     true -> Ip
-                  end,
+    %% Both tests of a guard must hold (`,`): with `;` any tuple matched the
+    %% first clause, so an IPv6 address was never wrapped in brackets
+    FormattedIP =
+        if
+            is_tuple(Ip), tuple_size(Ip) == 4 ->
+                inet_parse:ntoa(Ip);
+            is_tuple(Ip), tuple_size(Ip) == 8 ->
+                [$[, inet_parse:ntoa(Ip), $]];
+            true ->
+                Ip
+        end,
     lists:flatten(io_lib:format("~s://~s:~p", [Scheme, FormattedIP, Port])).
 
 common_config() ->
-    [{log_dir, app_helper:get_env(riak_api, http_logdir,
-      app_helper:get_env(riak_core, platform_log_dir, "log"))},
-     {backlog, 128},
-     {dispatch, [{[], riak_api_wm_urlmap, []}
-                ]}].
+    [
+        {log_dir,
+            app_helper:get_env(
+                riak_api,
+                http_logdir,
+                app_helper:get_env(riak_core, platform_log_dir, "log")
+            )},
+        {backlog, 128},
+        {dispatch, [{[], riak_api_wm_urlmap, []}]}
+    ].
diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 43debef..7c4f17c 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -1,4 +1,4 @@ -%% ------------------------------------------------------------------- +%% ------------------------------------------------------------------- %% %% Copyright (c) 2026 Martin Sumner %% @@ -37,22 +37,22 @@ -define(CONTINUE_RESPONSE, <<"HTTP 1.1 100 Continue">>). -type response_code() :: - 400 | - 413 | - 431 | - 200. + 400 + | 413 + | 431 + | 200. -type method() :: 'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE' | 'OPTIONS' | 'TRACE'. -type http_version() :: - {1, 0} | { 1, 1}. + {1, 0} | {1, 1}. -type halt_response() :: { halt, response_code(), - riak_api_web_headers:headers()|none, + riak_api_web_headers:headers() | none, binary(), list() }. @@ -67,18 +67,18 @@ }. -type good_result() :: { - finish, - boolean(), - response_code(), - riak_api_web_headers:headers(), - {stream, stream_fun()} | binary(), - {module(), any()}, - riak_api_web_socket:socket(), - binary(), - pos_integer() + finish, + boolean(), + response_code(), + riak_api_web_headers:headers(), + {stream, stream_fun()} | binary(), + {module(), any()}, + riak_api_web_socket:socket(), + binary(), + pos_integer() }. --type stream_fun() :: fun(() -> {ok, binary()}|done). +-type stream_fun() :: fun(() -> {ok, binary()} | done). -export_type([halt_response/0, method/0]). @@ -172,12 +172,12 @@ handle_request(Socket, InitBuffer) -> ModCtx3, InitReqBdy ), - Keepalive = + Keepalive = request_prefers_keepalive(Version, ReqHeaders) andalso - KeepAliveOK, + KeepAliveOK, MergedRspHeaders = riak_api_web_headers:enter_from_list( - RspHeaders, + RspHeaders, default_response_headers(Keepalive) ), { @@ -202,7 +202,6 @@ handle_request(Socket, InitBuffer) -> %%% Manage Version on Process dictionary %%%============================================================================ - -define(VERSION_KEY, {?MODULE, http_version}). 
set_version({1, 0}) -> @@ -221,7 +220,6 @@ get_version() -> reset_version() -> put(?VERSION_KEY, undefined). - %%%============================================================================ %%% Internal request handling functions %%%============================================================================ @@ -232,12 +230,12 @@ bad_request(Error, Subs) -> -spec split_path( iodata() -) -> +) -> { ok, {unicode:chardata(), [{unicode:chardata(), unicode:chardata() | true}]} - } | - halt_response(). + } + | halt_response(). split_path(URIPath) -> case uri_string:normalize(URIPath, [return_map]) of URIMap when is_map(URIMap) -> @@ -250,7 +248,7 @@ split_path(URIPath) -> <<"Query parameters not parsed ~w - ~0p">>, [QTerm, QReason] ) - end; + end; {error, NTerm, NReason} -> bad_request( <<"Path cannot be normalized ~w - ~0p">>, @@ -262,8 +260,8 @@ split_path(URIPath) -> riak_api_web_socket:socket(), binary(), non_neg_integer(), - pos_integer()|undefined -) -> + pos_integer() | undefined +) -> binary(). extend_buffer(Socket, Buffer, Needed, Timeout) -> case riak_api_web_socket:recv(Socket, Needed, get_timeout(Timeout)) of @@ -281,7 +279,7 @@ extend_buffer(Socket, Buffer, Needed, Timeout) -> -spec extend_buffer_fun( riak_api_web_socket:socket() -) -> +) -> riak_api_web_body:buffer_fun(). extend_buffer_fun(Socket) -> fun(Buffer, Needed, Timeout) -> @@ -290,8 +288,8 @@ extend_buffer_fun(Socket) -> -spec expect_body( riak_api_web_headers:headers() -) -> - {ok, {non_neg_integer() | chunked , boolean()}} | halt_response(). +) -> + {ok, {non_neg_integer() | chunked, boolean()}} | halt_response(). 
expect_body(Headers) -> ContentLengthH = riak_api_web_headers:get_unique_value('Content-Length', Headers), @@ -321,7 +319,7 @@ expect_body(Headers) -> ) end catch - _ : _ -> + _:_ -> bad_request(<<"Non-integer content length ~0p">>, [ValBin]) end; {undefined, <<"chunked">>} -> @@ -344,9 +342,9 @@ generate_error_body(ErrorText, Subs) -> -spec get_request_line( riak_api_web_socket:socket(), binary() -) -> - {ok, {method(), binary(), http_version(), binary()}} | - halt_response(). +) -> + {ok, {method(), binary(), http_version(), binary()}} + | halt_response(). get_request_line(Socket, Buffer) -> case erlang:decode_packet(http_bin, Buffer, []) of {more, _} -> @@ -354,19 +352,21 @@ get_request_line(Socket, Buffer) -> Socket, extend_buffer(Socket, Buffer, 0, undefined) ); - {ok, {http_request, Method, {abs_path, Path}, Version}, Rest} - when is_binary(Path) -> + {ok, {http_request, Method, {abs_path, Path}, Version}, Rest} when + is_binary(Path) + -> case Version of SV when SV == {1, 0}; SV == {1, 1} -> case Method of - SM when + SM when SM == 'GET'; - SM == 'HEAD'; + SM == 'HEAD'; SM == 'POST'; SM == 'PUT'; SM == 'DELETE'; SM == 'OPTIONS'; - SM == 'TRACE' -> + SM == 'TRACE' + -> {ok, {SM, Path, SV, Rest}}; _USM -> {halt, 405, none, <<>>, []} @@ -388,9 +388,9 @@ get_request_line(Socket, Buffer) -> binary(), riak_api_web_socket:socket(), {pos_integer(), pos_integer()} -) -> - {ok, riak_api_web_headers:headers(), binary()} | - riak_api_web_acceptor:halt_response(). +) -> + {ok, riak_api_web_headers:headers(), binary()} + | riak_api_web_acceptor:halt_response(). get_request_headers(Buffer, Socket, {MaxCount, MaxSize}) -> riak_api_web_headers:parse_request_block( Buffer, @@ -403,7 +403,7 @@ get_request_headers(Buffer, Socket, {MaxCount, MaxSize}) -> -spec request_prefers_keepalive( http_version(), riak_api_web_headers:headers() -) -> +) -> boolean(). 
request_prefers_keepalive({1, 0}, ReqHeaders) -> %% https://www.rfc-editor.org/rfc/rfc7230#section-6.1 @@ -433,9 +433,9 @@ request_prefers_keepalive({1, 1}, ReqHeaders) -> end. -spec get_timeout( - undefined|infinity|non_neg_integer() -) -> - non_neg_integer()|infinity. + undefined | infinity | non_neg_integer() +) -> + non_neg_integer() | infinity. get_timeout(undefined) -> ?RECEIVE_TIMEOUT; get_timeout(infinity) -> @@ -449,7 +449,7 @@ get_timeout(Timeout) when is_integer(Timeout), Timeout >= 0 -> -spec handle_response( good_result() | halt_result() -) -> +) -> {boolean(), binary()} | close. handle_response( { @@ -467,7 +467,13 @@ handle_response( RequestCompleteTime = os:system_time(microsecond), stream_response(RspCode, RspHeaders, StreamFun, Socket), ResponseCompleteTime = os:system_time(microsecond), - CallbackMod:record_request(Context, StartTime, RequestCompleteTime, ResponseCompleteTime, stream_complete), + CallbackMod:record_request( + Context, + StartTime, + RequestCompleteTime, + ResponseCompleteTime, + stream_complete + ), {Keepalive, BufferIn}; handle_response( { @@ -485,12 +491,18 @@ handle_response( RequestCompleteTime = os:system_time(microsecond), send_response(RspCode, RspHeaders, RspBody, Socket), ResponseCompleteTime = os:system_time(microsecond), - CallbackMod:record_request(Context, StartTime, RequestCompleteTime, ResponseCompleteTime, send_complete), + CallbackMod:record_request( + Context, + StartTime, + RequestCompleteTime, + ResponseCompleteTime, + send_complete + ), {Keepalive, BufferIn}; handle_response({halt, RspCode, RspHeaders, RspBody, Socket}) -> MergedRspHeaders = riak_api_web_headers:enter_from_list( - RspHeaders, + RspHeaders, default_response_headers(false) ), send_response(RspCode, MergedRspHeaders, RspBody, Socket), @@ -499,7 +511,7 @@ handle_response({halt, RspCode, RspHeaders, RspBody, Socket}) -> -spec send_continue( riak_api_web_socket:socket(), riak_api_web_headers:headers() -) -> +) -> ok | {error, term()}. 
send_continue(Socket, ReqHeaders) -> case riak_api_web_headers:lookup(<<"expect">>, ReqHeaders, true) of @@ -514,7 +526,7 @@ send_continue(Socket, ReqHeaders) -> riak_api_web_headers:headers(), stream_fun(), riak_api_web_socket:socket() -) -> +) -> ok. stream_response(_RspCode, _RspHeaders, _StreamFun, _Socket) -> ok. @@ -524,7 +536,7 @@ stream_response(_RspCode, _RspHeaders, _StreamFun, _Socket) -> riak_api_web_headers:headers(), binary(), riak_api_web_socket:socket() -) -> +) -> ok. send_response(_RspCode, _RspHeaders, _RspBody, _Socket) -> _Version = get_version(), diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index 2392a3b..e424594 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -1,4 +1,4 @@ -%% ------------------------------------------------------------------- +%% ------------------------------------------------------------------- %% %% Copyright (c) 2007-2009 Basho Technologies %% Copyright (c) 2026 Martin Sumner @@ -19,30 +19,28 @@ %% %% ------------------------------------------------------------------- %% @doc Handling functions for receiving and sending object bodies over HTTP -%% +%% %% Handling of chunked requests, and some other parts inspired by webmachine. -module(riak_api_web_body). -export([get_buffer/1, initiate_body/5, get_body/3]). --record(req_body, - { - buffer :: binary(), - content_length :: non_neg_integer() | chunked, - gzip :: boolean(), - acc_size = 0 :: non_neg_integer(), - max_size :: pos_integer(), - buffer_fun :: buffer_fun(), - test_packets = [] :: list(binary()) - % only used in tests - } -). +-record(req_body, { + buffer :: binary(), + content_length :: non_neg_integer() | chunked, + gzip :: boolean(), + acc_size = 0 :: non_neg_integer(), + max_size :: pos_integer(), + buffer_fun :: buffer_fun(), + test_packets = [] :: list(binary()) + % only used in tests +}). -type req_body() :: #req_body{}. 
-type buffer_fun() :: - fun((binary(), pos_integer(), non_neg_integer()|undefined) -> binary()). + fun((binary(), pos_integer(), non_neg_integer() | undefined) -> binary()). -export_type([req_body/0, buffer_fun/0]). @@ -75,14 +73,16 @@ initiate_body(BufferFun, BdyBuffer, CLorChunk, UseGzip, MaxBodySize) -> }. -spec get_body( - req_body(), all|pos_integer(), pos_integer()|undefined -) -> - {binary()|done, req_body()} | {error, content_too_large}. -get_body(#req_body{content_length = CL, max_size = MS}, _SL, _TO) - when is_integer(CL), CL > MS -> + req_body(), all | pos_integer(), pos_integer() | undefined +) -> + {binary() | done, req_body()} | {error, content_too_large}. +get_body(#req_body{content_length = CL, max_size = MS}, _SL, _TO) when + is_integer(CL), CL > MS +-> {error, content_too_large}; -get_body(#req_body{content_length = CL, acc_size = AS} = RqBdy, _SL, _TO) - when is_integer(CL), CL == AS -> +get_body(#req_body{content_length = CL, acc_size = AS} = RqBdy, _SL, _TO) when + is_integer(CL), CL == AS +-> {done, RqBdy}; get_body( #req_body{content_length = CL, acc_size = AccSize, buffer = Bin} = RqBdy, @@ -172,8 +172,8 @@ extend_buffer(ReqBody, Size, _Timeout) -> -spec extend_buffer( req_body(), pos_integer(), - non_neg_integer()|undefined -) -> + non_neg_integer() | undefined +) -> req_body(). 
extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) -> ReqBody#req_body{ @@ -211,7 +211,7 @@ slicing_fixed_length_test() -> ?assertMatch(Body, CompleteResult), ?assertMatch(<<>>, get_buffer(RqBdy3)), ?assertMatch(done, element(1, get_body(RqBdy3, 4 * 1024, 60 * 1000))), - + %% Request the full content-length in one shot {AllBin, RqBody4} = get_body(RqBdyInit, all, 60 * 1000), ?assertMatch(AllBin, Body), @@ -219,7 +219,7 @@ slicing_fixed_length_test() -> % Start with some of the first packet on the buffer, and end with % some of a pipelined request in the buffer - [FirstPacket|RestPackets] = Packets, + [FirstPacket | RestPackets] = Packets, <> = FirstPacket, DummyRequest = crypto:strong_rand_bytes(64), RqBdyAlt0 = @@ -227,7 +227,7 @@ slicing_fixed_length_test() -> buffer = OnBuffer, content_length = 11 * 1024, max_size = 1024 * 1024, - test_packets = [OnSocket|RestPackets] ++ [DummyRequest] + test_packets = [OnSocket | RestPackets] ++ [DummyRequest] }, {SliceAlt1, RqBdyAlt1} = get_body(RqBdyAlt0, 4 * 1024, 60 * 1000), {SliceAlt2, RqBdyAlt2} = get_body(RqBdyAlt1, 4 * 1024, 60 * 1000), @@ -241,22 +241,21 @@ slicing_fixed_length_test() -> <> ), SocketBin = iolist_to_binary(RqBdyAlt3#req_body.test_packets), - Remainder = <<(RqBdyAlt3#req_body.buffer)/binary, SocketBin/binary>>, - ?assertMatch(DummyRequest, Remainder) - . + Remainder = <<(RqBdyAlt3#req_body.buffer)/binary, SocketBin/binary>>, + ?assertMatch(DummyRequest, Remainder). packet_testbin(<<>>, Acc) -> lists:reverse(Acc); packet_testbin(<>, Acc) -> - packet_testbin(Rest, [Bin|Acc]). + packet_testbin(Rest, [Bin | Acc]). 
accrue_packets(Rest, 0, Buffer) -> {Buffer, Rest}; -accrue_packets([NextPacket|Rest], Size, Buffer) -> +accrue_packets([NextPacket | Rest], Size, Buffer) -> case Size of Needed when Needed < byte_size(NextPacket) -> <> = NextPacket, - {<>, [RestPacket|Rest]}; + {<>, [RestPacket | Rest]}; Needed -> accrue_packets( Rest, @@ -265,4 +264,4 @@ accrue_packets([NextPacket|Rest], Size, Buffer) -> ) end. --endif. \ No newline at end of file +-endif. diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 4062255..5b04d7d 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -1,4 +1,4 @@ -%% ------------------------------------------------------------------- +%% ------------------------------------------------------------------- %% %% Copyright (c) 2007 Mochi Media, Inc %% Copyright (c) 2026 Martin Sumner @@ -19,14 +19,14 @@ %% %% ------------------------------------------------------------------- %% @doc Case preserving (but case insensitive) HTTP Header dictionary. -%% +%% %% The headers are stored in a map, and the header keys will be an atom if %% in the standard list of headers decoded by Erlang/OTP - and otherwise a %% binary(). -%% +%% %% The values will always be binaries, comma(-and-space)-separated for values %% with multiple items -%% +%% %% The module was initially a refactoring of the mochiweb_headers module. -module(riak_api_web_headers). @@ -37,64 +37,74 @@ -export([output_response_block/1, parse_request_block/3]). -define(KV_SEPARATOR, <<": ">>). --define(V_SEPARATOR, <<", ">>). +-define(V_SEPARATOR, <<", ">>). -define(L_SEPARATOR, <<"\r\n">>). --record(headers, - { - type = request :: request|response, - %% response headers do not support the lookup of non-standard - %% header keys - and hence avoid the need to lower case those - %% keys for comparison - header_map = maps:new() :: header_map() - } -). 
+-record(headers, { + type = request :: request | response, + %% response headers do not support the lookup of non-standard + %% header keys - and hence avoid the need to lower case those + %% keys for comparison + header_map = maps:new() :: header_map() +}). -type standard_header_key() :: - 'Cache-Control' | - 'Connection' | - 'Date' | - 'Pragma' | - 'Transfer-Encoding' | - 'Upgrade' | - 'Via' | - 'Accept' | - 'Accept-Charset' | 'Accept-Encoding' | 'Accept-Language' | - 'Authorization' | - 'Proxy-Authorization' | 'Proxy-Authenticate' | 'Www-Authenticate' | - 'From' | - 'Host' | - 'If-Modified-Since' | 'If-Match' | 'If-None-Match' | - 'If-Range' | 'If-Unmodified-Since' | - 'Max-Forwards' | - 'Range' | - 'Referer' | - 'User-Agent' | - 'Age' | - 'Location' | - 'Public' | - 'Retry-After' | - 'Server' | - 'Vary' | - 'Warning' | - 'Allow' | - 'Content-Base' | 'Content-Encoding' | 'Content-Language' | - 'Content-Length' | 'Content-Location' | 'Content-Md5' | - 'Content-Range' | 'Content-Type' | - 'Etag' | - 'Expires' | - 'Last-Modified' | - 'Accept-Ranges' | - 'Set-Cookie' | - 'Set-Cookie2' | - 'X-Forwarded-For' | - 'Cookie' | - 'Keep-Alive' | - 'Proxy-Connection'. - % This list is controlled by Erlang/OTP - i.e. there may be further atoms - % added in the future, but it has been stable since OTP 13. 
+ 'Cache-Control' + | 'Connection' + | 'Date' + | 'Pragma' + | 'Transfer-Encoding' + | 'Upgrade' + | 'Via' + | 'Accept' + | 'Accept-Charset' + | 'Accept-Encoding' + | 'Accept-Language' + | 'Authorization' + | 'Proxy-Authorization' + | 'Proxy-Authenticate' + | 'Www-Authenticate' + | 'From' + | 'Host' + | 'If-Modified-Since' + | 'If-Match' + | 'If-None-Match' + | 'If-Range' + | 'If-Unmodified-Since' + | 'Max-Forwards' + | 'Range' + | 'Referer' + | 'User-Agent' + | 'Age' + | 'Location' + | 'Public' + | 'Retry-After' + | 'Server' + | 'Vary' + | 'Warning' + | 'Allow' + | 'Content-Base' + | 'Content-Encoding' + | 'Content-Language' + | 'Content-Length' + | 'Content-Location' + | 'Content-Md5' + | 'Content-Range' + | 'Content-Type' + | 'Etag' + | 'Expires' + | 'Last-Modified' + | 'Accept-Ranges' + | 'Set-Cookie' + | 'Set-Cookie2' + | 'X-Forwarded-For' + | 'Cookie' + | 'Keep-Alive' + | 'Proxy-Connection'. +% This list is controlled by Erlang/OTP - i.e. there may be further atoms +% added in the future, but it has been stable since OTP 13. -type binary_header_key() :: unicode:chardata() | binary(). --type header_key() :: standard_header_key()|binary_header_key(). +-type header_key() :: standard_header_key() | binary_header_key(). -type header_value() :: {binary(), list(binary())}. -type header_map() :: #{header_key() => header_value()}. -type header_list() :: [{header_key(), binary()}]. @@ -107,7 +117,7 @@ %%% API %%%============================================================================ -%% @doc +%% @doc %% Construct a headers() from the given list of headers received in a %% request. -spec make([{header_key(), binary()}]) -> headers(). @@ -120,8 +130,7 @@ make(HeaderList) when is_list(HeaderList) -> %% With response headers it is not possible to lookup non-standard header keys, %% An the value may be a list if elements - that will be joined into a single %% comma-separated value before creating the response header. 
--spec make_rsp_header([{header_key(), list(binary())|binary()}] -) -> +-spec make_rsp_header([{header_key(), list(binary()) | binary()}]) -> headers(). make_rsp_header(HeaderList) -> HeaderMap = from_list(HeaderList, false), @@ -131,11 +140,11 @@ make_rsp_header(HeaderList) -> %% Insert pairs into the headers, replace any values for existing keys. %% Specifically used in response headers when setting ranges into existing %% headers. --spec enter_from_list([{header_key(), binary()}], headers() -) -> +-spec enter_from_list([{header_key(), binary()}], headers()) -> headers(). -enter_from_list(HeaderList, #headers{type = T, header_map = HM}) - when T == response -> +enter_from_list(HeaderList, #headers{type = T, header_map = HM}) when + T == response +-> #headers{ type = response, header_map = maps:merge(HM, from_list(HeaderList, false)) @@ -143,11 +152,11 @@ enter_from_list(HeaderList, #headers{type = T, header_map = HM}) %% @doc %% Insert pairs into response headers for keys that do not already exist. --spec default_from_list([{header_key(), binary()}], headers() -) -> +-spec default_from_list([{header_key(), binary()}], headers()) -> headers(). -default_from_list(HeaderList, #headers{type = T, header_map = HM}) - when T == response -> +default_from_list(HeaderList, #headers{type = T, header_map = HM}) when + T == response +-> #headers{ type = response, header_map = maps:merge(from_list(HeaderList, false), HM) @@ -155,11 +164,11 @@ default_from_list(HeaderList, #headers{type = T, header_map = HM}) %% @doc %% Add a single value for a single key to the response map --spec enter(header_key(), binary(), headers() -) -> +-spec enter(header_key(), binary(), headers()) -> headers(). 
-enter(HeaderKey, Value, #headers{type = T, header_map = HM}) - when T == response -> +enter(HeaderKey, Value, #headers{type = T, header_map = HM}) when + T == response +-> {HK, HV} = normalize_header({HeaderKey, Value}, false), #headers{ type = response, @@ -172,8 +181,7 @@ enter(HeaderKey, Value, #headers{type = T, header_map = HM}) %% For non-standard (binary) keys use lookup/2. %% If the values was a comma-separated list, or multiple headers have been %% folded together - then a list rather than a single value is returned. --spec get_value(standard_header_key(), headers() -) -> +-spec get_value(standard_header_key(), headers()) -> unicode:chardata() | list(unicode:chardata()) | undefined. get_value(K, H) when is_atom(K) -> case maps:get(K, H#headers.header_map, undefined) of @@ -189,8 +197,7 @@ get_value(K, H) when is_atom(K) -> %% If multiple values may be provided for a field, but it is illegal %% for those values to differ (e.g. in the case of content-length), only return %% a value, if there is only one unique value. --spec get_unique_value(standard_header_key(), headers() -) -> +-spec get_unique_value(standard_header_key(), headers()) -> unicode:chardata() | undefined | {error, multiple_values}. get_unique_value(K, H) -> case maps:get(K, H#headers.header_map, undefined) of @@ -221,9 +228,8 @@ parse_primary_header_value(HeaderValue) -> %% @doc %% Fetch the {original key, values} for a binary (non-standard) header key. %% There is a boolean flag to indicate if the key has already been subject to -%% casefold. --spec lookup(binary_header_key(), headers(), boolean() -) -> +%% casefold. +-spec lookup(binary_header_key(), headers(), boolean()) -> {binary(), list(unicode:chardata())} | undefined. lookup(CaseFoldedKey, H, true) when is_binary(CaseFoldedKey) -> maps:get(CaseFoldedKey, H#headers.header_map, undefined); @@ -234,8 +240,7 @@ lookup(RawKey, Headers, false) when is_binary(RawKey) -> %% Fetch a list of non-standard headers with a given prefix. 
The list is a %% list of {K, [V]} where K is the remainder of the original key once the %% original prefix has been stripped --spec prefix_fold(binary_header_key(), headers(), boolean() -) -> +-spec prefix_fold(binary_header_key(), headers(), boolean()) -> list({unicode:chardata(), list(unicode:chardata())}). prefix_fold(CaseFoldPrefix, Headers, true) when is_binary(CaseFoldPrefix) -> Keys = maps:keys(Headers#headers.header_map), @@ -296,14 +301,14 @@ parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}, {HeaderAcc, C}) -> Rest, BufferFun, {MaxCount, MaxSize}, - {[{Key, Value}|HeaderAcc], C + 1} + {[{Key, Value} | HeaderAcc], C + 1} ); {ok, {http_header, _, _Key, OrigKey, Value}, Rest} -> parse_request_block( Rest, BufferFun, {MaxCount, MaxSize}, - {[{OrigKey, Value}|HeaderAcc], C + 1} + {[{OrigKey, Value} | HeaderAcc], C + 1} ); {ok, http_eoh, Rest} -> {ok, make(HeaderAcc), Rest}; @@ -346,18 +351,17 @@ join_values(VL) -> list(header_value()). filter_headers([], _Prefix, _PL, _HMap, Acc) -> Acc; -filter_headers([Key|RestKeys], Prefix, PL, HMap, Acc) -> +filter_headers([Key | RestKeys], Prefix, PL, HMap, Acc) -> case Key of - <> -> + <> -> {<<_Ignore:PL/binary, Suffix/binary>>, Values} = maps:get(Key, HMap), - filter_headers(RestKeys, Prefix, PL, HMap, [{Suffix, Values}|Acc]); + filter_headers(RestKeys, Prefix, PL, HMap, [{Suffix, Values} | Acc]); _ -> filter_headers(RestKeys, Prefix, PL, HMap, Acc) end. --spec from_list([{header_key(), binary()|list(binary())}], boolean() -) -> +-spec from_list([{header_key(), binary() | list(binary())}], boolean()) -> header_map(). from_list(HeaderList, IsReqHeader) -> lists:foldl( @@ -375,8 +379,8 @@ from_list(HeaderList, IsReqHeader) -> ). -spec normalize_header( - {header_key(), binary()|list(binary())}, boolean() -) -> + {header_key(), binary() | list(binary())}, boolean() +) -> {header_key(), header_value()}. 
normalize_header({KAtom, Value}, _) when is_atom(KAtom) -> {KAtom, {atom_to_binary(KAtom), normalize_value(Value)}}; @@ -385,15 +389,15 @@ normalize_header({KBin, Value}, true) when is_binary(KBin) -> normalize_header({KBin, Value}, false) when is_binary(KBin) -> {KBin, {KBin, normalize_value(Value)}}. --spec normalize_key(standard_header_key()) -> standard_header_key(); +-spec normalize_key + (standard_header_key()) -> standard_header_key(); (binary_header_key()) -> binary_header_key(). normalize_key(KAtom) when is_atom(KAtom) -> KAtom; normalize_key(KBin) when is_binary(KBin) -> string:casefold(KBin). --spec normalize_value(binary()|list(binary()) -) -> +-spec normalize_value(binary() | list(binary())) -> list(binary()). normalize_value(MultipleValues) when is_list(MultipleValues) -> lists:filter(fun is_binary/1, MultipleValues); @@ -465,8 +469,7 @@ parse_block_tester(RequestHeader1, RequestHeader2) -> ?assertMatch( <<"1024">>, get_unique_value('Content-Length', Headers) - ) - . + ). riak_metadata_test() -> RequestHeader1 = @@ -523,8 +526,10 @@ response_header_test() -> {'Server', <<"Riak Web API">>}, {'Content-Length', <<"1024">>}, {'Etag', <<"sometag">>}, - {<<"X-Riak-Index-field1_bin">>, [<<"NAME1|DOB1">>, <<"NAME2|DOB1">>]}, - {<<"X-Riak-Index-field1_bin">>, <<"NAME3|DOB1 ">>}, + {<<"X-Riak-Index-field1_bin">>, [ + <<"NAME1|DOB1">>, <<"NAME2|DOB1">> + ]}, + {<<"X-Riak-Index-field1_bin">>, <<"NAME3|DOB1 ">>}, {<<"X-Riak-Index-field2_bin">>, <<"POSTCODE1|DOB1">>} ], RespHeaders1 = make_rsp_header(InitHeaders), @@ -551,7 +556,6 @@ response_header_test() -> "X-Riak-Index-field1_bin: NAME3|DOB1, NAME1|DOB1, NAME2|DOB1\r\n" "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n" >>, - ?assertMatch(ExpectedResponse, Response) - . + ?assertMatch(ExpectedResponse, Response). -endif. 
diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index 919f0cc..daf5090 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -8,12 +8,12 @@ -spec is_authorised( boolean(), - http|https, + http | https, riak_api_web_headers:headers(), {ip, inet:ip_address()} -) -> - {ok, riak_core_security:context() | undefined} | - riak_api_web_acceptor:halt_response(). +) -> + {ok, riak_core_security:context() | undefined} + | riak_api_web_acceptor:halt_response(). is_authorised(Enabled, Scheme, ReqHeaders, Peer) -> is_authorised( Enabled, @@ -27,8 +27,8 @@ is_authorised(Enabled, Scheme, ReqHeaders, Peer) -> is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> case riak_api_web_headers:get_unique_value('Authorization', ReqHeaders) of - << ?AUTH_PREFIX, Base64UP/binary>> -> - try + <> -> + try UserPass = base64:decode(Base64UP), [User, Pass] = string:lexemes(UserPass, ":"), case AuthFun(User, Pass, [Peer]) of @@ -38,7 +38,7 @@ is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> {halt, 401, <<"~0p">>, [Error]} end catch - _ : ExError -> + _:ExError -> ?LOG_WARNING("Error decoding credentials ~0p", [ExError]), {halt, 400, none, <<"Error decoding credentials">>, []} end; diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 77a4076..03ce0d2 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -1,4 +1,4 @@ -%% ------------------------------------------------------------------- +%% ------------------------------------------------------------------- %% %% Copyright (c) 2007 Mochi Media, Inc %% Copyright (c) 2026 Martin Sumner @@ -19,26 +19,26 @@ %% %% ------------------------------------------------------------------- %% @doc Socket and acceptor pool management for web requests -%% +%% %% Socket manager intended to abstract away from choice of SSL, and also %% maintain a pool of accept processes that are ready to accept new connection %% requests -%% +%% %% Each acceptor is an 
`riak_api_web_acceptor` - an as each acceptor accepts %% a connection, it will prompt this socket server to launch a new acceptor. %% When a linked acceptor closes (along with the connection), the close message %% is handled and the closed acceptor is removed from the pool. -%% +%% %% The intention is that there should always be at least the pool size of %% acceptors waiting for a connection - unless the max size is reached, and no %% new acceptors will be started. This means that concurrently no more %% connections can be handled concurrently than the max pool size. -%% +%% %% The module was initially based on the: %% - mochiweb_socket_server %% - mochiweb_socket %% - mochiweb_acceptor -%% +%% %% Patterns used in these modules have been compared with the Elli web server %% for validation - https://github.com/elli-lib/elli. @@ -81,65 +81,63 @@ -define(POOL_SIZE_DEFAULT, 16). -define(POOL_SIZE_MAX_DEFAULT, 2048). --record(socket_state, - { - port :: inet:port_number(), - listener :: socket(), - pool_size = ?POOL_SIZE_DEFAULT :: pos_integer(), - max_pool_size = ?POOL_SIZE_MAX_DEFAULT :: pos_integer(), - acceptor_pool = sets:new([{version, 2}]) :: sets:set() - } -). +-record(socket_state, { + port :: inet:port_number(), + listener :: socket(), + pool_size = ?POOL_SIZE_DEFAULT :: pos_integer(), + max_pool_size = ?POOL_SIZE_MAX_DEFAULT :: pos_integer(), + acceptor_pool = sets:new([{version, 2}]) :: sets:set() +}). -type socket_option() :: - {ip, inet:ip_address()} | - binary | - {reuseaddr, boolean()} | - %% Assumed necessary to allow for rapid restart of supervised - %% process - e.g. allow for next process to listen on socket even - %% when the previous process has not completed the close - {packet, raw} | - {active, boolean()} - %% After a connection is accepted the socket is manually read to be - %% decoded - . + {ip, inet:ip_address()} + | binary + | {reuseaddr, boolean()} + %% Assumed necessary to allow for rapid restart of supervised + %% process - e.g. 
allow for next process to listen on socket even + %% when the previous process has not completed the close + | {packet, raw} + | {active, boolean()} +%% After a connection is accepted the socket is manually read to be +%% decoded +. -type buffer_option() :: - {recbuf, pos_integer()} | - {sndbuf, pos_integer()} | - {buffer, pos_integer()} - % The size of the user-level buffer used by the driver. - % Not to be confused with options sndbuf and recbuf, which correspond - % to the Kernel socket buffers. For TCP it is recommended to have - % val(buffer) >= val(recbuf) to avoid performance issues because - % of unnecessary copying - . + {recbuf, pos_integer()} + | {sndbuf, pos_integer()} + | {buffer, pos_integer()} +% The size of the user-level buffer used by the driver. +% Not to be confused with options sndbuf and recbuf, which correspond +% to the Kernel socket buffers. For TCP it is recommended to have +% val(buffer) >= val(recbuf) to avoid performance issues because +% of unnecessary copying +. -type server_name() :: binary(). - % Name of the root part of the address i.e. - % <<"Protocol://Host:Port">> +% Name of the root part of the address i.e. +% <<"Protocol://Host:Port">> -type option() :: - {acceptor_pool_start_size, pos_integer()} | - % The number of acceptors to be ready to accept an new connection. - % This pool size is not a limit, it is is the starting size. As an - % acceptor picks up a new connection request it will prompt for a new - % acceptor to be spawned (and will not return to the pool once it is - % complete). - {acceptor_pool_max_size, pos_integer()} | - % The maximum number of acceptors in the pool - the total number of - % concurrent requests that can be supported on this port - {ssl, boolean()} | - {ssl_opts, [ssl:tls_server_option()]} | - {ip, inet:ip_address()} | - {port, inet:port_number()} | - {name, server_name()}. - --type scheme() :: http|https. 
+ {acceptor_pool_start_size, pos_integer()} + % The number of acceptors to be ready to accept an new connection. + % This pool size is not a limit, it is is the starting size. As an + % acceptor picks up a new connection request it will prompt for a new + % acceptor to be spawned (and will not return to the pool once it is + % complete). + | {acceptor_pool_max_size, pos_integer()} + % The maximum number of acceptors in the pool - the total number of + % concurrent requests that can be supported on this port + | {ssl, boolean()} + | {ssl_opts, [ssl:tls_server_option()]} + | {ip, inet:ip_address()} + | {port, inet:port_number()} + | {name, server_name()}. + +-type scheme() :: http | https. -type web_options() :: list(option()). --type socket() :: {http, gen_tcp:socket()}|{https, ssl:sslsocket()}. +-type socket() :: {http, gen_tcp:socket()} | {https, ssl:sslsocket()}. -type tcp_error() :: closed | timeout | system_limit | inet:posix(). -type tls_error() :: term(). @@ -192,7 +190,7 @@ acceptor_accepted(Pid) -> %%%============================================================================ init(Options) -> - BufferOpts = + BufferOpts = case get_tcp_buffer_options() of [] -> []; @@ -280,7 +278,6 @@ handle_info({'EXIT', Pid, normal}, State) -> handle_info({'EXIT', Pid, Reason}, State) -> ?LOG_ERROR("Acceptor ~p unexpectedly crashed: ~0p", [Pid, Reason]), handle_info({'EXIT', Pid, normal}, State). - %%%============================================================================ %%% Internal Functions @@ -291,13 +288,12 @@ default_socket_options(IPAddr) -> [ {ip, IPAddr}, binary, - {reuseaddr, true}, + {reuseaddr, true}, {packet, raw}, {active, false} ]. --spec get_acceptor_pool(socket(), list(option()) -) -> +-spec get_acceptor_pool(socket(), list(option())) -> {list(pid()), pos_integer(), pos_integer()}. 
get_acceptor_pool(Listener, Options) -> StartSize = @@ -323,10 +319,11 @@ get_acceptor_pool(Listener, Options) -> ) end, case {StartSize, MaxSize} of - {StartSize, MaxSize} when - is_integer(StartSize), - is_integer(MaxSize), - MaxSize >= StartSize -> + {StartSize, MaxSize} when + is_integer(StartSize), + is_integer(MaxSize), + MaxSize >= StartSize + -> {start_acceptor_pool(Listener, StartSize), StartSize, MaxSize}; InvalidConfig -> ?LOG_ERROR( @@ -336,7 +333,7 @@ get_acceptor_pool(Listener, Options) -> ), { start_acceptor_pool(Listener, ?POOL_SIZE_DEFAULT), - ?POOL_SIZE_DEFAULT, + ?POOL_SIZE_DEFAULT, ?POOL_SIZE_MAX_DEFAULT } end. @@ -365,10 +362,10 @@ get_tcp_buffer_options() -> get_tcp_buffer_options([], BufferOptions) -> BufferOptions; -get_tcp_buffer_options([{Name, EnVar}|Rest], BufferOptions) -> +get_tcp_buffer_options([{Name, EnVar} | Rest], BufferOptions) -> case application:get_env(riak_api, EnVar) of {ok, BSize} when is_integer(BSize) -> - get_tcp_buffer_options(Rest, [{Name, BSize}|BufferOptions]); + get_tcp_buffer_options(Rest, [{Name, BSize} | BufferOptions]); _ -> get_tcp_buffer_options(Rest, BufferOptions) end. @@ -383,7 +380,7 @@ get_scheme({Scheme, _Socket}) -> list(socket_option()), list(buffer_option()), none | list(ssl:tls_server_option()) -) -> +) -> {ok, socket()} | {error, any()}. listen(http, Port, SocketOpts, BufferOpts, none) -> case gen_tcp:listen(Port, SocketOpts ++ BufferOpts) of @@ -403,8 +400,8 @@ listen(https, Port, SocketOpts, BufferOpts, SSLOpts) when SSLOpts =/= none -> -spec accept( socket(), pos_integer() -) -> - {ok, socket()} |{error, tcp_error()|tls_error()}. +) -> + {ok, socket()} | {error, tcp_error() | tls_error()}. accept({http, Socket}, Timeout) -> case gen_tcp:accept(Socket, Timeout) of {ok, S} -> @@ -429,7 +426,7 @@ accept({https, Socket}, Timeout) -> socket(), non_neg_integer(), non_neg_integer() | infinity -) -> +) -> {ok, binary()} | {error, any()}. 
recv({http, Socket}, Size, Timeout) -> case gen_tcp:recv(Socket, Size, Timeout) of @@ -472,4 +469,4 @@ get_peer({https, Socket}) -> {ok, Addr}; {error, Error} -> {error, Error} - end. \ No newline at end of file + end. From ea3034b66e1397d64850b17412784814d54e47ea Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Mar 2026 10:38:06 +0000 Subject: [PATCH 04/53] Add basic chunking support (receive body) --- src/riak_api_web_acceptor.erl | 20 ++- src/riak_api_web_body.erl | 253 ++++++++++++++++++++++++++++++++-- src/riak_api_web_security.erl | 22 +++ src/riak_api_web_socket.erl | 29 ++++ 4 files changed, 303 insertions(+), 21 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 7c4f17c..5161bd8 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -37,13 +37,21 @@ -define(CONTINUE_RESPONSE, <<"HTTP 1.1 100 Continue">>). -type response_code() :: - 400 - | 413 + 200..204 + | 300..304 + | 400 + | 401 + | 403..406 + | 408..415 + | 428..429 | 431 - | 200. + | 500 + | 503 + | 505 + | 507. -type method() :: - 'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE' | 'OPTIONS' | 'TRACE'. + 'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE'. -type http_version() :: {1, 0} | {1, 1}. @@ -363,9 +371,7 @@ get_request_line(Socket, Buffer) -> SM == 'HEAD'; SM == 'POST'; SM == 'PUT'; - SM == 'DELETE'; - SM == 'OPTIONS'; - SM == 'TRACE' + SM == 'DELETE' -> {ok, {SM, Path, SV, Rest}}; _USM -> diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index e424594..8349ef1 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -33,14 +33,16 @@ acc_size = 0 :: non_neg_integer(), max_size :: pos_integer(), buffer_fun :: buffer_fun(), + chunk_buff = <<>> :: binary(), test_packets = [] :: list(binary()) % only used in tests }). -type req_body() :: #req_body{}. +-type fetch_req() :: pos_integer() | line. -type buffer_fun() :: - fun((binary(), pos_integer(), non_neg_integer() | undefined) -> binary()). 
+ fun((binary(), fetch_req(), non_neg_integer() | undefined) -> binary()). -export_type([req_body/0, buffer_fun/0]). @@ -150,11 +152,102 @@ get_body( end end; get_body( - #req_body{content_length = CL, max_size = MS, acc_size = AS}, - _SL, - _TO -) when CL == chunked, AS > MS -> - {error, content_too_large}. + #req_body{content_length = CL, max_size = MS, acc_size = AS} = RqBdy, + all, + TO +) when CL == chunked -> + case erlang:decode_packet(line, RqBdy#req_body.buffer, []) of + {ok, <<"\r\n">>, Rest} when is_binary(Rest) -> + get_body( + extend_buffer( + RqBdy#req_body{buffer = Rest}, + line, + TO + ), + all, + TO + ); + {ok, Line, Rest} when is_binary(Line) -> + ChunkSize = get_chunk_size(Line), + RcvBuffer = RqBdy#req_body.chunk_buff, + case {ChunkSize, ChunkSize + AS} of + {0, _} -> + FinalRqBdy = + case Rest of + <<>> -> + extend_buffer( + RqBdy#req_body{buffer = <<>>}, + line, + TO + ); + Rest when is_binary(Rest) -> + RqBdy#req_body{buffer = Rest} + end, + <<"\r\n", Next/binary>> = get_buffer(FinalRqBdy), + { + RcvBuffer, + FinalRqBdy#req_body{buffer = Next, chunk_buff = <<>>} + }; + {N, NextSize} when N > 0, NextSize =< MS -> + case byte_size(Rest) of + BS when BS >= ChunkSize -> + <> + = Rest, + get_body( + RqBdy#req_body{ + buffer = FurtherChunks, + chunk_buff = + <>, + acc_size = AS + ChunkSize + }, + all, + TO + ); + BS -> + Needed = ChunkSize - BS, + UpdRqBdy = + extend_buffer( + RqBdy#req_body{buffer = Rest}, + Needed, + TO + ), + Chunk = get_buffer(UpdRqBdy), + get_body( + UpdRqBdy#req_body{ + buffer = <<>>, + chunk_buff = + <>, + acc_size = AS + ChunkSize + }, + all, + TO + ) + end; + {_N, _TooBig} -> + {error, content_too_large} + end; + {more, _} -> + % have to get the whole receive buffer, or get one byte at a + % time - don't want to ask for more than one byte + get_body( + extend_buffer(RqBdy, line, TO), + all, + TO + ) + end. + +-spec get_chunk_size(binary()) -> non_neg_integer(). 
+get_chunk_size(Line) -> + case binary:split(string:trim(Line), <<";">>) of + [ChunkLength] -> + binary_to_integer(ChunkLength, 16); + [ChunkLength, _Ignore] -> + % There may be a chunk extension after a semi-colon for + % progress tracking. + % However, these are not expected in our use case, and + % could hide security issues - so will ignore. + binary_to_integer(ChunkLength, 16) + end. -ifdef(TEST). extend_buffer(ReqBody, Size, _Timeout) -> @@ -164,21 +257,17 @@ extend_buffer(ReqBody, Size, _Timeout) -> Size, ReqBody#req_body.buffer ), - ReqBody#req_body{ - buffer = NextBin, - test_packets = RestPackets - }. + ReqBody#req_body{buffer = NextBin, test_packets = RestPackets}. -else. -spec extend_buffer( req_body(), - pos_integer(), + pos_integer() | line, non_neg_integer() | undefined ) -> req_body(). extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) -> ReqBody#req_body{ - buffer = - BufferFun(ReqBody#req_body.buffer, Size, Timeout) + buffer = BufferFun(ReqBody#req_body.buffer, Size, Timeout) }. -endif. @@ -244,6 +333,133 @@ slicing_fixed_length_test() -> Remainder = <<(RqBdyAlt3#req_body.buffer)/binary, SocketBin/binary>>, ?assertMatch(DummyRequest, Remainder). +all_in_buffer_test() -> + Body = crypto:strong_rand_bytes(11 * 1024), + RqBdyInit = + #req_body{ + buffer = Body, + content_length = 11 * 1024, + max_size = 1024 * 1024, + test_packets = [] + }, + {Slice1, RqBdy1} = get_body(RqBdyInit, 4 * 1024, 60 * 1000), + {Slice2, RqBdy2} = get_body(RqBdy1, 4 * 1024, 60 * 1000), + {Slice3, RqBdy3} = get_body(RqBdy2, 4 * 1024, 60 * 1000), + ?assertMatch(4096, byte_size(Slice1)), + ?assertMatch(4096, byte_size(Slice2)), + ?assertMatch(3072, byte_size(Slice3)), + CompleteResult = <>, + ?assertMatch(Body, CompleteResult), + ?assertMatch(<<>>, get_buffer(RqBdy3)), + ?assertMatch(done, element(1, get_body(RqBdy3, 4 * 1024, 60 * 1000))). 
+ +get_empty_body_test() -> + RqBdyInit = + #req_body{ + buffer = <<"0\r\n\r\n">>, + content_length = chunked, + max_size = 1024 * 1024, + test_packets = [] + }, + {Output, RqBdyEnd} = get_body(RqBdyInit, all, 1000), + ?assertMatch(<<>>, Output), + ?assertMatch(<<>>, get_buffer(RqBdyEnd)). + +get_empty_body_with_pipelined_request_test() -> + RqBdyInit = + #req_body{ + buffer = <<"0\r\n\r\nGET /stats HTTP/1.1\r\n">>, + content_length = chunked, + max_size = 1024 * 1024, + test_packets = [] + }, + {Output, RqBdyEnd} = get_body(RqBdyInit, all, 1000), + ?assertMatch(<<>>, Output), + ?assertMatch(<<"GET /stats HTTP/1.1\r\n">>, get_buffer(RqBdyEnd)). + +get_standard_wikipedia_test() -> + Packets = + [ + <<"4\r\n">>, + <<"Wiki\r\n">>, + <<"5\r\n">>, + <<"pedia\r\n">>, + <<"e\r\n">>, + <<" in\r\n\r\nchunks.\r\n">>, + <<"0\r\n">>, + <<"\r\n">> + ], + RqBdyInit = + #req_body{ + buffer = <<"">>, + content_length = chunked, + max_size = 1024 * 1024, + test_packets = Packets + }, + {Output, RqBdyEnd} = get_body(RqBdyInit, all, 1000), + ?assertMatch(<<"Wikipedia in\r\n\r\nchunks.">>, Output), + ?assertMatch(<<>>, get_buffer(RqBdyEnd)). + +get_wikipedia_from_buffer_test() -> + {ok, RqBdyInit} = + initiate_body( + fun(B, _, _) -> B end, + <<"4\r\nWiki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n">>, + chunked, + false, + 1024 * 1024 + ), + OtherPackets = [<<"0\r\n">>, <<"\r\n">>], + RqBdy = RqBdyInit#req_body{test_packets = OtherPackets}, + {Output, RqBdyEnd} = get_body(RqBdy, all, 1000), + ?assertMatch(<<"Wikipedia in\r\n\r\nchunks.">>, Output), + ?assertMatch(<<>>, get_buffer(RqBdyEnd)). 
+ +ignore_extension_test() -> + Packets = + [ + <<"4;ext\r\n">>, + <<"Wiki\r\n">>, + <<"5;somert">>, + <<"\r\n">>, + <<"pedia\r\n">>, + <<"e\r\n">>, + <<" in\r\n\r\nchunks.\r\n">>, + <<"0;other\r\n">>, + <<"\r\n">> + ], + RqBdyInit = + #req_body{ + buffer = <<"">>, + content_length = chunked, + max_size = 1024 * 1024, + test_packets = Packets + }, + {Output, RqBdyEnd} = get_body(RqBdyInit, all, 1000), + ?assertMatch(<<"Wikipedia in\r\n\r\nchunks.">>, Output), + ?assertMatch(<<>>, get_buffer(RqBdyEnd)). + +toobig_chunking_test() -> + Packets = + [ + <<"4\r\n">>, + <<"Wiki\r\n">>, + <<"5\r\n">>, + <<"pedia\r\n">>, + <<"e\r\n">>, + <<" in\r\n\r\nchunks.\r\n">>, + <<"0\r\n">>, + <<"\r\n">> + ], + RqBdyInit = + #req_body{ + buffer = <<"">>, + content_length = chunked, + max_size = 20, + test_packets = Packets + }, + ?assertMatch({error, content_too_large}, get_body(RqBdyInit, all, 1000)). + packet_testbin(<<>>, Acc) -> lists:reverse(Acc); packet_testbin(<>, Acc) -> @@ -251,7 +467,16 @@ packet_testbin(<>, Acc) -> accrue_packets(Rest, 0, Buffer) -> {Buffer, Rest}; -accrue_packets([NextPacket | Rest], Size, Buffer) -> +accrue_packets([], line, Buffer) -> + {Buffer, []}; +accrue_packets([NextPacket|Rest], line, Buffer) -> + case erlang:decode_packet(line, NextPacket, []) of + {ok, Line, Overhang} -> + {<>, [Overhang|Rest]}; + {more, _} -> + accrue_packets(Rest, line, <>) + end; +accrue_packets([NextPacket | Rest], Size, Buffer) when is_integer(Size) -> case Size of Needed when Needed < byte_size(NextPacket) -> <> = NextPacket, diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index daf5090..9121974 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -1,4 +1,26 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2007-2009 Basho Technologies +%% Copyright (c) 2026 Martin Sumner +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you 
may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% %% @doc Some security helper functions for Riak API endpoints + -module(riak_api_web_security). -include_lib("kernel/include/logger.hrl"). diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 03ce0d2..c759c03 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -69,6 +69,7 @@ get_scheme/1, accept/2, recv/3, + recv_line/2, send/2, close/1, get_peer/1, @@ -443,6 +444,34 @@ recv({https, Socket}, Size, Timeout) -> {error, Error} end. +-spec recv_line( + socket(), + non_neg_integer() | infinity +) -> + {ok, binary()} | {error, any()}. +recv_line({http, Socket}, Timeout) -> + maybe + ok ?= inet:setopts(Socket, [{packet, line}]), + {ok, Data} ?= gen_tcp:recv(Socket, 0, Timeout), + ok ?= inet:setopts(Socket, [{packet, raw}]), + true = is_binary(Data), + {ok, Data} + else + {error, Error} -> + {error, Error} + end; +recv_line({https, Socket}, Timeout) -> + maybe + ok ?= ssl:setopts(Socket, [{packet, line}]), + {ok, Data} ?= ssl:recv(Socket, 0, Timeout), + ok ?= ssl:setopts(Socket, [{packet, raw}]), + true = is_binary(Data), + {ok, Data} + else + {error, Error} -> + {error, Error} + end. + -spec send(socket(), binary()) -> ok | {error, any()}. 
send({http, Socket}, Data) -> gen_tcp:send(Socket, Data); From b815a5c5c452d32954bd7df905714744d8ec9975 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Mar 2026 12:16:03 +0000 Subject: [PATCH 05/53] Add further tests - plus date caching --- src/riak_api_web_acceptor.erl | 60 ++++++++++++++++++++++++-- src/riak_api_web_body.erl | 22 +++++----- src/riak_api_web_security.erl | 80 +++++++++++++++++++++++++++++++++++ src/riak_api_web_socket.erl | 1 + 4 files changed, 149 insertions(+), 14 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 5161bd8..0105327 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -28,7 +28,7 @@ -export([start_link/1, init/2]). --export([extend_buffer/4]). +-export([extend_buffer/4, start_clock/0]). -include_lib("kernel/include/logger.hrl"). @@ -548,6 +548,60 @@ send_response(_RspCode, _RspHeaders, _RspBody, _Socket) -> _Version = get_version(), ok. +start_clock() -> + ets:new( + ?MODULE, + [named_table, {read_concurrency, true}] + ). + -spec default_response_headers(boolean()) -> riak_api_web_headers:headers(). -default_response_headers(_Keepalive) -> - riak_api_web_headers:make_rsp_header([]). +default_response_headers(KeepAlive) -> + DateHeader = + case {os:system_time(second), ets:lookup(?MODULE, rfc1123)} of + {Now, [{rfc1123, {CachedTime, CachedHdr}}]} when + Now == CachedTime + -> + CachedHdr; + {Now, _} -> + Hdr = {'Date', list_to_binary(httpd_util:rfc1123_date())}, + ets:insert(?MODULE, {rfc1123, {Now, Hdr}}), + Hdr + end, + ServerHeader = {'Server', <<"RiakAPI/4.0 SilverMachine">>}, + ConnectionHeader = + case KeepAlive of + true -> + {'Connection', <<"keep-alive">>}; + false -> + {'Connection', <<"close">>} + end, + riak_api_web_headers:make_rsp_header( + [ServerHeader, DateHeader, ConnectionHeader] + ). 
+ +%%%============================================================================ +%%% Eunit tests +%%%============================================================================ + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +clock_test() -> + start_clock(), + {TC1, _Hdrs1} = timer:tc(fun() -> default_response_headers(true) end), + {TC2, _Hdrs2} = timer:tc(fun() -> default_response_headers(true) end), + {TC3, _Hdrs3} = timer:tc(fun() -> default_response_headers(false) end), + {TC4, _Hdrs4} = timer:tc(fun() -> default_response_headers(true) end), + timer:sleep(1000), + {TC5, _Hdrs5} = timer:tc(fun() -> default_response_headers(true) end), + ?assertMatch(1, ets:info(?MODULE, size)), + MeanUnCached = (TC1 + TC5) div 2, + MeanCached = (TC2 + TC3 + TC4) div 3, + io:format( + user, + "Cached ~w micros vs uncached ~w~n", + [MeanCached, MeanUnCached] + ), + ?assert(MeanCached < MeanUnCached). + +-endif. diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index 8349ef1..ac2772d 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -191,8 +191,8 @@ get_body( {N, NextSize} when N > 0, NextSize =< MS -> case byte_size(Rest) of BS when BS >= ChunkSize -> - <> - = Rest, + <> = + Rest, get_body( RqBdy#req_body{ buffer = FurtherChunks, @@ -386,8 +386,8 @@ get_standard_wikipedia_test() -> <<"pedia\r\n">>, <<"e\r\n">>, <<" in\r\n\r\nchunks.\r\n">>, - <<"0\r\n">>, - <<"\r\n">> + <<"0\r\n">>, + <<"\r\n">> ], RqBdyInit = #req_body{ @@ -425,8 +425,8 @@ ignore_extension_test() -> <<"pedia\r\n">>, <<"e\r\n">>, <<" in\r\n\r\nchunks.\r\n">>, - <<"0;other\r\n">>, - <<"\r\n">> + <<"0;other\r\n">>, + <<"\r\n">> ], RqBdyInit = #req_body{ @@ -448,8 +448,8 @@ toobig_chunking_test() -> <<"pedia\r\n">>, <<"e\r\n">>, <<" in\r\n\r\nchunks.\r\n">>, - <<"0\r\n">>, - <<"\r\n">> + <<"0\r\n">>, + <<"\r\n">> ], RqBdyInit = #req_body{ @@ -469,13 +469,13 @@ accrue_packets(Rest, 0, Buffer) -> {Buffer, Rest}; accrue_packets([], line, Buffer) -> {Buffer, []}; 
-accrue_packets([NextPacket|Rest], line, Buffer) -> +accrue_packets([NextPacket | Rest], line, Buffer) -> case erlang:decode_packet(line, NextPacket, []) of {ok, Line, Overhang} -> - {<>, [Overhang|Rest]}; + {<>, [Overhang | Rest]}; {more, _} -> accrue_packets(Rest, line, <>) - end; + end; accrue_packets([NextPacket | Rest], Size, Buffer) when is_integer(Size) -> case Size of Needed when Needed < byte_size(NextPacket) -> diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index 9121974..2f42374 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -79,5 +79,85 @@ is_authorised(false, _, _ReqHeaders, _Peer, _AuthFun) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("stdlib/include/assert.hrl"). + +simple_security_test() -> + User1 = <<"User1">>, + User2 = <<"User2">>, + User3 = <<"User3">>, + Pass1 = <<"Pass1!">>, + Pass2 = <<"Pass2!">>, + Pass3 = <<"Pass3!">>, + AuthMap = #{User1 => Pass1, User2 => Pass2, User3 => Pass3}, + AuthFun = + fun(User, Pass, _IgnorePeer) when is_binary(Pass) -> + case maps:get(User, AuthMap, undefined) of + Pass -> + {ok, ok}; + _ -> + {error, invalid_credentials} + end + end, + Combo1 = base64:encode(iolist_to_binary([User1, <<":">>, Pass1])), + ?assertMatch( + {ok, ok}, + is_authorised( + true, + https, + make_request_headers(Combo1), + {ip, {127, 0, 0, 1}}, + AuthFun + ) + ), + ?assertMatch( + {halt, 400, none, <<"Error decoding credentials">>, []}, + is_authorised( + true, + https, + make_request_headers(iolist_to_binary([Combo1, <<"A">>])), + {ip, {127, 0, 0, 1}}, + AuthFun + ) + ), + BadCombo = base64:encode(iolist_to_binary([User2, <<":">>, Pass1])), + ?assertMatch( + {halt, 401, <<"~0p">>, [invalid_credentials]}, + is_authorised( + true, + https, + make_request_headers(BadCombo), + {ip, {127, 0, 0, 1}}, + AuthFun + ) + ), + Combo2 = base64:encode(iolist_to_binary([User2, <<":">>, Pass2])), + MultipleHeaders = + riak_api_web_headers:make( + [ + 
{'Content-Length', <<"1024">>}, + {<<"X-Riak-VClock">>, <<"ABC123==">>}, + {'Authorization', iolist_to_binary([<<"Basic ">>, Combo1])}, + {'Authorization', iolist_to_binary([<<"Basic ">>, Combo2])} + ] + ), + ?assertMatch( + {halt, 400, none, <<"Error decoding credentials">>, []}, + is_authorised( + true, + https, + MultipleHeaders, + {ip, {127, 0, 0, 1}}, + AuthFun + ) + ). + +make_request_headers(Combo) -> + riak_api_web_headers:make( + [ + {'Content-Length', <<"1024">>}, + {<<"X-Riak-VClock">>, <<"ABC123==">>}, + {'Authorization', iolist_to_binary([<<"Basic ">>, Combo])} + ] + ). -endif. diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index c759c03..fbaa459 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -219,6 +219,7 @@ init(Options) -> "Acceptor pool for web started on IP ~0p port ~w of size ~w", [IP, Port, StartSize] ), + riak_api_web_acceptor:start_clock(), { ok, #socket_state{ From 3c11b94b922cda53a10e2363a022cd35250ffd3a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Mar 2026 17:58:38 +0000 Subject: [PATCH 06/53] Add behaviour Also add callback module and unit test the sending of responses both streamed and whole. --- rebar.config | 1 + src/riak_api_web_acceptor.erl | 241 +++++++++++++++++++++++++++++----- src/riak_api_web_handler.erl | 162 +++++++++++++++++++++++ src/riak_api_web_socket.erl | 2 +- 4 files changed, 374 insertions(+), 32 deletions(-) create mode 100644 src/riak_api_web_handler.erl diff --git a/rebar.config b/rebar.config index c022a27..e68ce2b 100644 --- a/rebar.config +++ b/rebar.config @@ -14,6 +14,7 @@ "src/riak_api_web_security.erl", "src/riak_api_web_socket.erl", "src/riak_api_web.erl", + "src/riak_api_web_handler.erl" "test/end_to_end/*.erl", "rebar.config" ]}, diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 0105327..4e9ce29 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -87,8 +87,9 @@ }. 
-type stream_fun() :: fun(() -> {ok, binary()} | done). +-type send_fun() :: fun((binary()) -> ok|{error, any()}). --export_type([halt_response/0, method/0]). +-export_type([halt_response/0, method/0, response_code/0]). %%%============================================================================ %%% API @@ -131,6 +132,10 @@ loop(Socket, InitBuffer) -> ok end. +-spec handle_request( + riak_api_web_socket:socket(), binary() +) -> + {boolean(), binary()} | close. handle_request(Socket, InitBuffer) -> StartTime = os:system_time(microsecond), reset_version(), @@ -175,7 +180,7 @@ handle_request(Socket, InitBuffer) -> MaxBodySize ), ok ?= send_continue(Socket, ReqHeaders), - {ok, Code, RspHeaders, RspBody, KeepAliveOK, ReqBdy1, ModCtx4} ?= + {ok, ModCtx4, {Code, RspHeaders, RspBody, KeepAliveOK, ReqBdy1}} ?= CallbackMod:process_request( ModCtx3, InitReqBdy @@ -212,19 +217,20 @@ handle_request(Socket, InitBuffer) -> -define(VERSION_KEY, {?MODULE, http_version}). -set_version({1, 0}) -> - put(?VERSION_KEY, <<"HTTP 1.0">>); -set_version({1, 1}) -> - put(?VERSION_KEY, <<"HTTP 1.1">>). +-spec set_version(http_version()) -> ok. +set_version(Version) when Version == {1, 0}; Version == {1, 1} -> + put(?VERSION_KEY, Version). +-spec get_version() -> http_version(). get_version() -> case get(?VERSION_KEY) of undefined -> - <<"HTTP 1.0">>; + {1, 0}; Tag -> Tag end. +-spec reset_version() -> ok. reset_version() -> put(?VERSION_KEY, undefined). @@ -267,11 +273,11 @@ split_path(URIPath) -> -spec extend_buffer( riak_api_web_socket:socket(), binary(), - non_neg_integer(), + non_neg_integer() | line, pos_integer() | undefined ) -> binary(). 
-extend_buffer(Socket, Buffer, Needed, Timeout) -> +extend_buffer(Socket, Buffer, Needed, Timeout) when is_integer(Needed) -> case riak_api_web_socket:recv(Socket, Needed, get_timeout(Timeout)) of {ok, Data} when is_binary(Data) -> <>; @@ -283,6 +289,19 @@ extend_buffer(Socket, Buffer, Needed, Timeout) -> ), riak_api_web_socket:close(Socket), exit(normal) + end; +extend_buffer(Socket, Buffer, line, Timeout) -> + case riak_api_web_socket:recv_line(Socket, get_timeout(Timeout)) of + {ok, Data} when is_binary(Data) -> + <>; + {error, Reason} -> + ?LOG_WARNING( + "Unexpected failure to read data from client " + "~w for socket ~0p", + [Reason, Socket] + ), + riak_api_web_socket:close(Socket), + exit(normal) end. -spec extend_buffer_fun( @@ -471,15 +490,19 @@ handle_response( } ) -> RequestCompleteTime = os:system_time(microsecond), - stream_response(RspCode, RspHeaders, StreamFun, Socket), - ResponseCompleteTime = os:system_time(microsecond), - CallbackMod:record_request( - Context, - StartTime, - RequestCompleteTime, - ResponseCompleteTime, - stream_complete + stream_response( + RspCode, + RspHeaders, + StreamFun, + fun(B) -> riak_api_web_socket:send(Socket, B) end ), + ResponseCompleteTime = os:system_time(microsecond), + ok = + CallbackMod:record_request( + Context, + {StartTime, RequestCompleteTime, ResponseCompleteTime}, + stream_complete + ), {Keepalive, BufferIn}; handle_response( { @@ -497,13 +520,12 @@ handle_response( RequestCompleteTime = os:system_time(microsecond), send_response(RspCode, RspHeaders, RspBody, Socket), ResponseCompleteTime = os:system_time(microsecond), - CallbackMod:record_request( - Context, - StartTime, - RequestCompleteTime, - ResponseCompleteTime, - send_complete - ), + ok = + CallbackMod:record_request( + Context, + {StartTime, RequestCompleteTime, ResponseCompleteTime}, + send_complete + ), {Keepalive, BufferIn}; handle_response({halt, RspCode, RspHeaders, RspBody, Socket}) -> MergedRspHeaders = @@ -531,11 +553,47 @@ 
send_continue(Socket, ReqHeaders) -> response_code(), riak_api_web_headers:headers(), stream_fun(), - riak_api_web_socket:socket() + send_fun() ) -> ok. -stream_response(_RspCode, _RspHeaders, _StreamFun, _Socket) -> - ok. +stream_response(RspCode, RspHeaders, StreamFun, SendFun) -> + RspLine = get_response_line(get_version(), RspCode), + FinalHeaders = + riak_api_web_headers:enter( + 'Transfer-Encoding', + <<"chunked">>, + RspHeaders + ), + Metadata = riak_api_web_headers:output_response_block(FinalHeaders), + ok = + SendFun( + << + RspLine/binary, + Metadata/binary, + <<"\r\n">>/binary + >> + ), + stream_response(StreamFun, SendFun). + +stream_response(StreamFun, SendFun) -> + case StreamFun() of + {<<>>, NextFun} -> + stream_response(NextFun, SendFun); + done -> + SendFun(<<"0\r\n\r\n">>); + {Bin, NextFun} when is_binary(Bin) -> + BS = integer_to_binary(byte_size(Bin), 16), + ok = + SendFun( + << + BS/binary, + <<"\r\n">>/binary, + Bin/binary, + <<"\r\n">>/binary + >> + ), + stream_response(NextFun, SendFun) + end. -spec send_response( response_code(), @@ -543,10 +601,56 @@ stream_response(_RspCode, _RspHeaders, _StreamFun, _Socket) -> binary(), riak_api_web_socket:socket() ) -> - ok. -send_response(_RspCode, _RspHeaders, _RspBody, _Socket) -> - _Version = get_version(), - ok. + ok | {error, any()}. +send_response(RspCode, RspHeaders, RspBody, Socket) -> + riak_api_web_socket:send( + Socket, + generate_binary_response(RspCode, RspHeaders, RspBody) + ). + +-spec generate_binary_response( + response_code(), + riak_api_web_headers:headers(), + binary() +) -> + binary(). +generate_binary_response(RspCode, RspHeaders, RspBody) -> + RspLine = get_response_line(get_version(), RspCode), + FinalHeaders = + riak_api_web_headers:enter( + 'Content-Length', + integer_to_binary(byte_size(RspBody)), + RspHeaders + ), + Metadata = riak_api_web_headers:output_response_block(FinalHeaders), + << + RspLine/binary, + Metadata/binary, + <<"\r\n">>/binary, + RspBody/binary + >>. 
+ +-spec get_response_line(http_version(), response_code()) -> binary(). +get_response_line({1, 0}, RspCode) -> + iolist_to_binary( + [ + <<"HTTP/1.0 ">>, + integer_to_binary(RspCode), + <<" ">>, + httpd_util:reason_phrase(RspCode), + <<"\r\n">> + ] + ); +get_response_line({1, 1}, RspCode) -> + iolist_to_binary( + [ + <<"HTTP/1.1 ">>, + integer_to_binary(RspCode), + <<" ">>, + httpd_util:reason_phrase(RspCode), + <<"\r\n">> + ] + ). start_clock() -> ets:new( @@ -604,4 +708,79 @@ clock_test() -> ), ?assert(MeanCached < MeanUnCached). +simple_response_test() -> + set_version({1, 1}), + FullResponse = + generate_binary_response( + 200, + default_response_headers(false), + <<"OutputOK">> + ), + Date = list_to_binary(httpd_util:rfc1123_date()), + ExpectedResponse = + << + <<"HTTP/1.1 200 OK\r\n">>/binary, + <<"Connection: close\r\n">>/binary, + <<"Date: ">>/binary, + Date/binary, + <<"\r\n">>/binary, + <<"Server: RiakAPI/4.0 SilverMachine\r\n">>/binary, + <<"Content-Length: 8\r\n">>/binary, + <<"\r\n">>/binary, + <<"OutputOK">>/binary + >>, + ?assertMatch(ExpectedResponse, FullResponse). + +simple_strean_test() -> + SendFun = + fun(Bin) when is_binary(Bin) -> + case get({?MODULE, ?TEST, send_buffer}) of + AccBin when is_binary(AccBin) -> + put( + {?MODULE, ?TEST, send_buffer}, + <> + ); + undefined -> + put({?MODULE, ?TEST, send_buffer}, Bin) + end, + ok + end, + put({?MODULE, ?TEST, send_buffer}, undefined), + Me = self(), + spawn( + fun() -> + Me ! <<"Wiki">>, + Me ! <<"Pedia ">>, + Me ! <<"in chunks!">>, + Me ! 
done + end + ), + Date = list_to_binary(httpd_util:rfc1123_date()), + stream_response(200, default_response_headers(true), stream_fun(), SendFun), + Response = get({?MODULE, ?TEST, send_buffer}), + ExpectedResponse = + << + <<"HTTP/1.1 200 OK\r\n">>/binary, + <<"Connection: keep-alive\r\n">>/binary, + <<"Date: ">>/binary, + Date/binary, + <<"\r\n">>/binary, + <<"Transfer-Encoding: chunked\r\n">>/binary, + <<"Server: RiakAPI/4.0 SilverMachine\r\n">>/binary, + <<"\r\n">>/binary, + << "4\r\nWiki\r\n6\r\nPedia " + "\r\nA\r\nin chunks!\r\n0\r\n\r\n">>/binary + >>, + ?assertMatch(ExpectedResponse, Response). + +stream_fun() -> + fun() -> + receive + Bin when is_binary(Bin) -> + {Bin, stream_fun()}; + done -> + done + end + end. + -endif. diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl new file mode 100644 index 0000000..9e3799f --- /dev/null +++ b/src/riak_api_web_handler.erl @@ -0,0 +1,162 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2026 Martin Sumner +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. 
+%% +%% ------------------------------------------------------------------- +%% @doc Behaviour definition for a web handler +%% +%% the callbacks will be called in the following order, with the context +%% returned from the previous call included in the next +%% - match_route/2 +%% - check_permissions/4 +%% - parse_query_params/2 +%% - parse_request_headers/2 +%% - process_request/2 +%% - record_request/3 + +-module(riak_api_web_handler). + +-type context() :: term(). + + +-type max_header_count() :: pos_integer(). + %% The maximum number of headers that will be parsed + %% A header split over multiple lines will be counted once for each line + %% e.g. + %% X-Riak-Index_field1_bin : value1 + %% X-Riak-Index_field1_bin : value2 + %% + %% Will count as two headers +-type max_header_size() :: pos_integer(). + %% The maximum size of a single header value. If concatenating multiple + %% values causes issues with this limit - the may be split across headers. +-type max_body_size() :: pos_integer(). + %% The maximum size of the body (on the wire) i.e. prior to being unzipped + %% if compression is allowed +-type limits() :: {max_header_count(), max_header_size(), max_body_size()}. + +%% @doc match_route for the module +%% When called each route handled by this module must be checked, and either +%% `no_match` returned should none match - or the initial context with the +%% limits for that route. +-callback match_route( + riak_api_web_acceptor:method(), + unicode:chardata() +) -> + no_match|{ok, context(), limits()}. + + +-type peer() :: inet:ip_address(). + %% The IP address of the client device connected to the socket + +%% @doc check_permissions for using this module or route +%% The context() passed will be the context() returned from match_route/2 - so +%% if route information is required for permissions checks, it should be added +%% to the context. +%% +%% On failure return a halt_response with e.g. 
401 /403 response codes +-callback + check_permissions( + context(), + riak_api_web_headers:headers(), + riak_api_web_socket:scheme(), + peer() + ) -> + {ok, context()}|riak_api_web_acceptor:halt_response(). + + +-type query_params() :: #{binary() => binary()}. + +%% @doc parse and validate query params, passed as a map +%% Any parameter will have both key and value as a binary, except if the +%% parameter had no value - in which case the value will be the atom `true` +-callback + parse_query_params( + context(), + query_params() + ) -> + {ok, context()}|riak_api_web_acceptor:halt_response(). + +%% @doc parse and validate the request headers +-callback + parse_request_headers( + context(), + riak_api_web_headers:headers() + ) -> + {ok, context()}|riak_api_web_acceptor:halt_response(). + +-type stream_fun() :: fun(() -> {binary(), done|stream_fun()}). +-type response_body() :: + binary() | {stream, stream_fun()}. + +%% @doc Process the request and produce a response +%% The request may receive an object body, the request body element is a +%% riak_api_web_body:req_body() record. Calling riak_api_web_body:get_body/3 +%% will return the body, either in whole or one slice at a time (by setting a +%% slice length as the second attribute of the get_body/3 function, and +%% re submitting the req_body() returned into subsequent get_body/3 calls). +%% +%% Thw headers in the response need not contain the following header elements +%% which will be generated automatically: +%% - 'Server' +%% - 'Date' +%% - 'Connection' +%% - 'Content-Length'/'Transfer-Encoding' +%% +%% The response_body() may either be a binary to be sent with a fixed content +%% length, or a stream_fun() where calls to the stream_fun() will produce +%% either: +%% - a binary() chunk and an updated stream_fun() +%% - the atom() done +%% +%% Each binary() returned from the stream_fun() will be sent as a chunk in the +%% response. +%% +%% The KeepAliveOK boolean() indicates if it is OK to reuse this connection. 
+%% Validation of the version and request headers is not required, this is +%% performed by the acceptor if the callback indicates that keepalive is +%% acceptable. +%% +%% The final req_body() must also be returned, so that any remaining data on +%% the buffer is available to the acceptor. +-callback + process_request( + context(), + riak_api_web_body:req_body() + ) -> + { + ok, + context(), + riak_api_web_acceptor:response_code(), + riak_api_web_headers:headers(), + response_body(), + boolean(), + riak_api_web_body:req_body() + } | riak_api_web_acceptor:halt_response(). + +-type timings() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}. + % The result of os:system_time(microsecond) for + % - the start of the request (after accepting a connection, but prior to + % receiving and routing the request) + % - the completion of receipt and processing the request, and calling + % process_request/2. + % - the completion of sending the response to the socket +-type completion() :: stream_complete | send_complete. + % was the output sent chunk encoded, or sent as a whole body + +%% @doc Record the output of the interaction +-callback record_request(context(), timings(), completion()) -> ok. \ No newline at end of file diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index fbaa459..674b35c 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -143,7 +143,7 @@ -type tcp_error() :: closed | timeout | system_limit | inet:posix(). -type tls_error() :: term(). --export_type([socket/0]). +-export_type([socket/0, scheme/0]). 
%%%============================================================================ %%% API From b471a7435255b9249926e67f9e93c21bc6045a94 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Mar 2026 17:59:39 +0000 Subject: [PATCH 07/53] Formatting --- src/riak_api_web_acceptor.erl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 4e9ce29..c260c51 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -87,7 +87,7 @@ }. -type stream_fun() :: fun(() -> {ok, binary()} | done). --type send_fun() :: fun((binary()) -> ok|{error, any()}). +-type send_fun() :: fun((binary()) -> ok | {error, any()}). -export_type([halt_response/0, method/0, response_code/0]). @@ -558,7 +558,7 @@ send_continue(Socket, ReqHeaders) -> ok. stream_response(RspCode, RspHeaders, StreamFun, SendFun) -> RspLine = get_response_line(get_version(), RspCode), - FinalHeaders = + FinalHeaders = riak_api_web_headers:enter( 'Transfer-Encoding', <<"chunked">>, @@ -612,11 +612,11 @@ send_response(RspCode, RspHeaders, RspBody, Socket) -> response_code(), riak_api_web_headers:headers(), binary() -) -> +) -> binary(). generate_binary_response(RspCode, RspHeaders, RspBody) -> RspLine = get_response_line(get_version(), RspCode), - FinalHeaders = + FinalHeaders = riak_api_web_headers:enter( 'Content-Length', integer_to_binary(byte_size(RspBody)), @@ -768,8 +768,10 @@ simple_strean_test() -> <<"Transfer-Encoding: chunked\r\n">>/binary, <<"Server: RiakAPI/4.0 SilverMachine\r\n">>/binary, <<"\r\n">>/binary, - << "4\r\nWiki\r\n6\r\nPedia " - "\r\nA\r\nin chunks!\r\n0\r\n\r\n">>/binary + << + "4\r\nWiki\r\n6\r\nPedia " + "\r\nA\r\nin chunks!\r\n0\r\n\r\n" + >>/binary >>, ?assertMatch(ExpectedResponse, Response). 
From 57d18678d1e78c717eab307087a8c30d0c120337 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 30 Mar 2026 10:59:11 +0100 Subject: [PATCH 08/53] Accepting chunked requests in slices Plus some further testing/formatting --- src/riak_api_web_acceptor.erl | 55 +++++++++++++++++++++- src/riak_api_web_body.erl | 89 ++++++++++++++++++++++++++++++----- src/riak_api_web_handler.erl | 4 ++ 3 files changed, 136 insertions(+), 12 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index c260c51..ecd7ca8 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -731,7 +731,7 @@ simple_response_test() -> >>, ?assertMatch(ExpectedResponse, FullResponse). -simple_strean_test() -> +simple_stream_test() -> SendFun = fun(Bin) when is_binary(Bin) -> case get({?MODULE, ?TEST, send_buffer}) of @@ -785,4 +785,57 @@ stream_fun() -> end end. +expect_test() -> + FixedLength = + riak_api_web_headers:make( + [ + {'Content-Length', <<"1024">>} + ] + ), + ?assertMatch({ok, {1024, false}}, expect_body(FixedLength)), + FixedLengthGZ = + riak_api_web_headers:make( + [ + {'Content-Length', <<"1024">>}, + {'Transfer-Encoding', <<"gzip">>} + ] + ), + ?assertMatch({ok, {1024, true}}, expect_body(FixedLengthGZ)), + UnsupportedCompress = + riak_api_web_headers:make( + [ + {'Content-Length', <<"1024">>}, + {'Transfer-Encoding', <<"deflate">>} + ] + ), + {halt, 400, none, Error1, _} = expect_body(UnsupportedCompress), + ?assertNotMatch( + nomatch, + string:find(Error1, <<"unsupported transfer encoding">>) + ), + NoLength = + riak_api_web_headers:make( + [ + {'Transfer-Encoding', <<"gzip">>} + ] + ), + {halt, 400, none, Error2, _} = expect_body(NoLength), + ?assertNotMatch( + nomatch, + string:find(Error2, <<"without content length">>) + ), + ContentSmuggle = + riak_api_web_headers:make( + [ + {'Content-Length', <<"1024">>}, + {'Transfer-Encoding', <<"gzip">>}, + {'Content-Length', <<"262144">>} + ] + ), + {halt, 400, none, Error3, _} = 
expect_body(ContentSmuggle), + ?assertNotMatch( + nomatch, + string:find(Error3, <<"non-unique length">>) + ). + -endif. diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index ac2772d..098c7af 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -21,10 +21,16 @@ %% @doc Handling functions for receiving and sending object bodies over HTTP %% %% Handling of chunked requests, and some other parts inspired by webmachine. +%% +%% It is possible to accept the inbound request in slices. If there is a fixed +%% content length this will read off the receiver buffer a slice of data at a +%% time. If the transfer encoding is chunked it will buffer the greater of the +%% slice length and the chunk length - i.e. sending chunks > than the slice +%% length will require more memory. -module(riak_api_web_body). --export([get_buffer/1, initiate_body/5, get_body/3]). +-export([get_buffer/1, initiate_body/5, get_body/3, is_gzip/1]). -record(req_body, { buffer :: binary(), @@ -34,6 +40,12 @@ max_size :: pos_integer(), buffer_fun :: buffer_fun(), chunk_buff = <<>> :: binary(), + % Receive buffer used when chunk encoding + % if slice length is all, all body is accumulated here, and if slice + % length is an integer a slice will be extracted if the function is + % called when the chunk_buff is greater than or equal to the slice + % length + transfer_complete = false :: boolean(), test_packets = [] :: list(binary()) % only used in tests }). @@ -50,6 +62,10 @@ %%% API %%%============================================================================ +-spec is_gzip(req_body()) -> boolean(). +is_gzip(ReqBody) -> + ReqBody#req_body.gzip. + -spec get_buffer(req_body()) -> binary(). get_buffer(ReqBody) -> ReqBody#req_body.buffer. 
@@ -86,6 +102,12 @@ get_body(#req_body{content_length = CL, acc_size = AS} = RqBdy, _SL, _TO) when is_integer(CL), CL == AS -> {done, RqBdy}; +get_body( + #req_body{content_length = CL, transfer_complete = TC} = RqBdy, + _SL, + _TO +) when CL == chunked, TC -> + {done, RqBdy}; get_body( #req_body{content_length = CL, acc_size = AccSize, buffer = Bin} = RqBdy, all, @@ -151,9 +173,21 @@ get_body( ) end end; +get_body( + #req_body{content_length = CL, chunk_buff = ChunkBuff} = RqBdy, + SL, + _TO +) when CL == chunked, is_integer(SL), byte_size(ChunkBuff) >= SL -> + <> = ChunkBuff, + { + Slice, + RqBdy#req_body{ + chunk_buff = ChunkBuffRem + } + }; get_body( #req_body{content_length = CL, max_size = MS, acc_size = AS} = RqBdy, - all, + SL, TO ) when CL == chunked -> case erlang:decode_packet(line, RqBdy#req_body.buffer, []) of @@ -164,7 +198,7 @@ get_body( line, TO ), - all, + SL, TO ); {ok, Line, Rest} when is_binary(Line) -> @@ -186,7 +220,11 @@ get_body( <<"\r\n", Next/binary>> = get_buffer(FinalRqBdy), { RcvBuffer, - FinalRqBdy#req_body{buffer = Next, chunk_buff = <<>>} + FinalRqBdy#req_body{ + buffer = Next, + chunk_buff = <<>>, + transfer_complete = true + } }; {N, NextSize} when N > 0, NextSize =< MS -> case byte_size(Rest) of @@ -200,7 +238,7 @@ get_body( <>, acc_size = AS + ChunkSize }, - all, + SL, TO ); BS -> @@ -219,7 +257,7 @@ get_body( <>, acc_size = AS + ChunkSize }, - all, + SL, TO ) end; @@ -227,11 +265,9 @@ get_body( {error, content_too_large} end; {more, _} -> - % have to get the whole receive buffer, or get one byte at a - % time - don't want to ask for more than one byte get_body( extend_buffer(RqBdy, line, TO), - all, + SL, TO ) end. @@ -277,6 +313,7 @@ extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("stdlib/include/assert.hrl"). 
slicing_fixed_length_test() -> %% Receive a 11KB body in 1KB packets @@ -400,10 +437,38 @@ get_standard_wikipedia_test() -> ?assertMatch(<<"Wikipedia in\r\n\r\nchunks.">>, Output), ?assertMatch(<<>>, get_buffer(RqBdyEnd)). +get_standard_wikipedia_inslices_test() -> + Packets = + [ + <<"4\r\n">>, + <<"Wiki\r\n">>, + <<"5\r\n">>, + <<"pedia\r\n">>, + <<"e\r\n">>, + <<" in\r\n\r\nchunks.\r\n">>, + <<"0\r\n">>, + <<"\r\n">> + ], + RqBdyInit = + #req_body{ + buffer = <<"">>, + content_length = chunked, + max_size = 1024 * 1024, + test_packets = Packets + }, + {Slice1, RqBdy1} = get_body(RqBdyInit, 5, 1000), + ?assertMatch(<<"Wikip">>, Slice1), + {Slice2, RqBdy2} = get_body(RqBdy1, 5, 1000), + ?assertMatch(<<"edia ">>, Slice2), + {Slice3, RqBdy3} = get_body(RqBdy2, 100, 1000), + ?assertMatch(<<"in\r\n\r\nchunks.">>, Slice3), + ?assertMatch({done, RqBdy3}, get_body(RqBdy3, 5, 1000)). + get_wikipedia_from_buffer_test() -> + <<>> = dummy_extend_fun(<<>>, none, none), {ok, RqBdyInit} = initiate_body( - fun(B, _, _) -> B end, + fun dummy_extend_fun/3, <<"4\r\nWiki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n">>, chunked, false, @@ -415,6 +480,8 @@ get_wikipedia_from_buffer_test() -> ?assertMatch(<<"Wikipedia in\r\n\r\nchunks.">>, Output), ?assertMatch(<<>>, get_buffer(RqBdyEnd)). +dummy_extend_fun(B, _, _) when is_binary(B) -> B. + ignore_extension_test() -> Packets = [ @@ -430,7 +497,7 @@ ignore_extension_test() -> ], RqBdyInit = #req_body{ - buffer = <<"">>, + buffer = <<>>, content_length = chunked, max_size = 1024 * 1024, test_packets = Packets diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 9e3799f..3d500d5 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -123,6 +123,10 @@ %% - a binary() chunk and an updated stream_fun() %% - the atom() done %% +%% The response object may be gzipped - the callback function should handle +%% this, or error as appropriate. 
the riak_api_web_body:is_gzip/1 function can +%% be checked to see if the object is gzipped. +%% %% Each binary() returned from the stream_fun() will be sent as a chunk in the %% response. %% From df18274d27b31ad12aad723d4936bee44ae4abe5 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 30 Mar 2026 11:10:50 +0100 Subject: [PATCH 09/53] Formatting --- src/riak_api_web_body.erl | 48 ++++++------------------------------- src/riak_api_web_socket.erl | 4 ++++ 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index 098c7af..31cec27 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -116,19 +116,9 @@ get_body( case byte_size(Bin) + AccSize of AccSize0 when AccSize0 >= CL -> <> = Bin, - { - ReqBody, - RqBdy#req_body{ - buffer = Rest, - acc_size = CL - } - }; + {ReqBody, RqBdy#req_body{buffer = Rest, acc_size = CL}}; AccSize0 -> - get_body( - extend_buffer(RqBdy, CL - AccSize0, TO), - all, - TO - ) + get_body(extend_buffer(RqBdy, CL - AccSize0, TO), all, TO) end; get_body( #req_body{content_length = CL, acc_size = AccSize, buffer = Bin} = RqBdy, @@ -140,19 +130,9 @@ get_body( case byte_size(Bin) of BS when BS >= Remaining -> <> = Bin, - { - SliceBody, - RqBdy#req_body{ - buffer = Rest, - acc_size = CL - } - }; + {SliceBody, RqBdy#req_body{buffer = Rest, acc_size = CL}}; BS -> - get_body( - extend_buffer(RqBdy, Remaining - BS, TO), - all, - TO - ) + get_body(extend_buffer(RqBdy, Remaining - BS, TO), SL, TO) end; _Remaining -> case byte_size(Bin) of @@ -166,11 +146,7 @@ get_body( } }; BS -> - get_body( - extend_buffer(RqBdy, SL - BS, TO), - SL, - TO - ) + get_body(extend_buffer(RqBdy, SL - BS, TO), SL, TO) end end; get_body( @@ -179,12 +155,7 @@ get_body( _TO ) when CL == chunked, is_integer(SL), byte_size(ChunkBuff) >= SL -> <> = ChunkBuff, - { - Slice, - RqBdy#req_body{ - chunk_buff = ChunkBuffRem - } - }; + {Slice, RqBdy#req_body{chunk_buff = ChunkBuffRem}}; get_body( 
#req_body{content_length = CL, max_size = MS, acc_size = AS} = RqBdy, SL, @@ -265,11 +236,7 @@ get_body( {error, content_too_large} end; {more, _} -> - get_body( - extend_buffer(RqBdy, line, TO), - SL, - TO - ) + get_body(extend_buffer(RqBdy, line, TO), SL, TO) end. -spec get_chunk_size(binary()) -> non_neg_integer(). @@ -313,7 +280,6 @@ extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). --include_lib("stdlib/include/assert.hrl"). slicing_fixed_length_test() -> %% Receive a 11KB body in 1KB packets diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 674b35c..331c265 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -44,6 +44,10 @@ -module(riak_api_web_socket). +-if(?OTP_RELEASE == 26). +-feature(maybe_expr, enable). +-endif. + -behaviour(gen_server). -export( From f9469ae327b1f646cb5c22d283047009070367a2 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 30 Mar 2026 17:41:47 +0100 Subject: [PATCH 10/53] Add initial end-to-end test --- src/riak_api_web.erl | 24 ++- src/riak_api_web_acceptor.erl | 112 ++++++----- src/riak_api_web_handler.erl | 31 ++- src/riak_api_web_socket.erl | 42 ++-- test/riak_api_web_get_random.erl | 327 +++++++++++++++++++++++++++++++ 5 files changed, 464 insertions(+), 72 deletions(-) create mode 100644 test/riak_api_web_get_random.erl diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index 569558f..f1a2851 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -29,7 +29,8 @@ get_listeners/0, binding_config/2, add_routes/1, - get_route/2 + get_route/2, + spec_name/3 ] ). 
@@ -66,7 +67,16 @@ get_route([{_P, CallbackMod} | Rest], Method, Path) -> case CallbackMod:match_route(Method, Path) of no_match -> get_route(Rest, Method, Path); - {Context, {MaxHdrCount, MaxHdrSize, MaxBodySize}} -> + {method_not_allowed, AllowedMethods} -> + AllowHdrVal = + iolist_to_binary( + lists:join( + <<", ">>, + lists:map(fun atom_to_binary/1, AllowedMethods) + ) + ), + {halt, 405, [{'Allow', AllowHdrVal}], <<>>, []}; + {ok, Context, {MaxHdrCount, MaxHdrSize, MaxBodySize}} -> {ok, CallbackMod, Context, {MaxHdrCount, MaxHdrSize, MaxBodySize}} end. @@ -102,9 +112,11 @@ binding_config(Scheme, Binding) -> Name = spec_name(Scheme, Ip, Port), Config = spec_from_binding(Scheme, Name, Binding), - {Name, {webmachine_mochiweb, start, [Config]}, permanent, 5000, worker, [ - mochiweb_socket_server - ]}. + { + Name, + {riak_api_web_socket, start, [Config]}, permanent, 5000, worker, + [riak_api_web_socket] + }. spec_from_binding(http, Name, {Ip, Port}) -> Options = @@ -151,7 +163,7 @@ spec_name(Scheme, Ip, Port) -> true -> Ip end, - lists:flatten(io_lib:format("~s://~s:~p", [Scheme, FormattedIP, Port])). + iolist_to_binary(io_lib:format("~s://~s:~p", [Scheme, FormattedIP, Port])). common_config() -> [ diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index ecd7ca8..9ad855d 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -26,7 +26,7 @@ -feature(maybe_expr, enable). -endif. --export([start_link/1, init/2]). +-export([start_link/2, init/3]). -export([extend_buffer/4, start_clock/0]). @@ -60,7 +60,7 @@ { halt, response_code(), - riak_api_web_headers:headers() | none, + riak_api_web_headers:header_list(), binary(), list() }. @@ -71,7 +71,8 @@ response_code(), riak_api_web_headers:header_list(), binary(), - riak_api_web_socket:socket() + riak_api_web_socket:socket(), + ets:table() }. 
-type good_result() :: { @@ -95,21 +96,21 @@ %%% API %%%============================================================================ --spec start_link(riak_api_web_socket:socket()) -> pid(). -start_link(Socket) -> - spawn_link(?MODULE, init, [self(), Socket]). +-spec start_link(riak_api_web_socket:socket(), ets:table()) -> pid(). +start_link(Socket, Clock) -> + spawn_link(?MODULE, init, [self(), Socket, Clock]). --spec init(pid(), riak_api_web_socket:socket()) -> ok. -init(Server, Socket) -> - case riak_api_web_socket:accept(Socket, ?ACCEPT_TIMEOUT) of +-spec init(pid(), riak_api_web_socket:socket(), ets:table()) -> ok. +init(Server, Listener, Clock) -> + case riak_api_web_socket:accept(Listener, ?ACCEPT_TIMEOUT) of {ok, Socket} -> ok = riak_api_web_socket:acceptor_accepted(Server), - loop(Socket, <<>>); + loop(Socket, <<>>, Clock); {error, timeout} -> - init(Server, Socket); + init(Server, Listener, Clock); {error, {tls_alert, Alert}} -> ?LOG_WARNING("TLS Alert received ~0p", [Alert]), - init(Server, Socket); + init(Server, Listener, Clock); {error, closed} -> ok; {error, Other} -> @@ -120,23 +121,25 @@ init(Server, Socket) -> %%% Primary Loop %%%============================================================================ --spec loop(riak_api_web_socket:socket(), binary()) -> ok. -loop(Socket, InitBuffer) -> +-spec loop(riak_api_web_socket:socket(), binary(), ets:table()) -> ok. +loop(Socket, InitBuffer, Clock) -> %% In the keepalive loop, the send buffer is assumed to be empty %% An so pipelining of requests (in parallel) is explicitly not supported - case handle_request(Socket, InitBuffer) of + case handle_request(Socket, InitBuffer, Clock) of {KeepAlive, Buffer} when KeepAlive == true -> - loop(Socket, Buffer); + loop(Socket, Buffer, Clock); _Close -> riak_api_web_socket:close(Socket), ok end. -spec handle_request( - riak_api_web_socket:socket(), binary() + riak_api_web_socket:socket(), + binary(), + ets:table() ) -> {boolean(), binary()} | close. 
-handle_request(Socket, InitBuffer) -> +handle_request(Socket, InitBuffer, Clock) -> StartTime = os:system_time(microsecond), reset_version(), RequestResult = @@ -191,7 +194,7 @@ handle_request(Socket, InitBuffer) -> MergedRspHeaders = riak_api_web_headers:enter_from_list( RspHeaders, - default_response_headers(Keepalive) + default_response_headers(Clock, Keepalive) ), { finish, @@ -207,7 +210,7 @@ handle_request(Socket, InitBuffer) -> else {halt, HaltRspCode, HaltRspHeaders, HaltRspText, HaltRspSubs} -> HaltRspBody = generate_error_body(HaltRspText, HaltRspSubs), - {halt, HaltRspCode, HaltRspHeaders, HaltRspBody, Socket} + {halt, HaltRspCode, HaltRspHeaders, HaltRspBody, Socket, Clock} end, handle_response(RequestResult). @@ -240,7 +243,7 @@ reset_version() -> -spec bad_request(binary(), list()) -> halt_response(). bad_request(Error, Subs) -> - {halt, 400, none, Error, Subs}. + {halt, 400, [], Error, Subs}. -spec split_path( iodata() @@ -394,11 +397,11 @@ get_request_line(Socket, Buffer) -> -> {ok, {SM, Path, SV, Rest}}; _USM -> - {halt, 405, none, <<>>, []} + {halt, 405, [], <<>>, []} end; _USV -> USVError = <<"Only HTTP 1.0 and 1.1 supported">>, - {halt, 505, none, USVError, []} + {halt, 505, [], USVError, []} end; {ok, {http_error, Error}, _} -> bad_request(<<"HTTP error on inbound request ~0p">>, [Error]); @@ -527,11 +530,11 @@ handle_response( send_complete ), {Keepalive, BufferIn}; -handle_response({halt, RspCode, RspHeaders, RspBody, Socket}) -> +handle_response({halt, RspCode, RspHeaders, RspBody, Socket, Clock}) -> MergedRspHeaders = riak_api_web_headers:enter_from_list( RspHeaders, - default_response_headers(false) + default_response_headers(Clock, false) ), send_response(RspCode, MergedRspHeaders, RspBody, Socket), close. @@ -655,20 +658,24 @@ get_response_line({1, 1}, RspCode) -> start_clock() -> ets:new( ?MODULE, - [named_table, {read_concurrency, true}] + [public, {read_concurrency, true}] ). 
--spec default_response_headers(boolean()) -> riak_api_web_headers:headers(). -default_response_headers(KeepAlive) -> +-spec default_response_headers( + ets:table(), + boolean() +) -> + riak_api_web_headers:headers(). +default_response_headers(Clock, KeepAlive) -> DateHeader = - case {os:system_time(second), ets:lookup(?MODULE, rfc1123)} of + case {os:system_time(second), ets:lookup(Clock, rfc1123)} of {Now, [{rfc1123, {CachedTime, CachedHdr}}]} when Now == CachedTime -> CachedHdr; {Now, _} -> Hdr = {'Date', list_to_binary(httpd_util:rfc1123_date())}, - ets:insert(?MODULE, {rfc1123, {Now, Hdr}}), + ets:insert(Clock, {rfc1123, {Now, Hdr}}), Hdr end, ServerHeader = {'Server', <<"RiakAPI/4.0 SilverMachine">>}, @@ -691,14 +698,19 @@ default_response_headers(KeepAlive) -> -include_lib("eunit/include/eunit.hrl"). clock_test() -> - start_clock(), - {TC1, _Hdrs1} = timer:tc(fun() -> default_response_headers(true) end), - {TC2, _Hdrs2} = timer:tc(fun() -> default_response_headers(true) end), - {TC3, _Hdrs3} = timer:tc(fun() -> default_response_headers(false) end), - {TC4, _Hdrs4} = timer:tc(fun() -> default_response_headers(true) end), + Clock = start_clock(), + {TC1, _Hdrs1} = + timer:tc(fun() -> default_response_headers(Clock, true) end), + {TC2, _Hdrs2} = + timer:tc(fun() -> default_response_headers(Clock, true) end), + {TC3, _Hdrs3} = + timer:tc(fun() -> default_response_headers(Clock, false) end), + {TC4, _Hdrs4} = + timer:tc(fun() -> default_response_headers(Clock, true) end), timer:sleep(1000), - {TC5, _Hdrs5} = timer:tc(fun() -> default_response_headers(true) end), - ?assertMatch(1, ets:info(?MODULE, size)), + {TC5, _Hdrs5} = + timer:tc(fun() -> default_response_headers(Clock, true) end), + ?assertMatch(1, ets:info(Clock, size)), MeanUnCached = (TC1 + TC5) div 2, MeanCached = (TC2 + TC3 + TC4) div 3, io:format( @@ -706,14 +718,16 @@ clock_test() -> "Cached ~w micros vs uncached ~w~n", [MeanCached, MeanUnCached] ), - ?assert(MeanCached < MeanUnCached). 
+ ?assert(MeanCached < MeanUnCached), + ets:delete(Clock). simple_response_test() -> + Clock = start_clock(), set_version({1, 1}), FullResponse = generate_binary_response( 200, - default_response_headers(false), + default_response_headers(Clock, false), <<"OutputOK">> ), Date = list_to_binary(httpd_util:rfc1123_date()), @@ -729,9 +743,11 @@ simple_response_test() -> <<"\r\n">>/binary, <<"OutputOK">>/binary >>, - ?assertMatch(ExpectedResponse, FullResponse). + ?assertMatch(ExpectedResponse, FullResponse), + ets:delete(Clock). simple_stream_test() -> + Clock = start_clock(), SendFun = fun(Bin) when is_binary(Bin) -> case get({?MODULE, ?TEST, send_buffer}) of @@ -756,7 +772,12 @@ simple_stream_test() -> end ), Date = list_to_binary(httpd_util:rfc1123_date()), - stream_response(200, default_response_headers(true), stream_fun(), SendFun), + stream_response( + 200, + default_response_headers(Clock, true), + stream_fun(), + SendFun + ), Response = get({?MODULE, ?TEST, send_buffer}), ExpectedResponse = << @@ -773,7 +794,8 @@ simple_stream_test() -> "\r\nA\r\nin chunks!\r\n0\r\n\r\n" >>/binary >>, - ?assertMatch(ExpectedResponse, Response). + ?assertMatch(ExpectedResponse, Response), + ets:delete(Clock). 
stream_fun() -> fun() -> @@ -808,7 +830,7 @@ expect_test() -> {'Transfer-Encoding', <<"deflate">>} ] ), - {halt, 400, none, Error1, _} = expect_body(UnsupportedCompress), + {halt, 400, [], Error1, _} = expect_body(UnsupportedCompress), ?assertNotMatch( nomatch, string:find(Error1, <<"unsupported transfer encoding">>) @@ -819,7 +841,7 @@ expect_test() -> {'Transfer-Encoding', <<"gzip">>} ] ), - {halt, 400, none, Error2, _} = expect_body(NoLength), + {halt, 400, [], Error2, _} = expect_body(NoLength), ?assertNotMatch( nomatch, string:find(Error2, <<"without content length">>) @@ -832,7 +854,7 @@ expect_test() -> {'Content-Length', <<"262144">>} ] ), - {halt, 400, none, Error3, _} = expect_body(ContentSmuggle), + {halt, 400, [], Error3, _} = expect_body(ContentSmuggle), ?assertNotMatch( nomatch, string:find(Error3, <<"non-unique length">>) diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 3d500d5..3a437b7 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -49,6 +49,18 @@ %% if compression is allowed -type limits() :: {max_header_count(), max_header_size(), max_body_size()}. +-export_type( + [ + limits/0, + peer/0, + query_params/0, + stream_fun/0, + response_body/0, + timings/0, + completion/0 + ] +). + %% @doc match_route for the module %% When called each route handled by this module must be checked, and either %% `no_match` returned should none match - or the initial context with the @@ -57,8 +69,9 @@ riak_api_web_acceptor:method(), unicode:chardata() ) -> - no_match|{ok, context(), limits()}. - + no_match | + {method_not_allowed, list(riak_api_web_acceptor:method())} | + {ok, context(), limits()}. -type peer() :: inet:ip_address(). %% The IP address of the client device connected to the socket @@ -79,7 +92,7 @@ {ok, context()}|riak_api_web_acceptor:halt_response(). --type query_params() :: #{binary() => binary()}. +-type query_params() :: [{binary(), binary()}]. 
%% @doc parse and validate query params, passed as a map %% Any parameter will have both key and value as a binary, except if the @@ -145,11 +158,13 @@ { ok, context(), - riak_api_web_acceptor:response_code(), - riak_api_web_headers:headers(), - response_body(), - boolean(), - riak_api_web_body:req_body() + { + riak_api_web_acceptor:response_code(), + riak_api_web_headers:header_list(), + response_body(), + boolean(), + riak_api_web_body:req_body() + } } | riak_api_web_acceptor:halt_response(). -type timings() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}. diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 331c265..165651d 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -64,7 +64,8 @@ init/1, handle_call/3, handle_cast/2, - handle_info/2 + handle_info/2, + terminate/2 ] ). @@ -91,7 +92,8 @@ listener :: socket(), pool_size = ?POOL_SIZE_DEFAULT :: pos_integer(), max_pool_size = ?POOL_SIZE_MAX_DEFAULT :: pos_integer(), - acceptor_pool = sets:new([{version, 2}]) :: sets:set() + acceptor_pool = sets:new([{version, 2}]) :: sets:set(), + clock :: ets:table() }). -type socket_option() :: @@ -218,12 +220,13 @@ init(Options) -> end, SocketOpts = default_socket_options(IP), {ok, Listener} = listen(Protocol, Port, SocketOpts, BufferOpts, SSLOpts), - {AcceptorPool, StartSize, MaxSize} = get_acceptor_pool(Listener, Options), + Clock = riak_api_web_acceptor:start_clock(), + {AcceptorPool, StartSize, MaxSize} = + get_acceptor_pool(Listener, Options, Clock), ?LOG_INFO( "Acceptor pool for web started on IP ~0p port ~w of size ~w", [IP, Port, StartSize] ), - riak_api_web_acceptor:start_clock(), { ok, #socket_state{ @@ -231,7 +234,8 @@ init(Options) -> port = Port, pool_size = StartSize, max_pool_size = MaxSize, - acceptor_pool = sets:from_list(AcceptorPool, [{version, 2}]) + acceptor_pool = sets:from_list(AcceptorPool, [{version, 2}]), + clock = Clock } }. 
@@ -255,7 +259,11 @@ handle_cast({set_max_pool_size, MPS}, State) -> handle_cast(accepted, State) -> case State#socket_state.pool_size of PS when PS < State#socket_state.max_pool_size -> - P = riak_api_web_acceptor:start_link(State#socket_state.listener), + P = + riak_api_web_acceptor:start_link( + State#socket_state.listener, + State#socket_state.clock + ), { noreply, State#socket_state{ @@ -285,6 +293,10 @@ handle_info({'EXIT', Pid, Reason}, State) -> ?LOG_ERROR("Acceptor ~p unexpectedly crashed: ~0p", [Pid, Reason]), handle_info({'EXIT', Pid, normal}, State). +terminate(_Reason, State) -> + ets:delete(State#socket_state.clock), + ok. + %%%============================================================================ %%% Internal Functions %%%============================================================================ @@ -299,9 +311,9 @@ default_socket_options(IPAddr) -> {active, false} ]. --spec get_acceptor_pool(socket(), list(option())) -> +-spec get_acceptor_pool(socket(), list(option()), ets:table()) -> {list(pid()), pos_integer(), pos_integer()}. -get_acceptor_pool(Listener, Options) -> +get_acceptor_pool(Listener, Options, Clock) -> StartSize = case lists:keyfind(web_acceptor_pool_start_size, 1, Options) of {acceptor_pool_start_size, SS} when is_integer(SS), SS > 0 -> @@ -330,7 +342,11 @@ get_acceptor_pool(Listener, Options) -> is_integer(MaxSize), MaxSize >= StartSize -> - {start_acceptor_pool(Listener, StartSize), StartSize, MaxSize}; + { + start_acceptor_pool(Listener, StartSize, Clock), + StartSize, + MaxSize + }; InvalidConfig -> ?LOG_ERROR( "Invalid configuration of acceptor pool ~0p - " @@ -338,17 +354,17 @@ get_acceptor_pool(Listener, Options) -> [InvalidConfig] ), { - start_acceptor_pool(Listener, ?POOL_SIZE_DEFAULT), + start_acceptor_pool(Listener, ?POOL_SIZE_DEFAULT, Clock), ?POOL_SIZE_DEFAULT, ?POOL_SIZE_MAX_DEFAULT } end. --spec start_acceptor_pool(socket(), pos_integer()) -> list(pid()). 
-start_acceptor_pool(Listener, Size) -> +-spec start_acceptor_pool(socket(), pos_integer(), ets:table()) -> list(pid()). +start_acceptor_pool(Listener, Size, Clock) -> lists:map( fun(_I) -> - P = riak_api_web_acceptor:start_link(Listener), + P = riak_api_web_acceptor:start_link(Listener, Clock), true = is_pid(P), P end, diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl new file mode 100644 index 0000000..72bed16 --- /dev/null +++ b/test/riak_api_web_get_random.erl @@ -0,0 +1,327 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2026 Martin Sumner +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% @doc Test handler that responds with random data. + +-module(riak_api_web_get_random). + +-if(?OTP_RELEASE == 26). +-feature(maybe_expr, enable). +-endif. + +-behaviour(riak_api_web_handler). + +-export( + [ + match_route/2, + check_permissions/4, + parse_query_params/2, + parse_request_headers/2, + process_request/2, + record_request/3 + ] +). + +-ifdef(TEST). +-export( + [ + setup/0, + generator/1, + cleanup/1, + request_single_value/3 + ] +). +-endif. + +-record(context, + { + request_id :: non_neg_integer()|undefined, + required_size :: non_neg_integer()|undefined + } +). + +-type context() :: #context{}. + +-define(ID_HEADER_LWR, <<"x-riak-request_id">>). 
+ +%% @doc match_route for the module +-spec match_route( + riak_api_web_acceptor:method(), + unicode:chardata() +) -> + no_match | + {method_not_allowed, list(riak_api_web_acceptor:method())} | + {ok, context(), riak_api_web_handler:limits()}. +match_route('GET', <<"/random_data">>) -> + {ok, #context{}, {10, 1024, 128 * 1024}}; +match_route(_, <<"/random_data">>) -> + {method_not_allowed, ['GET']}; +match_route(_, _) -> + no_match. + +%% @doc check_permissions for using this module or route +-spec + check_permissions( + context(), + riak_api_web_headers:headers(), + riak_api_web_socket:scheme(), + riak_api_web_handler:peer() + ) -> + {ok, context()}. +check_permissions(Ctx, _Hdrs, _Scheme, _Peer) -> + {ok, Ctx}. + +%% @doc parse and validate query params, passed as a map +-spec + parse_query_params( + context(), + riak_api_web_handler:query_params() + ) -> + {ok, context()}|riak_api_web_acceptor:halt_response(). +parse_query_params(#context{required_size = undefined}, []) -> + {halt, 400, [], <<"no required_size parameter">>, []}; +parse_query_params(Ctx, []) -> + {ok, Ctx}; +parse_query_params(Ctx, [{<<"required_size">>, RS}|Rest]) -> + try + case binary_to_integer(RS) of + RSI when is_integer(RSI), RSI >= 0 -> + parse_query_params(Ctx#context{required_size = RSI}, Rest); + _BadRS -> + {halt, 400, [], <<"invalid required_size ~0p">>, [RS]} + end + catch + _ : _ -> + {halt, 400, [], <<"invalid required_size ~0p">>, [RS]} + end; +parse_query_params(Ctx,[_Other|Rest]) -> + parse_query_params(Ctx, Rest). + +%% @doc parse and validate the request headers +-spec + parse_request_headers( + context(), + riak_api_web_headers:headers() + ) -> + {ok, context()}|riak_api_web_acceptor:halt_response(). 
+parse_request_headers(Ctx, ReqHeaders) -> + case riak_api_web_headers:lookup(?ID_HEADER_LWR, ReqHeaders, true) of + undefined -> + ErrorMsg = <<"request requires x-riak-request_id header">>, + {halt, 400, [], ErrorMsg, []}; + {_OrigKey, [RequestIDStr]} when is_binary(RequestIDStr) -> + try + RequestID = binary_to_integer(RequestIDStr), + true = RequestID > 0, + {ok, Ctx#context{request_id = RequestID}} + catch + error:badarg -> + {halt, 400, [], <<"invalid non-numeric request_id">>, []}; + error:{badmatch,false} -> + {halt, 400, [], <<"invalid negative request_id">>, []} + end; + {_OrigKey, MultipleIDs} when is_list(MultipleIDs) -> + {halt, 400, [], <<"multiple request_id provided">>, []} + end. + +%% @doc Process the request and produce a response +-spec + process_request( + context(), + riak_api_web_body:req_body() + ) -> + { + ok, + context(), + { + riak_api_web_acceptor:response_code(), + riak_api_web_headers:header_list(), + riak_api_web_handler:response_body(), + boolean(), + riak_api_web_body:req_body() + } + }. +process_request(Ctx = #context{request_id = RqID, required_size = RS}, RqBdy) + when is_integer(RqID), is_integer(RS), RS > 0 -> + Body = crypto:strong_rand_bytes(RS), + RspHdr = + {<<"X-Riak-request_id">>, integer_to_binary(RqID)}, + { + ok, + Ctx, + {200, [RspHdr], Body, true, RqBdy} + }. + +%% @doc Record the output of the interaction +-spec record_request( + context(), + riak_api_web_handler:timings(), + riak_api_web_handler:completion() +) -> + ok. +record_request(_Ctx, Timings, Completion) -> + {A, B, C} = Timings, + io:format( + user, + "Request ~w with timings ~0p~n", + [Completion, {B - A, C - B, C - A}] + ). + + +%%%============================================================================ +%%% Eunit tests +%%%============================================================================ + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +basic_handler_test_() -> + {setup, fun setup/0, fun cleanup/1, fun generator/1}. 
+ +setup() -> + TestPort = find_available_port(lists:seq(8000, 8999)), + IPAddr = {127, 0, 0, 1}, + SpecName = riak_api_web:spec_name(http, IPAddr, TestPort), + Options = + [ + {name, SpecName}, + {ip, IPAddr}, + {port, TestPort}, + {acceptor_pool_start_size, 4} + ], + {ok, _Pid} = riak_api_web_socket:start_link(Options), + riak_api_web:add_routes([{10, ?MODULE}]), + {SpecName, IPAddr, TestPort} + . + +generator({_SpecName, IPAddr, Port}) -> + [ + request_single_value(IPAddr, Port, 32), + request_single_value(IPAddr, Port, 64), + request_single_value(IPAddr, Port, 2048), + pipeline_request_values(IPAddr, Port, 16) + ]. + +cleanup({_SpecName, _IPAddr, _Port}) -> + ok. + +-define(REQUEST_BIN(ID, Size, KeepAlive), + io_lib:format( + << + "GET /random_data?required_size=~w HTTP/1.1\r\n" + "X-Riak-request_id: ~w\r\n" + "Connection: ~w\r\n" + "Content-Length: 0\r\n" + "\r\n" + >>, + [Size, ID, KeepAlive] + ) +). + +request_single_value(IPAddr, Port, Size) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = ?REQUEST_BIN(1, Size, close), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + ?assertMatch(<<>>, validate_response(Data, Size, Socket)), + ok = gen_tcp:close(Socket) + end. + +pipeline_request_values(IPAddr, Port, Size) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Requests = + lists:map( + fun(I) -> ?REQUEST_BIN(I, Size, 'keep-alive') end, + lists:seq(1, 5) + ), + Request = iolist_to_binary(Requests), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + R1 = validate_response(Data, Size, Socket), + R2 = validate_response(R1, Size, Socket), + R3 = validate_response(R2, Size, Socket), + R4 = validate_response(R3, Size, Socket), + <<>> = validate_response(R4, Size, Socket), + ok = gen_tcp:close(Socket) + end. 
+ +extract_headers(Data, Socket) -> + maybe + {ok, L1, R1} ?= erlang:decode_packet(line, Data, []), + ?assertMatch(L1, <<"HTTP/1.1 200 OK\r\n">>), + {ok, L2, R2} ?= erlang:decode_packet(line, R1, []), + {ok, L3, R3} ?= erlang:decode_packet(line, R2, []), + {ok, L4, R4} ?= erlang:decode_packet(line, R3, []), + {ok, L5, R5} ?= erlang:decode_packet(line, R4, []), + {ok, L6, R6} ?= erlang:decode_packet(line, R5, []), + {ok, <<"\r\n">>, R7} ?= erlang:decode_packet(line, R6, []), + { + lists:map( + fun(S) -> hd(string:split(S, <<":">>, leading)) end, + lists:sort([L2, L3, L4, L5, L6]) + ), + R7 + } + else + {more, _} -> + {ok, More} = gen_tcp:recv(Socket, 0), + extract_headers(<<Data/binary, More/binary>>, Socket) + end. + +validate_response(Data, Size, Socket) -> + {HeaderKeys, Rem} = extract_headers(Data, Socket), + ?assertMatch( + [ + <<"Connection">>, + <<"Content-Length">>, + <<"Date">>, + <<"Server">>, + <<"X-Riak-request_id">> + ], + HeaderKeys + ), + {ok, RspBody, Rest} = erlang:decode_packet(0, Rem, []), + <<ExpectedBody:Size/binary, Excess/binary>> = RspBody, + ?assertMatch(Size, byte_size(ExpectedBody)), + <<Excess/binary, Rest/binary>>. + +find_available_port([]) -> + no_port_found; +find_available_port([Port|Rest]) -> + case gen_tcp:listen(Port, []) of + {ok, Sock} -> + ok = gen_tcp:close(Sock), + Port; + _ -> + find_available_port(Rest) + end. + +-endif.
\ No newline at end of file From 2812d4babcceafa7e879a76901779052dfa75ee2 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 30 Mar 2026 23:25:29 +0100 Subject: [PATCH 11/53] Revert clock back to ETS, and add negative tests --- src/riak_api_web.erl | 2 +- src/riak_api_web_acceptor.erl | 163 ++++++++++++++++++++----------- src/riak_api_web_headers.erl | 4 +- src/riak_api_web_socket.erl | 32 +++--- test/riak_api_web_get_random.erl | 133 +++++++++++++++++++++---- 5 files changed, 234 insertions(+), 100 deletions(-) diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index f1a2851..886403c 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -62,7 +62,7 @@ get_route(Method, Path) -> get_route(CurrentRoutes, Method, Path). get_route([], _Method, _Path) -> - {halt, 404, none, <<>>, []}; + {halt, 404, [], <<>>, []}; get_route([{_P, CallbackMod} | Rest], Method, Path) -> case CallbackMod:match_route(Method, Path) of no_match -> diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 9ad855d..0d149ec 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -26,7 +26,7 @@ -feature(maybe_expr, enable). -endif. --export([start_link/2, init/3]). +-export([start_link/1, init/2]). -export([extend_buffer/4, start_clock/0]). @@ -38,17 +38,15 @@ -type response_code() :: 200..204 + | 206 | 300..304 | 400 - | 401 - | 403..406 - | 408..415 - | 428..429 + | 401..406 + | 408..418 + | 421..429 | 431 - | 500 - | 503 - | 505 - | 507. + | 451 + | 500..508. -type method() :: 'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE'. @@ -71,8 +69,7 @@ response_code(), riak_api_web_headers:header_list(), binary(), - riak_api_web_socket:socket(), - ets:table() + riak_api_web_socket:socket() }. -type good_result() :: { @@ -96,21 +93,21 @@ %%% API %%%============================================================================ --spec start_link(riak_api_web_socket:socket(), ets:table()) -> pid().
-start_link(Socket, Clock) -> - spawn_link(?MODULE, init, [self(), Socket, Clock]). +-spec start_link(riak_api_web_socket:socket()) -> pid(). +start_link(Socket) -> + spawn_link(?MODULE, init, [self(), Socket]). --spec init(pid(), riak_api_web_socket:socket(), ets:table()) -> ok. -init(Server, Listener, Clock) -> +-spec init(pid(), riak_api_web_socket:socket()) -> ok. +init(Server, Listener) -> case riak_api_web_socket:accept(Listener, ?ACCEPT_TIMEOUT) of {ok, Socket} -> ok = riak_api_web_socket:acceptor_accepted(Server), - loop(Socket, <<>>, Clock); + loop(Socket, <<>>); {error, timeout} -> - init(Server, Listener, Clock); + init(Server, Listener); {error, {tls_alert, Alert}} -> ?LOG_WARNING("TLS Alert received ~0p", [Alert]), - init(Server, Listener, Clock); + init(Server, Listener); {error, closed} -> ok; {error, Other} -> @@ -121,13 +118,13 @@ init(Server, Listener, Clock) -> %%% Primary Loop %%%============================================================================ --spec loop(riak_api_web_socket:socket(), binary(), ets:table()) -> ok. -loop(Socket, InitBuffer, Clock) -> +-spec loop(riak_api_web_socket:socket(), binary()) -> ok. +loop(Socket, InitBuffer) -> %% In the keepalive loop, the send buffer is assumed to be empty %% An so pipelining of requests (in parallel) is explicitly not supported - case handle_request(Socket, InitBuffer, Clock) of + case handle_request(Socket, InitBuffer) of {KeepAlive, Buffer} when KeepAlive == true -> - loop(Socket, Buffer, Clock); + loop(Socket, Buffer); _Close -> riak_api_web_socket:close(Socket), ok @@ -135,11 +132,10 @@ loop(Socket, InitBuffer, Clock) -> -spec handle_request( riak_api_web_socket:socket(), - binary(), - ets:table() + binary() ) -> {boolean(), binary()} | close. 
-handle_request(Socket, InitBuffer, Clock) -> +handle_request(Socket, InitBuffer) -> StartTime = os:system_time(microsecond), reset_version(), RequestResult = @@ -194,7 +190,7 @@ handle_request(Socket, InitBuffer, Clock) -> MergedRspHeaders = riak_api_web_headers:enter_from_list( RspHeaders, - default_response_headers(Clock, Keepalive) + default_response_headers(Keepalive) ), { finish, @@ -210,7 +206,7 @@ handle_request(Socket, InitBuffer, Clock) -> else {halt, HaltRspCode, HaltRspHeaders, HaltRspText, HaltRspSubs} -> HaltRspBody = generate_error_body(HaltRspText, HaltRspSubs), - {halt, HaltRspCode, HaltRspHeaders, HaltRspBody, Socket, Clock} + {halt, HaltRspCode, HaltRspHeaders, HaltRspBody, Socket} end, handle_response(RequestResult). @@ -530,11 +526,11 @@ handle_response( send_complete ), {Keepalive, BufferIn}; -handle_response({halt, RspCode, RspHeaders, RspBody, Socket, Clock}) -> +handle_response({halt, RspCode, RspHeaders, RspBody, Socket}) -> MergedRspHeaders = riak_api_web_headers:enter_from_list( RspHeaders, - default_response_headers(Clock, false) + default_response_headers(false) ), send_response(RspCode, MergedRspHeaders, RspBody, Socket), close. @@ -638,9 +634,7 @@ get_response_line({1, 0}, RspCode) -> iolist_to_binary( [ <<"HTTP/1.0 ">>, - integer_to_binary(RspCode), - <<" ">>, - httpd_util:reason_phrase(RspCode), + reason_phrase(RspCode), <<"\r\n">> ] ); @@ -648,34 +642,34 @@ get_response_line({1, 1}, RspCode) -> iolist_to_binary( [ <<"HTTP/1.1 ">>, - integer_to_binary(RspCode), - <<" ">>, - httpd_util:reason_phrase(RspCode), + reason_phrase(RspCode), <<"\r\n">> ] ). +-spec start_clock() -> ok. start_clock() -> - ets:new( - ?MODULE, - [public, {read_concurrency, true}] - ). + ?MODULE = + ets:new( + ?MODULE, + [named_table, public, {read_concurrency, true}] + ), + ok. -spec default_response_headers( - ets:table(), boolean() ) -> riak_api_web_headers:headers(). 
-default_response_headers(Clock, KeepAlive) -> +default_response_headers(KeepAlive) -> DateHeader = - case {os:system_time(second), ets:lookup(Clock, rfc1123)} of + case {os:system_time(second), ets:lookup(?MODULE, rfc1123)} of {Now, [{rfc1123, {CachedTime, CachedHdr}}]} when Now == CachedTime -> CachedHdr; {Now, _} -> Hdr = {'Date', list_to_binary(httpd_util:rfc1123_date())}, - ets:insert(Clock, {rfc1123, {Now, Hdr}}), + ets:insert(?MODULE, {rfc1123, {Now, Hdr}}), Hdr end, ServerHeader = {'Server', <<"RiakAPI/4.0 SilverMachine">>}, @@ -690,6 +684,59 @@ default_response_headers(Clock, KeepAlive) -> [ServerHeader, DateHeader, ConnectionHeader] ). +%% @doc +%% The http_util:reason_phrase/1 returns Object Not Found not Not Found +%% these are taken direct from RFC 2616 +-spec reason_phrase(response_code()) -> binary(). +reason_phrase(200) -> <<"200 OK">>; +reason_phrase(201) -> <<"201 Created">>; +reason_phrase(202) -> <<"202 Accepted">>; +reason_phrase(203) -> <<"203 Non-Authoritative Information">>; +reason_phrase(204) -> <<"204 No Content">>; +reason_phrase(206) -> <<"206 Partial Content">>; +reason_phrase(300) -> <<"300 Multiple Choices">>; +reason_phrase(301) -> <<"301 Moved Permanently">>; +reason_phrase(302) -> <<"302 Found">>; +reason_phrase(303) -> <<"303 See Other">>; +reason_phrase(304) -> <<"304 Not Modified">>; +reason_phrase(400) -> <<"400 Bad Request">>; +reason_phrase(401) -> <<"401 Unauthorized">>; +reason_phrase(402) -> <<"402 Payment Required">>; +reason_phrase(403) -> <<"403 Forbidden">>; +reason_phrase(404) -> <<"404 Not Found">>; +reason_phrase(405) -> <<"405 Method Not Allowed">>; +reason_phrase(406) -> <<"406 Not Acceptable">>; +reason_phrase(408) -> <<"408 Request Timeout">>; +reason_phrase(409) -> <<"409 Conflict">>; +reason_phrase(410) -> <<"410 Gone">>; +reason_phrase(411) -> <<"411 Length Required">>; +reason_phrase(412) -> <<"412 Precondition Failed">>; +reason_phrase(413) -> <<"413 Request Entity Too Large">>; +reason_phrase(414) -> 
<<"414 Request-URI Too Long">>; +reason_phrase(415) -> <<"415 Unsupported Media Type">>; +reason_phrase(416) -> <<"416 Requested Range Not Satisfiable">>; +reason_phrase(417) -> <<"417 Expectation Failed">>; +reason_phrase(418) -> <<"418 I'm a teapot">>; +reason_phrase(421) -> <<"421 Misdirected Request">>; +reason_phrase(422) -> <<"422 Unprocessable Entity">>; +reason_phrase(423) -> <<"423 Locked">>; +reason_phrase(424) -> <<"424 Failed Dependency">>; +reason_phrase(425) -> <<"425 Unordered Collection">>; +reason_phrase(426) -> <<"426 Upgrade Required">>; +reason_phrase(428) -> <<"428 Precondition Required">>; +reason_phrase(429) -> <<"429 Too Many Requests">>; +reason_phrase(431) -> <<"431 Request Header Fields Too Large">>; +reason_phrase(451) -> <<"451 Unavailable For Legal Reasons">>; +reason_phrase(500) -> <<"500 Internal Server Error">>; +reason_phrase(501) -> <<"501 Not Implemented">>; +reason_phrase(502) -> <<"502 Bad Gateway">>; +reason_phrase(503) -> <<"503 Service Unavailable">>; +reason_phrase(504) -> <<"504 Gateway Timeout">>; +reason_phrase(505) -> <<"505 HTTP Version Not Supported">>; +reason_phrase(506) -> <<"506 Variant Also Negotiates">>; +reason_phrase(507) -> <<"507 Insufficient Storage">>; +reason_phrase(508) -> <<"508 Loop Detected">>. + %%%============================================================================ %%% Eunit tests %%%============================================================================ @@ -698,19 +745,19 @@ default_response_headers(Clock, KeepAlive) -> -include_lib("eunit/include/eunit.hrl"). 
clock_test() -> - Clock = start_clock(), + ok = start_clock(), {TC1, _Hdrs1} = - timer:tc(fun() -> default_response_headers(Clock, true) end), + timer:tc(fun() -> default_response_headers(true) end), {TC2, _Hdrs2} = - timer:tc(fun() -> default_response_headers(Clock, true) end), + timer:tc(fun() -> default_response_headers(true) end), {TC3, _Hdrs3} = - timer:tc(fun() -> default_response_headers(Clock, false) end), + timer:tc(fun() -> default_response_headers(false) end), {TC4, _Hdrs4} = - timer:tc(fun() -> default_response_headers(Clock, true) end), + timer:tc(fun() -> default_response_headers(true) end), timer:sleep(1000), {TC5, _Hdrs5} = - timer:tc(fun() -> default_response_headers(Clock, true) end), - ?assertMatch(1, ets:info(Clock, size)), + timer:tc(fun() -> default_response_headers(true) end), + ?assertMatch(1, ets:info(?MODULE, size)), MeanUnCached = (TC1 + TC5) div 2, MeanCached = (TC2 + TC3 + TC4) div 3, io:format( @@ -719,15 +766,15 @@ clock_test() -> [MeanCached, MeanUnCached] ), ?assert(MeanCached < MeanUnCached), - ets:delete(Clock). + ets:delete(?MODULE). simple_response_test() -> - Clock = start_clock(), + ok = start_clock(), set_version({1, 1}), FullResponse = generate_binary_response( 200, - default_response_headers(Clock, false), + default_response_headers(false), <<"OutputOK">> ), Date = list_to_binary(httpd_util:rfc1123_date()), @@ -744,10 +791,10 @@ simple_response_test() -> <<"OutputOK">>/binary >>, ?assertMatch(ExpectedResponse, FullResponse), - ets:delete(Clock). + ets:delete(?MODULE). 
simple_stream_test() -> - Clock = start_clock(), + ok = start_clock(), SendFun = fun(Bin) when is_binary(Bin) -> case get({?MODULE, ?TEST, send_buffer}) of @@ -774,7 +821,7 @@ simple_stream_test() -> Date = list_to_binary(httpd_util:rfc1123_date()), stream_response( 200, - default_response_headers(Clock, true), + default_response_headers(true), stream_fun(), SendFun ), @@ -795,7 +842,7 @@ simple_stream_test() -> >>/binary >>, ?assertMatch(ExpectedResponse, Response), - ets:delete(Clock). + ets:delete(?MODULE). stream_fun() -> fun() -> diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 5b04d7d..4bd8038 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -291,11 +291,11 @@ parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}) -> parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}, {[], 0}). parse_request_block(_B, _BFun, {MaxCount, _MS}, {_H, C}) when C > MaxCount -> - {halt, 431, none, ?COUNT_EXCEEDED, [MaxCount]}; + {halt, 431, [], ?COUNT_EXCEEDED, [MaxCount]}; parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}, {HeaderAcc, C}) -> case erlang:decode_packet(httph_bin, Buffer, []) of {ok, {http_header, _, _, _, V}, _} when byte_size(V) > MaxSize -> - {halt, 431, none, ?SIZE_EXCEEDED, [MaxSize]}; + {halt, 431, [], ?SIZE_EXCEEDED, [MaxSize]}; {ok, {http_header, _, Key, _OrigKey, Value}, Rest} when is_atom(Key) -> parse_request_block( Rest, diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 165651d..7a0e4e7 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -92,8 +92,7 @@ listener :: socket(), pool_size = ?POOL_SIZE_DEFAULT :: pos_integer(), max_pool_size = ?POOL_SIZE_MAX_DEFAULT :: pos_integer(), - acceptor_pool = sets:new([{version, 2}]) :: sets:set(), - clock :: ets:table() + acceptor_pool = sets:new([{version, 2}]) :: sets:set() }). 
-type socket_option() :: @@ -220,9 +219,8 @@ init(Options) -> end, SocketOpts = default_socket_options(IP), {ok, Listener} = listen(Protocol, Port, SocketOpts, BufferOpts, SSLOpts), - Clock = riak_api_web_acceptor:start_clock(), - {AcceptorPool, StartSize, MaxSize} = - get_acceptor_pool(Listener, Options, Clock), + riak_api_web_acceptor:start_clock(), + {AcceptorPool, StartSize, MaxSize} = get_acceptor_pool(Listener, Options), ?LOG_INFO( "Acceptor pool for web started on IP ~0p port ~w of size ~w", [IP, Port, StartSize] @@ -234,8 +232,7 @@ init(Options) -> port = Port, pool_size = StartSize, max_pool_size = MaxSize, - acceptor_pool = sets:from_list(AcceptorPool, [{version, 2}]), - clock = Clock + acceptor_pool = sets:from_list(AcceptorPool, [{version, 2}]) } }. @@ -261,8 +258,7 @@ handle_cast(accepted, State) -> PS when PS < State#socket_state.max_pool_size -> P = riak_api_web_acceptor:start_link( - State#socket_state.listener, - State#socket_state.clock + State#socket_state.listener ), { noreply, @@ -293,8 +289,8 @@ handle_info({'EXIT', Pid, Reason}, State) -> ?LOG_ERROR("Acceptor ~p unexpectedly crashed: ~0p", [Pid, Reason]), handle_info({'EXIT', Pid, normal}, State). -terminate(_Reason, State) -> - ets:delete(State#socket_state.clock), +terminate(_Reason, _State) -> + ets:delete(riak_api_web_acceptor), ok. %%%============================================================================ @@ -311,9 +307,9 @@ default_socket_options(IPAddr) -> {active, false} ]. --spec get_acceptor_pool(socket(), list(option()), ets:table()) -> +-spec get_acceptor_pool(socket(), list(option())) -> {list(pid()), pos_integer(), pos_integer()}.
-get_acceptor_pool(Listener, Options, Clock) -> +get_acceptor_pool(Listener, Options) -> StartSize = case lists:keyfind(web_acceptor_pool_start_size, 1, Options) of {acceptor_pool_start_size, SS} when is_integer(SS), SS > 0 -> @@ -343,7 +339,7 @@ get_acceptor_pool(Listener, Options, Clock) -> MaxSize >= StartSize -> { - start_acceptor_pool(Listener, StartSize, Clock), + start_acceptor_pool(Listener, StartSize), StartSize, MaxSize }; @@ -354,17 +350,17 @@ get_acceptor_pool(Listener, Options, Clock) -> [InvalidConfig] ), { - start_acceptor_pool(Listener, ?POOL_SIZE_DEFAULT, Clock), + start_acceptor_pool(Listener, ?POOL_SIZE_DEFAULT), ?POOL_SIZE_DEFAULT, ?POOL_SIZE_MAX_DEFAULT } end. --spec start_acceptor_pool(socket(), pos_integer(), ets:table()) -> list(pid()). -start_acceptor_pool(Listener, Size, Clock) -> +-spec start_acceptor_pool(socket(), pos_integer()) -> list(pid()). +start_acceptor_pool(Listener, Size) -> lists:map( fun(_I) -> - P = riak_api_web_acceptor:start_link(Listener, Clock), + P = riak_api_web_acceptor:start_link(Listener), true = is_pid(P), P end, diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index 72bed16..08ab3af 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -194,6 +194,49 @@ record_request(_Ctx, Timings, Completion) -> basic_handler_test_() -> {setup, fun setup/0, fun cleanup/1, fun generator/1}. +-define(REQUEST_BIN(ID, Size, KeepAlive), + io_lib:format( + << + "GET /random_data?required_size=~w HTTP/1.1\r\n" + "X-Riak-request_id: ~w\r\n" + "Connection: ~w\r\n" + "Content-Length: 0\r\n" + "\r\n" + >>, + [Size, ID, KeepAlive] + ) +). + +-define(BAD_VERSION, + << + "GET /random_data?required_size=~w HTTP1.1\r\n" + "X-Riak-request_id: 1\r\n" + "Connection: close\r\n" + "Content-Length: 0\r\n" + "\r\n" + >> +). 
+ +-define(WRONG_URL, + << + "GET /randon_data?required_size=~w HTTP/1.1\r\n" + "X-Riak-request_id: 1\r\n" + "Connection: close\r\n" + "Content-Length: 0\r\n" + "\r\n" + >> +). + +-define(POST_NOT_GET, + << + "POST /random_data?required_size=~w HTTP/1.1\r\n" + "X-Riak-request_id: 1\r\n" + "Connection: close\r\n" + "Content-Length: 0\r\n" + "\r\n" + >> +). + setup() -> TestPort = find_available_port(lists:seq(8000, 8999)), IPAddr = {127, 0, 0, 1}, @@ -215,24 +258,29 @@ generator({_SpecName, IPAddr, Port}) -> request_single_value(IPAddr, Port, 32), request_single_value(IPAddr, Port, 64), request_single_value(IPAddr, Port, 2048), - pipeline_request_values(IPAddr, Port, 16) + pipeline_request_values(IPAddr, Port, 16), + request_error(IPAddr, Port, ?WRONG_URL, 404), + request_error(IPAddr, Port, ?POST_NOT_GET, 405), + request_error(IPAddr, Port, ?BAD_VERSION, 400) ]. cleanup({_SpecName, _IPAddr, _Port}) -> ok. --define(REQUEST_BIN(ID, Size, KeepAlive), - io_lib:format( - << - "GET /random_data?required_size=~w HTTP/1.1\r\n" - "X-Riak-request_id: ~w\r\n" - "Connection: ~w\r\n" - "Content-Length: 0\r\n" - "\r\n" - >>, - [Size, ID, KeepAlive] - ) -). +request_error(IPAddr, Port, Msg, ExpectedCode) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + ok = gen_tcp:send(Socket, Msg), + {ok, Data} = gen_tcp:recv(Socket, 0), + ?assertMatch(ok, validate_error(Data, ExpectedCode, Socket)), + ok = gen_tcp:close(Socket) + end. + request_single_value(IPAddr, Port, Size) -> fun() -> @@ -273,31 +321,51 @@ pipeline_request_values(IPAddr, Port, Size) -> ok = gen_tcp:close(Socket) end. 
-extract_headers(Data, Socket) -> +extract_headers(Data, Socket, ExpectedResponseLine) -> maybe {ok, L1, R1} ?= erlang:decode_packet(line, Data, []), - ?assertMatch(L1, <<"HTTP/1.1 200 OK\r\n">>), + ?assertMatch(L1, ExpectedResponseLine), {ok, L2, R2} ?= erlang:decode_packet(line, R1, []), {ok, L3, R3} ?= erlang:decode_packet(line, R2, []), {ok, L4, R4} ?= erlang:decode_packet(line, R3, []), {ok, L5, R5} ?= erlang:decode_packet(line, R4, []), - {ok, L6, R6} ?= erlang:decode_packet(line, R5, []), - {ok, <<"\r\n">>, R7} ?= erlang:decode_packet(line, R6, []), + {ok, MaybeL6, R6} ?= erlang:decode_packet(line, R5, []), + {ok, L6, Rem} ?= + case MaybeL6 of + <<"\r\n">> -> + {ok, none, R6}; + MaybeL6 -> + case erlang:decode_packet(line, R6, []) of + {ok, <<"\r\n">>, R7} -> + {ok, MaybeL6, R7}; + {more, _} -> + {more, undefined} + end + end, + { lists:map( fun(S) -> hd(string:split(S, <<":">>, leading)) end, - lists:sort([L2, L3, L4, L5, L6]) + lists:filter( + fun(H) -> H =/= none end, + lists:sort([L2, L3, L4, L5, L6]) + ) ), - R7 + Rem } else {more, _} -> {ok, More} = gen_tcp:recv(Socket, 0), - extract_headers(<<Data/binary, More/binary>>, Socket) + extract_headers( + <<Data/binary, More/binary>>, + Socket, + ExpectedResponseLine + ) end. validate_response(Data, Size, Socket) -> - {HeaderKeys, Rem} = extract_headers(Data, Socket), + {HeaderKeys, Rem} = + extract_headers(Data, Socket, <<"HTTP/1.1 200 OK\r\n">>), ?assertMatch( [ <<"Connection">>, @@ -313,6 +381,29 @@ validate_response(Data, Size, Socket) -> ?assertMatch(Size, byte_size(ExpectedBody)), <<Excess/binary, Rest/binary>>.
+validate_error(Data, ExpectedCode, Socket) -> + {ExpectedResponseLine, AdditionalHeaderKeys} = + case ExpectedCode of + 400 -> + {<<"HTTP/1.0 400 Bad Request\r\n">>, []}; + % As it was a bad version - can't assume 1.1 + 404 -> + {<<"HTTP/1.1 404 Not Found\r\n">>, []}; + 405 -> + {<<"HTTP/1.1 405 Method Not Allowed\r\n">>, [<<"Allow">>]} + end, + {HeaderKeys, _Rem} = extract_headers(Data, Socket, ExpectedResponseLine), + ExpectedHeaderKeys = + lists:sort( + [ + <<"Connection">>, + <<"Content-Length">>, + <<"Date">>, + <<"Server">> + ] ++ AdditionalHeaderKeys + ), + ?assertMatch(ExpectedHeaderKeys, HeaderKeys). + find_available_port([]) -> no_port_found; find_available_port([Port|Rest]) -> From 81d1b4e4286ad4272a5331471f4c52dd305e0faf Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2026 11:39:53 +0100 Subject: [PATCH 12/53] Add monitoring of exits on acceptors --- src/riak_api_web.erl | 5 ++- src/riak_api_web_acceptor.erl | 73 +++++++++----------------------- src/riak_api_web_socket.erl | 8 ++-- test/riak_api_web_get_random.erl | 52 +++++++++++++++++++++-- 4 files changed, 78 insertions(+), 60 deletions(-) diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index 886403c..5512cc7 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -114,7 +114,10 @@ binding_config(Scheme, Binding) -> { Name, - {riak_api_web_socket, start, [Config]}, permanent, 5000, worker, + {riak_api_web_socket, start, [Config]}, + permanent, + 5000, + worker, [riak_api_web_socket] }. diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 0d149ec..b624a9f 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -629,20 +629,35 @@ generate_binary_response(RspCode, RspHeaders, RspBody) -> RspBody/binary >>. +%% @doc +%% For performance reasons pre-create the whole line for the most common +%% scenarios -spec get_response_line(http_version(), response_code()) -> binary(). 
-get_response_line({1, 0}, RspCode) -> +get_response_line({1, 0}, 200) -> + <<"HTTP/1.0 200 OK\r\n">>; +get_response_line({1, 0}, 201) -> + <<"HTTP/1.0 201 Accepted\r\n">>; +get_response_line({1, 1}, 200) -> + <<"HTTP/1.1 200 OK\r\n">>; +get_response_line({1, 1}, 201) -> + <<"HTTP/1.1 201 Accepted\r\n">>; +get_response_line({1, 0}, Code) -> iolist_to_binary( [ <<"HTTP/1.0 ">>, - reason_phrase(RspCode), + integer_to_binary(Code), + <<" ">>, + reason_phrase(Code), <<"\r\n">> ] ); -get_response_line({1, 1}, RspCode) -> +get_response_line({1, 1}, Code) -> iolist_to_binary( [ <<"HTTP/1.1 ">>, - reason_phrase(RspCode), + integer_to_binary(Code), + <<" ">>, + reason_phrase(Code), <<"\r\n">> ] ). @@ -688,54 +703,8 @@ default_response_headers(KeepAlive) -> %% The http_util:reason_phrase/1 returns Object Not Found not Not Found %% these are taken direct from RFC 2616 -spec reason_phrase(response_code()) -> binary(). -reason_phrase(200) -> <<"200 OK">>; -reason_phrase(201) -> <<"201 Created">>; -reason_phrase(202) -> <<"202 Accepted">>; -reason_phrase(203) -> <<"203 Non-Authoritative Information">>; -reason_phrase(204) -> <<"204 No Content">>; -reason_phrase(206) -> <<"206 Partial Content">>; -reason_phrase(300) -> <<"300 Multiple Choices">>; -reason_phrase(301) -> <<"301 Moved Permanently">>; -reason_phrase(302) -> <<"302 Found">>; -reason_phrase(303) -> <<"303 See Other">>; -reason_phrase(304) -> <<"304 Not Modified">>; -reason_phrase(400) -> <<"400 Bad Request">>; -reason_phrase(401) -> <<"401 Unauthorized">>; -reason_phrase(402) -> <<"402 Payment Required">>; -reason_phrase(403) -> <<"403 Forbidden">>; -reason_phrase(404) -> <<"404 Not Found">>; -reason_phrase(405) -> <<"405 Method Not Allowed">>; -reason_phrase(406) -> <<"406 Not Acceptable">>; -reason_phrase(408) -> <<"408 Request Timeout">>; -reason_phrase(409) -> <<"409 Conflict">>; -reason_phrase(410) -> <<"410 Gone">>; -reason_phrase(411) -> <<"411 Length Required">>; -reason_phrase(412) -> <<"412 Precondition 
Failed">>; -reason_phrase(413) -> <<"413 Request Entity Too Large">>; -reason_phrase(414) -> <<"414 Request-URI Too Long">>; -reason_phrase(415) -> <<"415 Unsupported Media Type">>; -reason_phrase(416) -> <<"416 Requested Range Not Satisfiable">>; -reason_phrase(417) -> <<"417 Expectation Failed">>; -reason_phrase(418) -> <<"418 I'm a teapot">>; -reason_phrase(421) -> <<"421 Misdirected Request">>; -reason_phrase(422) -> <<"422 Unprocessable Entity">>; -reason_phrase(423) -> <<"423 Locked">>; -reason_phrase(424) -> <<"424 Failed Dependency">>; -reason_phrase(425) -> <<"425 Unordered Collection">>; -reason_phrase(426) -> <<"426 Upgrade Required">>; -reason_phrase(428) -> <<"428 Precondition Required">>; -reason_phrase(429) -> <<"429 Too Many Requests">>; -reason_phrase(431) -> <<"431 Request Header Fields Too Large">>; -reason_phrase(451) -> <<"451 Unavailable For Legal Reasons">>; -reason_phrase(500) -> <<"500 Internal Server Error">>; -reason_phrase(501) -> <<"501 Not Implemented">>; -reason_phrase(502) -> <<"502 Bad Gateway">>; -reason_phrase(503) -> <<"503 Service Unavailable">>; -reason_phrase(504) -> <<"504 Gateway Timeout">>; -reason_phrase(505) -> <<"505 HTTP Version Not Supported">>; -reason_phrase(506) -> <<"506 Variant Also Negotiates">>; -reason_phrase(507) -> <<"507 Insufficient Storage">>; -reason_phrase(508) -> <<"508 Loop Detected">>. +reason_phrase(404) -> <<"Not Found">>; +reason_phrase(N) -> httpd_util:reason_phrase(N). 
%%%============================================================================ %%% Eunit tests diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 7a0e4e7..8b7a722 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -196,6 +196,7 @@ acceptor_accepted(Pid) -> %%%============================================================================ init(Options) -> + process_flag(trap_exit, true), BufferOpts = case get_tcp_buffer_options() of [] -> @@ -256,7 +257,7 @@ handle_cast({set_max_pool_size, MPS}, State) -> handle_cast(accepted, State) -> case State#socket_state.pool_size of PS when PS < State#socket_state.max_pool_size -> - P = + P = riak_api_web_acceptor:start_link( State#socket_state.listener ), @@ -290,7 +291,6 @@ handle_info({'EXIT', Pid, Reason}, State) -> handle_info({'EXIT', Pid, normal}, State). terminate(_Reason, _State) -> - ets:delete(?MODULE), ok. %%%============================================================================ @@ -312,7 +312,7 @@ default_socket_options(IPAddr) -> get_acceptor_pool(Listener, Options) -> StartSize = case lists:keyfind(web_acceptor_pool_start_size, 1, Options) of - {acceptor_pool_start_size, SS} when is_integer(SS), SS > 0 -> + {web_acceptor_pool_start_size, SS} when is_integer(SS), SS > 0 -> SS; false -> application:get_env( @@ -323,7 +323,7 @@ get_acceptor_pool(Listener, Options) -> end, MaxSize = case lists:keyfind(web_acceptor_pool_max_size, 1, Options) of - {acceptor_pool_start_size, MS} when is_integer(MS), MS > 0 -> + {web_acceptor_pool_start_size, MS} when is_integer(MS), MS > 0 -> MS; false -> application:get_env( diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index 08ab3af..0c01681 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -190,6 +190,7 @@ record_request(_Ctx, Timings, Completion) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("stdlib/include/assert.hrl"). 
basic_handler_test_() -> {setup, fun setup/0, fun cleanup/1, fun generator/1}. @@ -246,10 +247,12 @@ setup() -> {name, SpecName}, {ip, IPAddr}, {port, TestPort}, - {acceptor_pool_start_size, 4} + {web_acceptor_pool_start_size, 4} ], {ok, _Pid} = riak_api_web_socket:start_link(Options), riak_api_web:add_routes([{10, ?MODULE}]), + {ok, _HTTPC} = inets:start(httpc, [{profile, test_client}]), + ok = httpc:set_options([{verbose, false}], test_client), {SpecName, IPAddr, TestPort} . @@ -261,10 +264,14 @@ generator({_SpecName, IPAddr, Port}) -> pipeline_request_values(IPAddr, Port, 16), request_error(IPAddr, Port, ?WRONG_URL, 404), request_error(IPAddr, Port, ?POST_NOT_GET, 405), - request_error(IPAddr, Port, ?BAD_VERSION, 400) + request_error(IPAddr, Port, ?BAD_VERSION, 400), + request_with_httpc(IPAddr, Port, 128), + request_with_httpc(IPAddr, Port, 16) ]. -cleanup({_SpecName, _IPAddr, _Port}) -> +cleanup({SpecName, _IPAddr, _Port}) -> + ok = inets:stop(), + ?assertMatch(4, riak_api_web_socket:get_active_pool_size(SpecName)), ok. request_error(IPAddr, Port, Msg, ExpectedCode) -> @@ -281,6 +288,45 @@ request_error(IPAddr, Port, Msg, ExpectedCode) -> ok = gen_tcp:close(Socket) end. 
+request_with_httpc({A, B, C, D}, Port, Size) -> + fun() -> + URI = + lists:flatten( + io_lib:format( + "http://~w.~w.~w.~w:~w/random_data?required_size=~w", + [A, B, C, D, Port, Size] + ) + ), + {ok, {{"HTTP/1.1", 200, "OK"}, ResponseHeaders, ResponseBody}} = + httpc:request( + get, + { + URI, + [{"X-Riak-request_id", integer_to_binary(1)}] + }, + [], + [], + test_client + ), + ?assertMatch( + {"connection", "keep-alive"}, + lists:keyfind("connection", 1, ResponseHeaders) + ), + ?assertMatch( + {"server", "RiakAPI/4.0 SilverMachine"}, + lists:keyfind("server", 1, ResponseHeaders) + ), + SizeL = integer_to_list(Size), + ?assertMatch( + {"content-length", SizeL}, + lists:keyfind("content-length", 1, ResponseHeaders) + ), + ?assertMatch( + {"x-riak-request_id", "1"}, + lists:keyfind("x-riak-request_id", 1, ResponseHeaders) + ), + ?assertMatch(Size, length(ResponseBody)) + end. request_single_value(IPAddr, Port, Size) -> fun() -> From f4a27d3d2fa1136aecfc29689aaf8a54c52fb11d Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2026 12:02:21 +0100 Subject: [PATCH 13/53] Pass the split path as well as path --- src/riak_api_web.erl | 20 ++++---- src/riak_api_web_acceptor.erl | 13 +++-- src/riak_api_web_handler.erl | 3 +- src/riak_api_wm_urlmap.erl | 81 -------------------------------- test/riak_api_web_get_random.erl | 12 ++--- 5 files changed, 27 insertions(+), 102 deletions(-) delete mode 100644 src/riak_api_wm_urlmap.erl diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index 5512cc7..2881895 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -29,7 +29,7 @@ get_listeners/0, binding_config/2, add_routes/1, - get_route/2, + get_route/3, spec_name/3 ] ). 
@@ -48,7 +48,8 @@ add_routes(Routes) -> -spec get_route( riak_api_web_acceptor:method(), - unicode:chardata() + unicode:chardata(), + list(unicode:chardata()) ) -> { ok, @@ -57,16 +58,16 @@ add_routes(Routes) -> {pos_integer(), pos_integer(), pos_integer()} } | riak_api_web_acceptor:halt_response(). -get_route(Method, Path) -> +get_route(Method, Path, SplitPath) -> CurrentRoutes = persistent_term:get(?ROUTE_KEY, []), - get_route(CurrentRoutes, Method, Path). + get_route(CurrentRoutes, Method, Path, SplitPath). -get_route([], _Method, _Path) -> +get_route([], _Method, _Path, _SP) -> {halt, 404, [], <<>>, []}; -get_route([{_P, CallbackMod} | Rest], Method, Path) -> - case CallbackMod:match_route(Method, Path) of +get_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> + case CallbackMod:match_route(Method, Path, SplitPath) of no_match -> - get_route(Rest, Method, Path); + get_route(Rest, Method, Path, SplitPath); {method_not_allowed, AllowedMethods} -> AllowHdrVal = iolist_to_binary( @@ -176,6 +177,5 @@ common_config() -> http_logdir, app_helper:get_env(riak_core, platform_log_dir, "log") )}, - {backlog, 128}, - {dispatch, [{[], riak_api_wm_urlmap, []}]} + {backlog, 128} ]. 
diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index b624a9f..f4bdddf 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -144,14 +144,14 @@ handle_request(Socket, InitBuffer) -> {ok, {Method, RawPath, Version, HdrBuffer}} ?= get_request_line(Socket, InitBuffer), set_version(Version), - {ok, {Path, QueryParams}} ?= split_path(RawPath), + {ok, {Path, SplitPath, QueryParams}} ?= split_path(RawPath), { ok, CallbackMod, InitModCtx, {MaxHdrCount, MaxHdrSize, MaxBodySize} } ?= - riak_api_web:get_route(Method, Path), + riak_api_web:get_route(Method, Path, SplitPath), {ok, ReqHeaders, BdyBuffer} ?= get_request_headers( HdrBuffer, @@ -246,16 +246,21 @@ bad_request(Error, Subs) -> ) -> { ok, - {unicode:chardata(), [{unicode:chardata(), unicode:chardata() | true}]} + { + unicode:chardata(), + list(unicode:chardata()), + [{unicode:chardata(), unicode:chardata() | true}] + } } | halt_response(). split_path(URIPath) -> case uri_string:normalize(URIPath, [return_map]) of URIMap when is_map(URIMap) -> Path = maps:get(path, URIMap, <<"">>), + SplitPath = string:split(Path, <<"/">>, all), case uri_string:dissect_query(maps:get(query, URIMap, <<"">>)) of QueryParams when is_list(QueryParams) -> - {ok, {Path, QueryParams}}; + {ok, {Path, SplitPath, QueryParams}}; {error, QTerm, QReason} -> bad_request( <<"Query parameters not parsed ~w - ~0p">>, diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 3a437b7..c4694c4 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -67,7 +67,8 @@ %% limits for that route. 
-callback match_route( riak_api_web_acceptor:method(), - unicode:chardata() + unicode:chardata(), + list(unicode:chardata()) ) -> no_match | {method_not_allowed, list(riak_api_web_acceptor:method())} | diff --git a/src/riak_api_wm_urlmap.erl b/src/riak_api_wm_urlmap.erl deleted file mode 100644 index 02767a8..0000000 --- a/src/riak_api_wm_urlmap.erl +++ /dev/null @@ -1,81 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% riak_api_wm_urlmap: expose the roots of registered Webmachine resources -%% -%% Copyright (c) 2007-2013 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% @doc This module provides a Webmachine resource that lists the -%% URLs for other resources available on this host. -%% -%% Links to Riak resources will be added to the Link header in -%% the form: -%%``` -%% ; rel="RESOURCE_NAME" -%%''' -%% HTML output of this resource is a list of link tags like: -%%``` -%% RESOURCE_NAME -%%''' -%% JSON output of this resource in an object with elements like: -%%``` -%% "RESOURCE_NAME":"URL" -%%''' --module(riak_api_wm_urlmap). --export([ - init/1, - resource_exists/2, - content_types_provided/2, - to_html/2, - to_json/2 - ]). - --include_lib("webmachine/include/webmachine.hrl"). - -init([]) -> - {ok, service_list()}. 
- -resource_exists(RD, Services) -> - {true, add_link_header(RD, Services), Services}. - -add_link_header(RD, Services) -> - wrq:set_resp_header( - "Link", - string:join([ ["<",Uri,">; rel=\"",Resource,"\""] - || {Resource, Uri} <- Services ], - ","), - RD). - -content_types_provided(RD, Services) -> - {[{"text/html", to_html},{"application/json", to_json}], RD, Services}. - -to_html(RD, Services) -> - {[""], - RD, Services}. - -to_json(RD, Services) -> - {mochijson:encode({struct, Services}), RD, Services}. - -service_list() -> - Dispatch = webmachine_router:get_routes(), - lists:usort( - [{atom_to_list(Resource), "/"++UriBase} - || {[UriBase|_], Resource, _} <- Dispatch]). diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index 0c01681..ea10668 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -29,7 +29,7 @@ -export( [ - match_route/2, + match_route/3, check_permissions/4, parse_query_params/2, parse_request_headers/2, @@ -63,16 +63,17 @@ %% @doc match_route for the module -spec match_route( riak_api_web_acceptor:method(), - unicode:chardata() + unicode:chardata(), + list(unicode:chardata()) ) -> no_match | {method_not_allowed, list(riak_api_web_acceptor:method())} | {ok, context(), riak_api_web_handler:limits()}. -match_route('GET', <<"/random_data">>) -> +match_route('GET', <<"/random_data">>, _SP) -> {ok, #context{}, {10, 1024, 128 * 1024}}; -match_route(_, <<"/random_data">>) -> +match_route(_, <<"/random_data">>, _SP) -> {method_not_allowed, ['GET']}; -match_route(_, _) -> +match_route(_, _, _) -> no_match. %% @doc check_permissions for using this module or route @@ -190,7 +191,6 @@ record_request(_Ctx, Timings, Completion) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). --include_lib("stdlib/include/assert.hrl"). basic_handler_test_() -> {setup, fun setup/0, fun cleanup/1, fun generator/1}. 
From be070449aa03a213fc01bf9ebaabdc0a2999688b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2026 13:45:02 +0100 Subject: [PATCH 14/53] Extend tests, add KV store test --- rebar.config | 3 +- src/riak_api_web_acceptor.erl | 11 +- src/riak_api_web_security.erl | 11 +- src/riak_api_web_socket.erl | 14 +- test/riak_api_web_ets_store.erl | 325 +++++++++++++++++++++++++++++++ test/riak_api_web_get_random.erl | 2 + 6 files changed, 356 insertions(+), 10 deletions(-) create mode 100644 test/riak_api_web_ets_store.erl diff --git a/rebar.config b/rebar.config index e68ce2b..0827d3d 100644 --- a/rebar.config +++ b/rebar.config @@ -15,7 +15,8 @@ "src/riak_api_web_socket.erl", "src/riak_api_web.erl", "src/riak_api_web_handler.erl" - "test/end_to_end/*.erl", + "test/riak_api_web_get_random.erl", + "test/riak_api_web_ets_store.erl", "rebar.config" ]}, {exclude_files, []} diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index f4bdddf..aafb324 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -28,7 +28,7 @@ -export([start_link/1, init/2]). --export([extend_buffer/4, start_clock/0]). +-export([extend_buffer/4, start_clock/0, stop_clock/0]). -include_lib("kernel/include/logger.hrl"). @@ -676,6 +676,10 @@ start_clock() -> ), ok. +-spec stop_clock() -> true. +stop_clock() -> + ets:delete(?MODULE). + -spec default_response_headers( boolean() ) -> @@ -706,9 +710,12 @@ default_response_headers(KeepAlive) -> %% @doc %% The http_util:reason_phrase/1 returns Object Not Found not Not Found -%% these are taken direct from RFC 2616 +%% these are taken direct from RFC 2616. Likewise "Request Entity Too Large" +%% rather than the more common "Content Too Large" -spec reason_phrase(response_code()) -> binary(). reason_phrase(404) -> <<"Not Found">>; +reason_phrase(413) -> <<"Content Too Large">>; +reason_phrase(431) -> <<"Request Header Fields Too Large">>; reason_phrase(N) -> httpd_util:reason_phrase(N). 
%%%============================================================================ diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index 2f42374..946820e 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -62,14 +62,14 @@ is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> catch _:ExError -> ?LOG_WARNING("Error decoding credentials ~0p", [ExError]), - {halt, 400, none, <<"Error decoding credentials">>, []} + {halt, 400, [], <<"Error decoding credentials">>, []} end; Unexpected -> ?LOG_WARNING("Error decoding credentials ~0p", [Unexpected]), - {halt, 400, none, <<"Error decoding credentials">>, []} + {halt, 400, [], <<"Error decoding credentials">>, []} end; is_authorised(true, http, _ReqHeaders, _Peer, _AuthFun) -> - {halt, 426, none, <<"Upgrade required to https">>}; + {halt, 426, [], <<"Upgrade required to https">>}; is_authorised(false, _, _ReqHeaders, _Peer, _AuthFun) -> {true, undefined}. @@ -79,7 +79,6 @@ is_authorised(false, _, _ReqHeaders, _Peer, _AuthFun) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). --include_lib("stdlib/include/assert.hrl"). 
simple_security_test() -> User1 = <<"User1">>, @@ -110,7 +109,7 @@ simple_security_test() -> ) ), ?assertMatch( - {halt, 400, none, <<"Error decoding credentials">>, []}, + {halt, 400, [], <<"Error decoding credentials">>, []}, is_authorised( true, https, @@ -141,7 +140,7 @@ simple_security_test() -> ] ), ?assertMatch( - {halt, 400, none, <<"Error decoding credentials">>, []}, + {halt, 400, [], <<"Error decoding credentials">>, []}, is_authorised( true, https, diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 8b7a722..050f8e1 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -77,6 +77,7 @@ recv_line/2, send/2, close/1, + stop/1, get_peer/1, acceptor_accepted/1 ] @@ -191,6 +192,14 @@ set_max_pool_size(ServerName, MaxPoolSize) when is_integer(MaxPoolSize) -> acceptor_accepted(Pid) -> gen_server:cast(Pid, accepted). +-spec stop(server_name()) -> ok. +stop(ServerName) -> + gen_server:call( + binary_to_existing_atom(ServerName), + stop, + infinity + ). + %%%============================================================================ %%% gen_server callbacks %%%============================================================================ @@ -240,7 +249,9 @@ init(Options) -> handle_call(get_max_pool_size, _From, State) -> {reply, State#socket_state.max_pool_size, State}; handle_call(get_active_pool_size, _From, State) -> - {reply, sets:size(State#socket_state.acceptor_pool), State}. + {reply, sets:size(State#socket_state.acceptor_pool), State}; +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. handle_cast({set_max_pool_size, MPS}, State) -> case State#socket_state.pool_size of @@ -291,6 +302,7 @@ handle_info({'EXIT', Pid, Reason}, State) -> handle_info({'EXIT', Pid, normal}, State). terminate(_Reason, _State) -> + riak_api_web_acceptor:stop_clock(), ok. 
%%%============================================================================ diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl new file mode 100644 index 0000000..ee1c71f --- /dev/null +++ b/test/riak_api_web_ets_store.erl @@ -0,0 +1,325 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2026 Martin Sumner +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% @doc Test handler that stores and retrieves values in an ETS table. + +-module(riak_api_web_ets_store). + +-if(?OTP_RELEASE == 26). +-feature(maybe_expr, enable). +-endif. + +-behaviour(riak_api_web_handler). + +-export( + [ + match_route/3, + check_permissions/4, + parse_query_params/2, + parse_request_headers/2, + process_request/2, + record_request/3 + ] +). + +-ifdef(TEST). +-export( + [ + setup/0, + generator/1, + cleanup/1 + ] +). +-endif. + +-record(context, { + key :: unicode:chardata(), + method :: 'GET' | 'PUT', + type :: object | file +}). + +-type context() :: #context{}. + +%% @doc match_route for the module +-spec match_route( + riak_api_web_acceptor:method(), + unicode:chardata(), + list(unicode:chardata()) +) -> + no_match + | {method_not_allowed, list(riak_api_web_acceptor:method())} + | {ok, context(), riak_api_web_handler:limits()}. 
+match_route(Method, _P, [<<>>, <<"ets_store">>, <<"key">>, Key]) when + Method == 'GET'; Method == 'PUT' +-> + { + ok, + #context{key = Key, method = Method, type = object}, + {10, 1024, 16 * 1024} + }; +match_route(_, _, [<<>>, <<"ets_store">>, <<"key">>, _Key]) -> + {method_not_allowed, ['GET', 'PUT']}; +match_route(_, _, _) -> + no_match. + +%% @doc check_permissions for using this module or route +-spec check_permissions( + context(), + riak_api_web_headers:headers(), + riak_api_web_socket:scheme(), + riak_api_web_handler:peer() +) -> + {ok, context()}. +check_permissions(Ctx, _Hdrs, _Scheme, _Peer) -> + {ok, Ctx}. + +%% @doc parse and validate query params, passed as a map +-spec parse_query_params( + context(), + riak_api_web_handler:query_params() +) -> + {ok, context()} | riak_api_web_acceptor:halt_response(). +parse_query_params(Ctx, _Params) -> + {ok, Ctx}. + +%% @doc parse and validate the request headers +-spec parse_request_headers( + context(), + riak_api_web_headers:headers() +) -> + {ok, context()} | riak_api_web_acceptor:halt_response(). +parse_request_headers(Ctx, _ReqHeaders) -> + {ok, Ctx}. + +%% @doc Process the request and produce a response +-spec process_request( + context(), + riak_api_web_body:req_body() +) -> + { + ok, + context(), + { + riak_api_web_acceptor:response_code(), + riak_api_web_headers:header_list(), + riak_api_web_handler:response_body(), + boolean(), + riak_api_web_body:req_body() + } + }. 
+process_request( + Ctx = #context{key = Key, method = 'GET', type = object}, RqBdy +) -> + case ets:lookup(?MODULE, Key) of + [{Key, Value}] -> + {ok, Ctx, {200, [], Value, true, RqBdy}}; + [] -> + {ok, Ctx, {404, [], <<>>, true, RqBdy}} + end; +process_request( + Ctx = #context{key = Key, method = 'PUT', type = object}, RqBdy +) -> + case riak_api_web_body:get_body(RqBdy, all, 10000) of + {Value, UpdRqBdy} when is_binary(Value) -> + ets:insert(?MODULE, {Key, Value}), + ETag = base64:encode(crypto:hash(md5, Value), #{mode => urlsafe}), + {ok, Ctx, {204, [{'Etag', ETag}], <<>>, true, UpdRqBdy}}; + {error, content_too_large} -> + {ok, Ctx, {413, [], <<>>, false, RqBdy}} + end. + +%% @doc Record the output of the interaction +-spec record_request( + context(), + riak_api_web_handler:timings(), + riak_api_web_handler:completion() +) -> + ok. +record_request(Ctx, Timings, Completion) -> + {A, B, C} = Timings, + io:format( + user, + "Request ~w ~w with timings ~0p~n", + [Ctx#context.method, Completion, {B - A, C - B, C - A}] + ). + +%%%============================================================================ +%%% Eunit tests +%%%============================================================================ + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +basic_handler_test_() -> + {setup, fun setup/0, fun cleanup/1, fun generator/1}. + +setup() -> + inets:start(), + TestPort = find_available_port(lists:seq(8000, 8999)), + IPAddr = {127, 0, 0, 1}, + SpecName = riak_api_web:spec_name(http, IPAddr, TestPort), + Options = + [ + {name, SpecName}, + {ip, IPAddr}, + {port, TestPort}, + {web_acceptor_pool_start_size, 4} + ], + {ok, _Pid} = riak_api_web_socket:start_link(Options), + riak_api_web:add_routes([{20, ?MODULE}]), + ets:new( + ?MODULE, + [named_table, public, {read_concurrency, true}] + ), + {ok, _HTTPC} = inets:start(httpc, [{profile, test_client}]), + ok = httpc:set_options([{verbose, false}], test_client), + {SpecName, IPAddr, TestPort}. 
+ +generator({_SpecName, IPAddr, Port}) -> + [ + put_then_get(IPAddr, Port), + put_too_big(IPAddr, Port), + put_big_header(IPAddr, Port) + ]. + +cleanup({SpecName, _IPAddr, _Port}) -> + ok = inets:stop(), + ?assertMatch(4, riak_api_web_socket:get_active_pool_size(SpecName)), + riak_api_web_socket:stop(SpecName), + ok. + +put_then_get({A, B, C, D}, Port) -> + fun() -> + Key = <<"K0001">>, + URI = + lists:flatten( + io_lib:format( + "http://~w.~w.~w.~w:~w/ets_store/key/~s", + [A, B, C, D, Port, Key] + ) + ), + {ok, {{"HTTP/1.1", 404, "Not Found"}, Rsp1Headers, _Rsp1Body}} = + httpc:request( + get, + {URI, []}, + [], + [], + test_client + ), + ?assertMatch( + {"connection", "keep-alive"}, + lists:keyfind("connection", 1, Rsp1Headers) + ), + ?assertMatch( + {"server", "RiakAPI/4.0 SilverMachine"}, + lists:keyfind("server", 1, Rsp1Headers) + ), + Value = crypto:strong_rand_bytes(64), + ExpectedVTag = + binary_to_list( + base64:encode(crypto:hash(md5, Value), #{mode => urlsafe}) + ), + {ok, {{"HTTP/1.1", 204, "No Content"}, Rsp2Headers, <<>>}} = + httpc:request( + put, + {URI, [], "application/binary", Value}, + [], + [{body_format, binary}], + test_client + ), + ?assertMatch( + {"etag", ExpectedVTag}, + lists:keyfind("etag", 1, Rsp2Headers) + ), + {ok, {{"HTTP/1.1", 200, "OK"}, _Rsp3Headers, Rsp3Body}} = + httpc:request( + get, + {URI, []}, + [], + [{body_format, binary}], + test_client + ), + ?assert(is_binary(Rsp3Body)) + end. + +put_too_big({A, B, C, D}, Port) -> + fun() -> + Key = <<"K0002">>, + URI = + lists:flatten( + io_lib:format( + "http://~w.~w.~w.~w:~w/ets_store/key/~s", + [A, B, C, D, Port, Key] + ) + ), + Value = crypto:strong_rand_bytes(64 * 1024), + {ok, {{"HTTP/1.1", 413, "Content Too Large"}, _Rsp2Headers, <<>>}} = + httpc:request( + put, + {URI, [], "application/binary", Value}, + [], + [{body_format, binary}], + test_client + ) + end. 
+ +put_big_header({A, B, C, D}, Port) -> + fun() -> + Key = <<"K0003">>, + URI = + lists:flatten( + io_lib:format( + "http://~w.~w.~w.~w:~w/ets_store/key/~s", + [A, B, C, D, Port, Key] + ) + ), + HeaderValue = + base64:encode(crypto:strong_rand_bytes(2048), #{mode => urlsafe}), + Value = crypto:strong_rand_bytes(64), + { + ok, + {{"HTTP/1.1", 431, "Request Header Fields Too Large"}, _, RspBdy} + } = + httpc:request( + put, + { + URI, + [{"X-Riak-Vclock", HeaderValue}], + "application/binary", + Value + }, + [], + [{body_format, binary}], + test_client + ), + ?assertMatch(<<"Header exceeded maximum size of 1024">>, RspBdy) + end. + +find_available_port([]) -> + no_port_found; +find_available_port([Port | Rest]) -> + case gen_tcp:listen(Port, []) of + {ok, Sock} -> + ok = gen_tcp:close(Sock), + Port; + _ -> + find_available_port(Rest) + end. + +-endif. diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index ea10668..b7af157 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -239,6 +239,7 @@ basic_handler_test_() -> ). setup() -> + inets:start(), TestPort = find_available_port(lists:seq(8000, 8999)), IPAddr = {127, 0, 0, 1}, SpecName = riak_api_web:spec_name(http, IPAddr, TestPort), @@ -272,6 +273,7 @@ generator({_SpecName, IPAddr, Port}) -> cleanup({SpecName, _IPAddr, _Port}) -> ok = inets:stop(), ?assertMatch(4, riak_api_web_socket:get_active_pool_size(SpecName)), + riak_api_web_socket:stop(SpecName), ok. 
request_error(IPAddr, Port, Msg, ExpectedCode) -> From 7ccac06054686c2a29b0f8e5cdffadf82d6cd03b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2026 15:39:27 +0100 Subject: [PATCH 15/53] Add file upload/download test --- src/riak_api_web_body.erl | 87 +++++++++++--------- test/riak_api_web_ets_store.erl | 139 +++++++++++++++++++++++++++++--- 2 files changed, 176 insertions(+), 50 deletions(-) diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index 31cec27..cb73d12 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -46,8 +46,10 @@ % called when the chunk_buff is greater than or equal to the slice % length transfer_complete = false :: boolean(), + + spoof_socket = false :: boolean(), test_packets = [] :: list(binary()) - % only used in tests + % only used in tests }). -type req_body() :: #req_body{}. @@ -252,27 +254,49 @@ get_chunk_size(Line) -> binary_to_integer(ChunkLength, 16) end. --ifdef(TEST). -extend_buffer(ReqBody, Size, _Timeout) -> - {NextBin, RestPackets} = - accrue_packets( - ReqBody#req_body.test_packets, - Size, - ReqBody#req_body.buffer - ), - ReqBody#req_body{buffer = NextBin, test_packets = RestPackets}. --else. -spec extend_buffer( req_body(), pos_integer() | line, non_neg_integer() | undefined ) -> req_body(). -extend_buffer(#req_body{buffer_fun = BufferFun} = ReqBody, Size, Timeout) -> +extend_buffer(#req_body{buffer_fun = BufferFun, spoof_socket = false} = ReqBody, Size, Timeout) -> ReqBody#req_body{ buffer = BufferFun(ReqBody#req_body.buffer, Size, Timeout) - }. --endif. + }; +extend_buffer(#req_body{spoof_socket = true} = ReqBody, Size, _Timeout) -> + {NextBin, RestPackets} = + accrue_packets( + ReqBody#req_body.test_packets, + Size, + ReqBody#req_body.buffer + ), + ReqBody#req_body{buffer = NextBin, test_packets = RestPackets}. 
+ +%% @doc accrue_packets for unit tests only, when #req_body{spoof_socket = true} +accrue_packets(Rest, 0, Buffer) -> + {Buffer, Rest}; +accrue_packets([], line, Buffer) -> + {Buffer, []}; +accrue_packets([NextPacket | Rest], line, Buffer) -> + case erlang:decode_packet(line, NextPacket, []) of + {ok, Line, Overhang} -> + {<>, [Overhang | Rest]}; + {more, _} -> + accrue_packets(Rest, line, <>) + end; +accrue_packets([NextPacket | Rest], Size, Buffer) when is_integer(Size) -> + case Size of + Needed when Needed < byte_size(NextPacket) -> + <> = NextPacket, + {<>, [RestPacket | Rest]}; + Needed -> + accrue_packets( + Rest, + Needed - byte_size(NextPacket), + <> + ) + end. %%%============================================================================ %%% Eunit tests @@ -291,6 +315,7 @@ slicing_fixed_length_test() -> buffer = <<>>, content_length = 11 * 1024, max_size = 1024 * 1024, + spoof_socket = true, test_packets = Packets }, {Slice1, RqBdy1} = get_body(RqBdyInit, 4 * 1024, 60 * 1000), @@ -319,6 +344,7 @@ slicing_fixed_length_test() -> buffer = OnBuffer, content_length = 11 * 1024, max_size = 1024 * 1024, + spoof_socket = true, test_packets = [OnSocket | RestPackets] ++ [DummyRequest] }, {SliceAlt1, RqBdyAlt1} = get_body(RqBdyAlt0, 4 * 1024, 60 * 1000), @@ -343,6 +369,7 @@ all_in_buffer_test() -> buffer = Body, content_length = 11 * 1024, max_size = 1024 * 1024, + spoof_socket = true, test_packets = [] }, {Slice1, RqBdy1} = get_body(RqBdyInit, 4 * 1024, 60 * 1000), @@ -362,6 +389,7 @@ get_empty_body_test() -> buffer = <<"0\r\n\r\n">>, content_length = chunked, max_size = 1024 * 1024, + spoof_socket = true, test_packets = [] }, {Output, RqBdyEnd} = get_body(RqBdyInit, all, 1000), @@ -397,6 +425,7 @@ get_standard_wikipedia_test() -> buffer = <<"">>, content_length = chunked, max_size = 1024 * 1024, + spoof_socket = true, test_packets = Packets }, {Output, RqBdyEnd} = get_body(RqBdyInit, all, 1000), @@ -420,6 +449,7 @@ get_standard_wikipedia_inslices_test() -> 
buffer = <<"">>, content_length = chunked, max_size = 1024 * 1024, + spoof_socket = true, test_packets = Packets }, {Slice1, RqBdy1} = get_body(RqBdyInit, 5, 1000), @@ -441,7 +471,8 @@ get_wikipedia_from_buffer_test() -> 1024 * 1024 ), OtherPackets = [<<"0\r\n">>, <<"\r\n">>], - RqBdy = RqBdyInit#req_body{test_packets = OtherPackets}, + RqBdy = + RqBdyInit#req_body{spoof_socket = true, test_packets = OtherPackets}, {Output, RqBdyEnd} = get_body(RqBdy, all, 1000), ?assertMatch(<<"Wikipedia in\r\n\r\nchunks.">>, Output), ?assertMatch(<<>>, get_buffer(RqBdyEnd)). @@ -466,6 +497,7 @@ ignore_extension_test() -> buffer = <<>>, content_length = chunked, max_size = 1024 * 1024, + spoof_socket = true, test_packets = Packets }, {Output, RqBdyEnd} = get_body(RqBdyInit, all, 1000), @@ -489,6 +521,7 @@ toobig_chunking_test() -> buffer = <<"">>, content_length = chunked, max_size = 20, + spoof_socket = true, test_packets = Packets }, ?assertMatch({error, content_too_large}, get_body(RqBdyInit, all, 1000)). @@ -498,28 +531,4 @@ packet_testbin(<<>>, Acc) -> packet_testbin(<>, Acc) -> packet_testbin(Rest, [Bin | Acc]). -accrue_packets(Rest, 0, Buffer) -> - {Buffer, Rest}; -accrue_packets([], line, Buffer) -> - {Buffer, []}; -accrue_packets([NextPacket | Rest], line, Buffer) -> - case erlang:decode_packet(line, NextPacket, []) of - {ok, Line, Overhang} -> - {<>, [Overhang | Rest]}; - {more, _} -> - accrue_packets(Rest, line, <>) - end; -accrue_packets([NextPacket | Rest], Size, Buffer) when is_integer(Size) -> - case Size of - Needed when Needed < byte_size(NextPacket) -> - <> = NextPacket, - {<>, [RestPacket | Rest]}; - Needed -> - accrue_packets( - Rest, - Needed - byte_size(NextPacket), - <> - ) - end. - -endif. 
diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index ee1c71f..b6f9736 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -34,7 +34,8 @@ parse_query_params/2, parse_request_headers/2, process_request/2, - record_request/3 + record_request/3, + slice_stream_fun/1 ] ). @@ -48,13 +49,19 @@ ). -endif. +-define(SLICE_SIZE, 10 * 1024). + -record(context, { key :: unicode:chardata(), method :: 'GET' | 'PUT', - type :: object | file + type :: object | file, + slice_list = [] :: list({range(), guid()}), + last_slice_end = 0 :: non_neg_integer() }). -type context() :: #context{}. +-type guid() :: binary(). +-type range() :: {non_neg_integer(), non_neg_integer()}. %% @doc match_route for the module -spec match_route( @@ -65,7 +72,7 @@ no_match | {method_not_allowed, list(riak_api_web_acceptor:method())} | {ok, context(), riak_api_web_handler:limits()}. -match_route(Method, _P, [<<>>, <<"ets_store">>, <<"key">>, Key]) when +match_route(Method, _P, [<<>>, <<"ets_object">>, <<"key">>, Key]) when Method == 'GET'; Method == 'PUT' -> { @@ -73,8 +80,14 @@ match_route(Method, _P, [<<>>, <<"ets_store">>, <<"key">>, Key]) when #context{key = Key, method = Method, type = object}, {10, 1024, 16 * 1024} }; -match_route(_, _, [<<>>, <<"ets_store">>, <<"key">>, _Key]) -> +match_route(_, _, [<<>>, <<"ets_object">>, <<"key">>, _Key]) -> {method_not_allowed, ['GET', 'PUT']}; +match_route(Method, _P, [<<>>, <<"ets_file">>, <<"filename">>, Key]) when Method == 'GET'; Method == 'PUT' -> + { + ok, + #context{key = Key, method = Method, type = file}, + {10, 1024, 1024 * 1024} + }; match_route(_, _, _) -> no_match. 
@@ -126,8 +139,8 @@ parse_request_headers(Ctx, _ReqHeaders) -> process_request( Ctx = #context{key = Key, method = 'GET', type = object}, RqBdy ) -> - case ets:lookup(?MODULE, Key) of - [{Key, Value}] -> + case ets:lookup(?MODULE, {object, Key}) of + [{{object, Key}, Value}] -> {ok, Ctx, {200, [], Value, true, RqBdy}}; [] -> {ok, Ctx, {404, [], <<>>, true, RqBdy}} @@ -137,11 +150,80 @@ process_request( ) -> case riak_api_web_body:get_body(RqBdy, all, 10000) of {Value, UpdRqBdy} when is_binary(Value) -> - ets:insert(?MODULE, {Key, Value}), + ets:insert(?MODULE, {{object, Key}, Value}), ETag = base64:encode(crypto:hash(md5, Value), #{mode => urlsafe}), {ok, Ctx, {204, [{'Etag', ETag}], <<>>, true, UpdRqBdy}}; {error, content_too_large} -> {ok, Ctx, {413, [], <<>>, false, RqBdy}} + end; +process_request( + Ctx = #context{key = Key, method = 'GET', type = file}, RqBdy +) -> + case ets:lookup(?MODULE, {file, Key}) of + [{{file, Key}, SliceList}] -> + io:format(user, "Streaming ~w slices~n", [length(SliceList)]), + { + ok, + Ctx, + { + 200, + [], + {stream, slice_stream_fun(lists:sort(SliceList))}, + true, + RqBdy + } + }; + [] -> + {ok, Ctx, {404, [], <<>>, true, RqBdy}} + end; +process_request( + Ctx = #context{key = Key, method = 'PUT', type = file}, RqBdy +) -> + case riak_api_web_body:get_body(RqBdy, ?SLICE_SIZE, 10000) of + {Slice, UpdRqBdy} when is_binary(Slice) -> + SliceKey = generate_uuid(), + SliceSize = byte_size(Slice), + ets:insert_new(?MODULE, {{slice, SliceKey}, Slice}), + process_request( + Ctx#context{ + slice_list = + [ + { + {Ctx#context.last_slice_end, SliceSize}, + SliceKey + } | Ctx#context.slice_list + ], + last_slice_end = Ctx#context.last_slice_end + SliceSize + }, + UpdRqBdy + ); + {done, UpdRqBdy} -> + ets:insert(?MODULE, {{file, Key}, Ctx#context.slice_list}), + ETag = + base64:encode( + crypto:hash(md5, term_to_binary(Ctx#context.slice_list)), + #{mode => urlsafe} + ), + {ok, Ctx, {204, [{'Etag', ETag}], <<>>, true, UpdRqBdy}}; + {error, 
content_too_large} -> + {ok, Ctx, {413, [], <<>>, false, RqBdy}} + end. + +generate_uuid() -> + <> = crypto:strong_rand_bytes(16), + L = io_lib:format( + "~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b", + [A, B, C band 16#0fff, D band 16#3fff bor 16#8000, E] + ), + list_to_binary(L). + +slice_stream_fun([]) -> + fun() -> done end; +slice_stream_fun(List) -> + fun() -> + [{_Range, SliceKey} | Rest] = List, + [{{slice, SliceKey}, Slice}] = ets:lookup(?MODULE, {slice, SliceKey}), + {Slice, slice_stream_fun(Rest)} end. %% @doc Record the output of the interaction @@ -195,7 +277,8 @@ generator({_SpecName, IPAddr, Port}) -> [ put_then_get(IPAddr, Port), put_too_big(IPAddr, Port), - put_big_header(IPAddr, Port) + put_big_header(IPAddr, Port), + put_then_get_big_file(IPAddr, Port) ]. cleanup({SpecName, _IPAddr, _Port}) -> @@ -204,13 +287,47 @@ cleanup({SpecName, _IPAddr, _Port}) -> riak_api_web_socket:stop(SpecName), ok. +put_then_get_big_file({A, B, C, D}, Port) -> + fun() -> + Key = <<"K0004">>, + URI = + lists:flatten( + io_lib:format( + "http://~w.~w.~w.~w:~w/ets_file/filename/~s", + [A, B, C, D, Port, Key] + ) + ), + Value = crypto:strong_rand_bytes(100 * 1024), + Hash = crypto:hash(md5, Value), + {ok, {{"HTTP/1.1", 204, "No Content"}, _Headers, <<>>}} = + httpc:request( + put, + {URI, [], "application/binary", Value}, + [], + [{body_format, binary}], + test_client + ), + {ok, {{"HTTP/1.1", 200, "OK"}, _FetchHeaders, FetchBody}} = + httpc:request( + get, + {URI, []}, + [], + [{body_format, binary}], + test_client + ), + ?assert(is_binary(FetchBody)), + ?assertMatch(102400, byte_size(FetchBody)), + ReturnedHash = crypto:hash(md5, FetchBody), + ?assertMatch(Hash, ReturnedHash) + end. 
+ put_then_get({A, B, C, D}, Port) -> fun() -> Key = <<"K0001">>, URI = lists:flatten( io_lib:format( - "http://~w.~w.~w.~w:~w/ets_store/key/~s", + "http://~w.~w.~w.~w:~w/ets_object/key/~s", [A, B, C, D, Port, Key] ) ), @@ -264,7 +381,7 @@ put_too_big({A, B, C, D}, Port) -> URI = lists:flatten( io_lib:format( - "http://~w.~w.~w.~w:~w/ets_store/key/~s", + "http://~w.~w.~w.~w:~w/ets_object/key/~s", [A, B, C, D, Port, Key] ) ), @@ -285,7 +402,7 @@ put_big_header({A, B, C, D}, Port) -> URI = lists:flatten( io_lib:format( - "http://~w.~w.~w.~w:~w/ets_store/key/~s", + "http://~w.~w.~w.~w:~w/ets_object/key/~s", [A, B, C, D, Port, Key] ) ), From 52afe863213d3ce8b54eb472d8af6f0e109b50b6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2026 15:39:42 +0100 Subject: [PATCH 16/53] Formatting --- src/riak_api_web_body.erl | 8 ++++++-- test/riak_api_web_ets_store.erl | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index cb73d12..0f8cc6f 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -49,7 +49,7 @@ spoof_socket = false :: boolean(), test_packets = [] :: list(binary()) - % only used in tests + % only used in tests }). -type req_body() :: #req_body{}. @@ -260,7 +260,11 @@ get_chunk_size(Line) -> non_neg_integer() | undefined ) -> req_body(). 
-extend_buffer(#req_body{buffer_fun = BufferFun, spoof_socket = false} = ReqBody, Size, Timeout) -> +extend_buffer( + #req_body{buffer_fun = BufferFun, spoof_socket = false} = ReqBody, + Size, + Timeout +) -> ReqBody#req_body{ buffer = BufferFun(ReqBody#req_body.buffer, Size, Timeout) }; diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index b6f9736..f539164 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -82,7 +82,9 @@ match_route(Method, _P, [<<>>, <<"ets_object">>, <<"key">>, Key]) when }; match_route(_, _, [<<>>, <<"ets_object">>, <<"key">>, _Key]) -> {method_not_allowed, ['GET', 'PUT']}; -match_route(Method, _P, [<<>>, <<"ets_file">>, <<"filename">>, Key]) when Method == 'GET'; Method == 'PUT' -> +match_route(Method, _P, [<<>>, <<"ets_file">>, <<"filename">>, Key]) when + Method == 'GET'; Method == 'PUT' +-> { ok, #context{key = Key, method = Method, type = file}, @@ -191,7 +193,8 @@ process_request( { {Ctx#context.last_slice_end, SliceSize}, SliceKey - } | Ctx#context.slice_list + } + | Ctx#context.slice_list ], last_slice_end = Ctx#context.last_slice_end + SliceSize }, From ab1a5b203f4a11069aa5a0e3e91baf6e211ae7cb Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2026 16:24:57 +0100 Subject: [PATCH 17/53] Chunked puts --- src/riak_api_web_handler.erl | 2 +- test/riak_api_web_ets_store.erl | 122 +++++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 4 deletions(-) diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index c4694c4..29109c2 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -21,7 +21,7 @@ %% %% the callbacks will be called in the following order, with the context %% returned from the previous call included in the next -%% - match_route/2 +%% - match_route/3 %% - check_permissions/4 %% - parse_query_params/2 %% - parse_request_headers/2 diff --git a/test/riak_api_web_ets_store.erl 
b/test/riak_api_web_ets_store.erl index f539164..06112f2 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -163,7 +163,6 @@ process_request( ) -> case ets:lookup(?MODULE, {file, Key}) of [{{file, Key}, SliceList}] -> - io:format(user, "Streaming ~w slices~n", [length(SliceList)]), { ok, Ctx, @@ -250,6 +249,7 @@ record_request(Ctx, Timings, Completion) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("stdlib/include/assert.hrl"). basic_handler_test_() -> {setup, fun setup/0, fun cleanup/1, fun generator/1}. @@ -281,7 +281,9 @@ generator({_SpecName, IPAddr, Port}) -> put_then_get(IPAddr, Port), put_too_big(IPAddr, Port), put_big_header(IPAddr, Port), - put_then_get_big_file(IPAddr, Port) + put_then_get_file(IPAddr, Port), + raw_put_then_get_file(IPAddr, Port), + raw_put_toobig_object(IPAddr, Port) ]. cleanup({SpecName, _IPAddr, _Port}) -> @@ -290,7 +292,121 @@ cleanup({SpecName, _IPAddr, _Port}) -> riak_api_web_socket:stop(SpecName), ok. -put_then_get_big_file({A, B, C, D}, Port) -> +raw_put_toobig_object({A, B, C, D}, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + {A, B, C, D}, + Port, + [binary, {packet, raw}, {active, false}] + ), + RequestHead = + << + "PUT /ets_object/key/K0006 HTTP/1.1\r\n" + "Connection: close\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + >>, + gen_tcp:send(Socket, RequestHead), + _Hash = send_chunked_4KBobject(Socket), + ok = inet:setopts(Socket, [{packet, line}]), + {ok, L1} = gen_tcp:recv(Socket, 0, 10000), + ?assertMatch( + <<"HTTP/1.1 413 Content Too Large\r\n">>, + L1 + ), + ok = inet:setopts(Socket, [{packet, raw}]), + {ok, _RspHdrs} = gen_tcp:recv(Socket, 0, 10000), + ok = gen_tcp:close(Socket) + end. 
+ +raw_put_then_get_file({A, B, C, D}, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + {A, B, C, D}, + Port, + [binary, {packet, raw}, {active, false}] + ), + RequestHead = + << + "PUT /ets_file/filename/K0005 HTTP/1.1\r\n" + "Connection: close\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + >>, + gen_tcp:send(Socket, RequestHead), + Hash = send_chunked_4KBobject(Socket), + ok = inet:setopts(Socket, [{packet, line}]), + {ok, L1} = gen_tcp:recv(Socket, 0, 10000), + ?assertMatch( + <<"HTTP/1.1 204 No Content\r\n">>, + L1 + ), + ok = inet:setopts(Socket, [{packet, raw}]), + {ok, _RspHdrs} = gen_tcp:recv(Socket, 0, 10000), + ok = gen_tcp:close(Socket), + URI = + lists:flatten( + io_lib:format( + "http://~w.~w.~w.~w:~w/ets_file/filename/~s", + [A, B, C, D, Port, <<"K0005">>] + ) + ), + {ok, {{"HTTP/1.1", 200, "OK"}, _FetchHeaders, FetchBody}} = + httpc:request( + get, + {URI, []}, + [], + [{body_format, binary}], + test_client + ), + ?assert(is_binary(FetchBody)), + ?assertMatch(41020, byte_size(FetchBody)), + ReturnedHash = crypto:hash(md5, FetchBody), + ?assertMatch(Hash, ReturnedHash) + end. + +send_chunked_4KBobject(Socket) -> + TestValue = crypto:strong_rand_bytes((10 * 4092) + 100), + Hash = crypto:hash(md5, TestValue), + << + Chunk1:4092/binary, + Chunk2:4092/binary, + Chunk3:4092/binary, + Chunk4:4092/binary, + Chunk5:4092/binary, + Chunk6:4092/binary, + Chunk7:4092/binary, + Chunk8:4092/binary, + Chunk9:4092/binary, + Chunk10:4092/binary, + Chunk11:100/binary + >> = TestValue, + lists:foreach( + fun(Chunk) -> + Size = integer_to_binary(byte_size(Chunk), 16), + Bin = iolist_to_binary([Size, <<"\r\n">>, Chunk, <<"\r\n">>]), + gen_tcp:send(Socket, Bin) + end, + [ + Chunk1, + Chunk2, + Chunk3, + Chunk4, + Chunk5, + Chunk6, + Chunk7, + Chunk8, + Chunk9, + Chunk10, + Chunk11 + ] + ), + gen_tcp:send(Socket, <<"0\r\n\r\n">>), + Hash. 
+ +put_then_get_file({A, B, C, D}, Port) -> fun() -> Key = <<"K0004">>, URI = From 65a4040b6659bb81bed67e5082185e7b44bf977b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2026 21:16:54 +0100 Subject: [PATCH 18/53] Check keepalive in HTTP 1.0 --- test/riak_api_web_get_random.erl | 52 +++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index b7af157..4d401f0 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -200,7 +200,20 @@ basic_handler_test_() -> << "GET /random_data?required_size=~w HTTP/1.1\r\n" "X-Riak-request_id: ~w\r\n" - "Connection: ~w\r\n" + "Connection: ~s\r\n" + "Content-Length: 0\r\n" + "\r\n" + >>, + [Size, ID, KeepAlive] + ) +). + +-define(REQUEST_BIN_V10(ID, Size, KeepAlive), + io_lib:format( + << + "GET /random_data?required_size=~w HTTP/1.0\r\n" + "X-Riak-request_id: ~w\r\n" + "Connection: ~s\r\n" "Content-Length: 0\r\n" "\r\n" >>, @@ -263,6 +276,7 @@ generator({_SpecName, IPAddr, Port}) -> request_single_value(IPAddr, Port, 64), request_single_value(IPAddr, Port, 2048), pipeline_request_values(IPAddr, Port, 16), + request_keepalive_v10(IPAddr, Port, 256), request_error(IPAddr, Port, ?WRONG_URL, 404), request_error(IPAddr, Port, ?POST_NOT_GET, 405), request_error(IPAddr, Port, ?BAD_VERSION, 400), @@ -338,13 +352,40 @@ request_single_value(IPAddr, Port, Size) -> Port, [binary, {packet, raw}, {active, false}] ), - Request = ?REQUEST_BIN(1, Size, close), + Request = ?REQUEST_BIN(1, Size, <<"close">>), ok = gen_tcp:send(Socket, Request), {ok, Data} = gen_tcp:recv(Socket, 0), ?assertMatch(<<>>, validate_response(Data, Size, Socket)), ok = gen_tcp:close(Socket) end. 
+request_keepalive_v10(IPAddr, Port, Size) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = + list_to_binary( + lists:flatten(?REQUEST_BIN_V10(1, Size, <<"keep-alive">>)) + ), + ok = gen_tcp:send(Socket, Request), + {ok, Data1} = gen_tcp:recv(Socket, 0), + ?assertMatch( + <<>>, + validate_response(Data1, Size, Socket, <<"HTTP/1.0 200 OK\r\n">>) + ), + ok = gen_tcp:send(Socket, Request), + {ok, Data2} = gen_tcp:recv(Socket, 0), + ?assertMatch( + <<>>, + validate_response(Data2, Size, Socket, <<"HTTP/1.0 200 OK\r\n">>) + ), + ok = gen_tcp:close(Socket) + end. + pipeline_request_values(IPAddr, Port, Size) -> fun() -> {ok, Socket} = @@ -355,7 +396,7 @@ pipeline_request_values(IPAddr, Port, Size) -> ), Requests = lists:map( - fun(I) -> ?REQUEST_BIN(I, Size, 'keep-alive') end, + fun(I) -> ?REQUEST_BIN(I, Size, <<"keep-alive">>) end, lists:seq(1, 5) ), Request = iolist_to_binary(Requests), @@ -412,8 +453,11 @@ extract_headers(Data, Socket, ExpectedResponseLine) -> end. validate_response(Data, Size, Socket) -> + validate_response(Data, Size, Socket, <<"HTTP/1.1 200 OK\r\n">>). 
+ +validate_response(Data, Size, Socket, StatusLine) -> {HeaderKeys, Rem} = - extract_headers(Data, Socket, <<"HTTP/1.1 200 OK\r\n">>), + extract_headers(Data, Socket, StatusLine), ?assertMatch( [ <<"Connection">>, From 9333085fef1c6470912322bb4206676e55f2f0d0 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 1 Apr 2026 10:50:09 +0100 Subject: [PATCH 19/53] Remove webmachine/mochiweb Retain mochijson2 and mochinum to ease transition --- rebar.config | 6 - src/mochijson2.erl | 976 +++++++++++++++++++++++++++++++++++++++++++ src/mochinum.erl | 374 +++++++++++++++++ src/riak_api.app.src | 4 +- 4 files changed, 1351 insertions(+), 9 deletions(-) create mode 100644 src/mochijson2.erl create mode 100644 src/mochinum.erl diff --git a/rebar.config b/rebar.config index 0827d3d..b78713a 100644 --- a/rebar.config +++ b/rebar.config @@ -32,12 +32,6 @@ {riak_pb, {git, "https://github.com/OpenRiak/riak_pb.git", {branch, "openriak-3.4"}}}, - {webmachine, - {git, "https://github.com/OpenRiak/webmachine.git", - {branch, "openriak-3.4"}}}, - {mochiweb, - {git, "https://github.com/OpenRiak/mochiweb.git", - {branch, "openriak-3.4"}}}, {riak_core, {git, "https://github.com/OpenRiak/riak_core.git", {branch, "openriak-4.0"}}} diff --git a/src/mochijson2.erl b/src/mochijson2.erl new file mode 100644 index 0000000..4a52437 --- /dev/null +++ b/src/mochijson2.erl @@ -0,0 +1,976 @@ +%% @author Bob Ippolito +%% @copyright 2007 Mochi Media, Inc.
+%% +%% Permission is hereby granted, free of charge, to any person obtaining a +%% copy of this software and associated documentation files (the "Software"), +%% to deal in the Software without restriction, including without limitation +%% the rights to use, copy, modify, merge, publish, distribute, sublicense, +%% and/or sell copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following conditions: +%% +%% The above copyright notice and this permission notice shall be included in +%% all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +%% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +%% DEALINGS IN THE SOFTWARE. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works +%% with binaries as strings, arrays as lists (without an {array, _}) +%% wrapper and it only knows how to decode UTF-8 (and ASCII). +%% +%% JSON terms are decoded as follows (javascript -> erlang): +%%      <ul>
+%%          <li>{"key": "value"} ->
+%%              {struct, [{&lt;&lt;"key">>, &lt;&lt;"value">>}]}</li>
+%%          <li>["array", 123, 12.34, true, false, null] ->
+%%              [&lt;&lt;"array">>, 123, 12.34, true, false, null]
+%%          </li>
+%%      </ul>
+%%      <ul>
+%%          <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
+%%          <li>Objects decode to {struct, PropList}</li>
+%%          <li>Numbers decode to integer or float</li>
+%%          <li>true, false, null decode to their respective terms.</li>
+%%      </ul>
+%%      The encoder will accept the same format that the decoder will produce,
+%%      but will also allow additional cases for leniency:
+%%      <ul>
+%%          <li>atoms other than true, false, null will be considered UTF-8
+%%              strings (even as a proplist key)
+%%          </li>
+%%          <li>{json, IoList} will insert IoList directly into the output
+%%              with no validation
+%%          </li>
+%%          <li>{array, Array} will be encoded as Array
+%%              (legacy mochijson style)
+%%          </li>
+%%          <li>A non-empty raw proplist will be encoded as an object as long
+%%              as the first pair does not have an atom key of json, struct,
+%%              or array
+%%          </li>
+%%      </ul>
+ +-module(mochijson2). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1, decode/2]). + +%% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, + column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, + column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, + column=1, + line=1+S#decoder.line}). +-define(INC_CHAR(S, C), + case C of + $\n -> + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}; + _ -> + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset} + end). +-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). + + +%% @type json_string() = atom | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = [json_term()] +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_eep18_object() = {[{json_string(), json_term()}]} +%% @type json_iolist() = {json, iolist()} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() | json_eep18_object() | json_iolist() + +-record(encoder, {handler=null, + utf8=false}). + +-record(decoder, {object_hook=null, + offset=0, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. +%% @type encoder_option() = handler_option() | utf8_option() +%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false) +encoder(Options) -> + State = parse_encoder_options(Options, #encoder{}), + fun (O) -> json_encode(O, State) end. + +%% @spec encode(json_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist. +encode(Any) -> + json_encode(Any, #encoder{}). + +%% @spec decoder([decoder_option()]) -> function() +%% @doc Create a decoder/1 with the given options. 
+decoder(Options) -> + State = parse_decoder_options(Options, #decoder{}), + fun (O) -> json_decode(O, State) end. + +%% @spec decode(iolist(), [{format, proplist | eep18 | struct | map}]) -> json_term() +%% @doc Decode the given iolist to Erlang terms using the given object format +%% for decoding, where proplist returns JSON objects as [{binary(), json_term()}] +%% proplists, eep18 returns JSON objects as {[binary(), json_term()]}, +%% map returns JSON objects as #{binary() => json_term()}, and struct +%% returns them as-is. +decode(S, Options) -> + json_decode(S, parse_decoder_options(Options, #decoder{})). + +%% @spec decode(iolist()) -> json_term() +%% @doc Decode the given iolist to Erlang terms. +decode(S) -> + json_decode(S, #decoder{}). + +%% Internal API + +parse_encoder_options([], State) -> + State; +parse_encoder_options([{handler, Handler} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{handler=Handler}); +parse_encoder_options([{utf8, Switch} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{utf8=Switch}). + +parse_decoder_options([], State) -> + State; +parse_decoder_options([{object_hook, Hook} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{object_hook=Hook}); +parse_decoder_options([{format, map} | Rest], State) -> + Hook = make_object_hook_for_map(), + parse_decoder_options(Rest, State#decoder{object_hook=Hook}); +parse_decoder_options([{format, Format} | Rest], State) + when Format =:= struct orelse Format =:= eep18 orelse Format =:= proplist -> + parse_decoder_options(Rest, State#decoder{object_hook=Format}). + +make_object_hook_for_map() -> + fun ({struct, P}) -> maps:from_list(P) end. 
+ +json_encode(true, _State) -> + <<"true">>; +json_encode(false, _State) -> + <<"false">>; +json_encode(null, _State) -> + <<"null">>; +json_encode(I, _State) when is_integer(I) -> + integer_to_list(I); +json_encode(F, _State) when is_float(F) -> + mochinum:digits(F); +json_encode(S, State) when is_binary(S); is_atom(S) -> + json_encode_string(S, State); +json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso + K =/= array andalso + K =/= json) -> + json_encode_proplist(Props, State); +json_encode({struct, Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({}, State) -> + json_encode_proplist([], State); +json_encode(Array, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({array, Array}, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode(M, State) when is_map(M) -> + json_encode_map(M, State); +json_encode({json, IoList}, _State) -> + IoList; +json_encode(Bad, #encoder{handler=null}) -> + exit({json_encode, {bad_term, Bad}}); +json_encode(Bad, State=#encoder{handler=Handler}) -> + json_encode(Handler(Bad), State). + +json_encode_array([], _State) -> + <<"[]">>; +json_encode_array(L, State) -> + F = fun (O, Acc) -> + [$,, json_encode(O, State) | Acc] + end, + [$, | Acc1] = lists:foldl(F, "[", L), + lists:reverse([$\] | Acc1]). + +json_encode_proplist([], _State) -> + <<"{}">>; +json_encode_proplist(Props, State) -> + F = fun ({K, V}, Acc) -> + KS = json_encode_string(K, State), + VS = json_encode(V, State), + [$,, VS, $:, KS | Acc] + end, + [$, | Acc1] = lists:foldl(F, "{", Props), + lists:reverse([$\} | Acc1]). 
+ +json_encode_map(Map, _State) when map_size(Map) =:= 0 -> + <<"{}">>; +json_encode_map(Map, State) -> + F = fun(K, V, Acc) -> + KS = json_encode_string(K, State), + VS = json_encode(V, State), + [$,, VS, $:, KS | Acc] + end, + [$, | Acc1] = maps:fold(F, "{", Map), + lists:reverse([$\} | Acc1]). + +json_encode_string(A, State) when is_atom(A) -> + json_encode_string(atom_to_binary(A, latin1), State); +json_encode_string(B, State) when is_binary(B) -> + case json_bin_is_safe(B) of + true -> + [?Q, B, ?Q]; + false -> + json_encode_string_unicode(unicode:characters_to_list(B), State, [?Q]) + end; +json_encode_string(I, _State) when is_integer(I) -> + [?Q, integer_to_list(I), ?Q]; +json_encode_string(L, State) when is_list(L) -> + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(L, State, [?Q]) + end. + +json_string_is_safe([]) -> + true; +json_string_is_safe([C | Rest]) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + false; + C when C < 16#7f -> + json_string_is_safe(Rest); + _ -> + exit({json_encode, {bad_char, C}}) + end. + +json_bin_is_safe(<<>>) -> + true; +json_bin_is_safe(<>) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f -> + false; + C when C < 16#7f -> + json_bin_is_safe(Rest) + end. + +json_encode_string_unicode([], _State, Acc) -> + lists:reverse([$\" | Acc]); +json_encode_string_unicode([C | Cs], State, Acc) -> + Acc1 = case C of + ?Q -> + [?Q, $\\ | Acc]; + %% Escaping solidus is only useful when trying to protect + %% against "" injection attacks which are only + %% possible when JSON is inserted into a HTML document + %% in-line. 
mochijson2 does not protect you from this, so + %% if you do insert directly into HTML then you need to + %% uncomment the following case or escape the output of encode. + %% + %% $/ -> + %% [$/, $\\ | Acc]; + %% + $\\ -> + [$\\, $\\ | Acc]; + $\b -> + [$b, $\\ | Acc]; + $\f -> + [$f, $\\ | Acc]; + $\n -> + [$n, $\\ | Acc]; + $\r -> + [$r, $\\ | Acc]; + $\t -> + [$t, $\\ | Acc]; + C when C >= 0, C < $\s -> + [unihex(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> + [unicode:characters_to_binary([C]) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> + [unihex(C) | Acc]; + C when C < 16#7f -> + [C | Acc]; + _ -> + %% json_string_is_safe guarantees that this branch is dead + exit({json_encode, {bad_char, C}}) + end, + json_encode_string_unicode(Cs, State, Acc1). + +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +unihex(C) when C < 16#10000 -> + <> = <>, + Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], + [$\\, $u | Digits]; +unihex(C) when C =< 16#10FFFF -> + N = C - 16#10000, + S1 = 16#d800 bor ((N bsr 10) band 16#3ff), + S2 = 16#dc00 bor (N band 16#3ff), + [unihex(S1), unihex(S2)]. + +json_decode(L, S) when is_list(L) -> + json_decode(iolist_to_binary(L), S); +json_decode(B, S) -> + {Res, S1} = decode1(B, S), + {eof, _} = tokenize(B, S1#decoder{state=trim}), + Res. + +decode1(B, S=#decoder{state=null}) -> + case tokenize(B, S#decoder{state=any}) of + {{const, C}, S1} -> + {C, S1}; + {start_array, S1} -> + decode_array(B, S1); + {start_object, S1} -> + decode_object(B, S1) + end. + +make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct -> + V; +make_object({struct, P}, #decoder{object_hook=eep18}) -> + {P}; +make_object({struct, P}, #decoder{object_hook=proplist}) -> + P; +make_object(V, #decoder{object_hook=Hook}) -> + Hook(V). + +decode_object(B, S) -> + decode_object(B, S#decoder{state=key}, []). 
+ +decode_object(B, S=#decoder{state=key}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {{const, K}, S1} -> + {colon, S2} = tokenize(B, S1), + {V, S3} = decode1(B, S2#decoder{state=null}), + decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) + end; +decode_object(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {comma, S1} -> + decode_object(B, S1#decoder{state=key}, Acc) + end. + +decode_array(B, S) -> + decode_array(B, S#decoder{state=any}, []). + +decode_array(B, S=#decoder{state=any}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {start_array, S1} -> + {Array, S2} = decode_array(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {start_object, S1} -> + {Array, S2} = decode_object(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {{const, Const}, S1} -> + decode_array(B, S1#decoder{state=comma}, [Const | Acc]) + end; +decode_array(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {comma, S1} -> + decode_array(B, S1#decoder{state=any}, Acc) + end. + +tokenize_string(B, S=#decoder{offset=O}) -> + case tokenize_string_fast(B, O) of + {escape, O1} -> + Length = O1 - O, + S1 = ?ADV_COL(S, Length), + <<_:O/binary, Head:Length/binary, _/binary>> = B, + tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); + O1 -> + Length = O1 - O, + <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, + {{const, String}, ?ADV_COL(S, Length + 1)} + end. 
+ +tokenize_string_fast(B, O) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + O; + <<_:O/binary, $\\, _/binary>> -> + {escape, O}; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string_fast(B, 1 + O); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string_fast(B, 2 + O); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string_fast(B, 3 + O); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string_fast(B, 4 + O); + _ -> + throw(invalid_utf8) + end. + +tokenize_string(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; + <<_:O/binary, "\\\"", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); + <<_:O/binary, "\\\\", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); + <<_:O/binary, "\\/", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); + <<_:O/binary, "\\b", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); + <<_:O/binary, "\\f", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); + <<_:O/binary, "\\n", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); + <<_:O/binary, "\\r", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); + <<_:O/binary, "\\t", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); + <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> + C = erlang:list_to_integer([C3, C2, C1, C0], 16), + if C > 16#D7FF, C < 16#DC00 -> + %% coalesce UTF-16 surrogate pair + <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, + D = erlang:list_to_integer([D3,D2,D1,D0], 16), + Acc1 = [unicode:characters_to_binary( + <>, + utf16) + | Acc], + tokenize_string(B, ?ADV_COL(S, 12), Acc1); + true -> + Acc1 = 
[unicode:characters_to_binary([C]) | Acc], + tokenize_string(B, ?ADV_COL(S, 6), Acc1) + end; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); + _ -> + throw(invalid_utf8) + end. + +tokenize_number(B, S) -> + case tokenize_number(B, sign, S, []) of + {{int, Int}, S1} -> + {{const, list_to_integer(Int)}, S1}; + {{float, Float}, S1} -> + {{const, list_to_float(Float)}, S1} + end. + +tokenize_number(B, sign, S=#decoder{offset=O}, []) -> + case B of + <<_:O/binary, $-, _/binary>> -> + tokenize_number(B, int, ?INC_COL(S), [$-]); + _ -> + tokenize_number(B, int, S, []) + end; +tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $0, _/binary>> -> + tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); + <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, frac, S, Acc) + end; +tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> + tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. 
| Acc]); + _ -> + {{int, lists:reverse(Acc)}, S} + end; +tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end; +tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> + tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, eint, S, Acc) + end; +tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end. 
+ +tokenize(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + tokenize(B, ?INC_CHAR(S, C)); + <<_:O/binary, "{", _/binary>> -> + {start_object, ?INC_COL(S)}; + <<_:O/binary, "}", _/binary>> -> + {end_object, ?INC_COL(S)}; + <<_:O/binary, "[", _/binary>> -> + {start_array, ?INC_COL(S)}; + <<_:O/binary, "]", _/binary>> -> + {end_array, ?INC_COL(S)}; + <<_:O/binary, ",", _/binary>> -> + {comma, ?INC_COL(S)}; + <<_:O/binary, ":", _/binary>> -> + {colon, ?INC_COL(S)}; + <<_:O/binary, "null", _/binary>> -> + {{const, null}, ?ADV_COL(S, 4)}; + <<_:O/binary, "true", _/binary>> -> + {{const, true}, ?ADV_COL(S, 4)}; + <<_:O/binary, "false", _/binary>> -> + {{const, false}, ?ADV_COL(S, 5)}; + <<_:O/binary, "\"", _/binary>> -> + tokenize_string(B, ?INC_COL(S)); + <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) + orelse C =:= $- -> + tokenize_number(B, S); + <<_:O/binary>> -> + trim = S#decoder.state, + {eof, S} + end. +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + + +%% testing constructs borrowed from the Yaws JSON implementation. + +%% Create an object from a list of Key/Value pairs. + +obj_new() -> + {struct, []}. + +is_obj({struct, Props}) -> + F = fun ({K, _}) when is_binary(K) -> true end, + lists:all(F, Props). + +obj_from_list(Props) -> + Obj = {struct, Props}, + ?assert(is_obj(Obj)), + Obj. + +%% Test for equivalence of Erlang terms. +%% Due to arbitrary order of construction, equivalent objects might +%% compare unequal as erlang terms, so we need to carefully recurse +%% through aggregates (tuples and objects). + +equiv({struct, Props1}, {struct, Props2}) -> + equiv_object(Props1, Props2); +equiv(L1, L2) when is_list(L1), is_list(L2) -> + equiv_list(L1, L2); +equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; +equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; +equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. 
+ +%% Object representation and traversal order is unknown. +%% Use the sledgehammer and sort property lists. + +equiv_object(Props1, Props2) -> + L1 = lists:keysort(1, Props1), + L2 = lists:keysort(1, Props2), + Pairs = lists:zip(L1, L2), + true = lists:all(fun({{K1, V1}, {K2, V2}}) -> + equiv(K1, K2) and equiv(V1, V2) + end, Pairs). + +%% Recursively compare tuple elements for equivalence. + +equiv_list([], []) -> + true; +equiv_list([V1 | L1], [V2 | L2]) -> + equiv(V1, V2) andalso equiv_list(L1, L2). + +decode_test() -> + [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), + <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). + +e2j_vec_test() -> + test_one(e2j_test_vec(utf8), 1). + +test_one([], _N) -> + %% io:format("~p tests passed~n", [N-1]), + ok; +test_one([{E, J} | Rest], N) -> + %% io:format("[~p] ~p ~p~n", [N, E, J]), + true = equiv(E, decode(J)), + true = equiv(E, decode(encode(E))), + test_one(Rest, 1+N). + +e2j_test_vec(utf8) -> + [ + {1, "1"}, + {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes + {-1, "-1"}, + {-3.1416, "-3.14160"}, + {12.0e10, "1.20000e+11"}, + {1.234E+10, "1.23400e+10"}, + {-1.234E-10, "-1.23400e-10"}, + {10.0, "1.0e+01"}, + {123.456, "1.23456E+2"}, + {10.0, "1e1"}, + {<<"foo">>, "\"foo\""}, + {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, + {<<"">>, "\"\""}, + {<<"\n\n\n">>, "\"\\n\\n\\n\""}, + {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, + {obj_new(), "{}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), + "{\"foo\":\"bar\",\"baz\":123}"}, + {[], "[]"}, + {[[]], "[[]]"}, + {[1, <<"foo">>], "[1,\"foo\"]"}, + + %% json array in a json object + {obj_from_list([{<<"foo">>, [123]}]), + "{\"foo\":[123]}"}, + + %% json object in a json object + {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), + "{\"foo\":{\"bar\":true}}"}, + + %% fold evaluation order + {obj_from_list([{<<"foo">>, []}, + 
{<<"bar">>, obj_from_list([{<<"baz">>, true}])}, + {<<"alice">>, <<"bob">>}]), + "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, + + %% json object in a json array + {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], + "[-123,\"foo\",{\"bar\":[]},null]"} + ]. + +%% test utf8 encoding +encoder_utf8_test() -> + %% safe conversion case (default) + <<"\"\\u0001\\u0442\\u0435\\u0441\\u0442\"">> = + iolist_to_binary(encode(<<1,"\321\202\320\265\321\201\321\202">>)), + + %% raw utf8 output (optional) + Enc = mochijson2:encoder([{utf8, true}]), + <<34,"\\u0001",209,130,208,181,209,129,209,130,34>> = + iolist_to_binary(Enc(<<1,"\321\202\320\265\321\201\321\202">>)). + +input_validation_test() -> + Good = [ + {16#00A3, <>}, %% pound + {16#20AC, <>}, %% euro + {16#10196, <>} %% denarius + ], + lists:foreach(fun({CodePoint, UTF8}) -> + Expect = unicode:characters_to_binary([CodePoint]), + Expect = decode(UTF8) + end, Good), + + Bad = [ + %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + <>, + %% missing continuations, last byte in each should be 80-BF + <>, + <>, + <>, + %% we don't support code points > 10FFFF per RFC 3629 + <>, + %% escape characters trigger a different code path + <> + ], + lists:foreach( + fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end, + %% could be {ucs,{bad_utf8_character_code}} or + %% {json_encode,{bad_char,_}} + {'EXIT', _} = (catch encode(X)) + end, Bad). + +inline_json_test() -> + ?assertEqual(<<"\"iodata iodata\"">>, + iolist_to_binary( + encode({json, [<<"\"iodata">>, " iodata\""]}))), + ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, + decode( + encode({struct, + [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), + ok. + +big_unicode_test() -> + UTF8Seq = unicode:characters_to_binary([16#0001d120]), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(UTF8Seq))), + ?assertEqual( + UTF8Seq, + decode(iolist_to_binary(encode(UTF8Seq)))), + ok. 
+ +custom_decoder_test() -> + ?assertEqual( + {struct, [{<<"key">>, <<"value">>}]}, + (decoder([]))("{\"key\": \"value\"}")), + F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, + ?assertEqual( + win, + (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), + ok. + +atom_test() -> + %% JSON native atoms + [begin + ?assertEqual(A, decode(atom_to_list(A))), + ?assertEqual(iolist_to_binary(atom_to_list(A)), + iolist_to_binary(encode(A))) + end || A <- [true, false, null]], + %% Atom to string + ?assertEqual( + <<"\"foo\"">>, + iolist_to_binary(encode(foo))), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary( + encode( + binary_to_atom( + unicode:characters_to_binary([16#0001d120]), latin1)))), + ok. + +key_encode_test() -> + %% Some forms are accepted as keys that would not be strings in other + %% cases + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{foo, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{"foo", 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{foo, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{<<"foo">>, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{"foo", 1}]))), + ?assertEqual( + <<"{\"\\ud834\\udd20\":1}">>, + iolist_to_binary( + encode({struct, [{[16#0001d120], 1}]}))), + ?assertEqual( + <<"{\"1\":1}">>, + iolist_to_binary(encode({struct, [{1, 1}]}))), + ok. 
+ +unsafe_chars_test() -> + Chars = "\"\\\b\f\n\r\t", + [begin + ?assertEqual(false, json_string_is_safe([C])), + ?assertEqual(false, json_bin_is_safe(<>)), + ?assertEqual(<>, decode(encode(<>))) + end || C <- Chars], + ?assertEqual( + false, + json_string_is_safe([16#0001d120])), + ?assertEqual( + false, + json_bin_is_safe(unicode:characters_to_binary([16#0001d120]))), + ?assertEqual( + [16#0001d120], + unicode:characters_to_list( + decode( + encode( + binary_to_atom( + unicode:characters_to_binary([16#0001d120]), + latin1))))), + ?assertEqual( + false, + json_string_is_safe([16#10ffff])), + ?assertEqual( + false, + json_bin_is_safe(unicode:characters_to_binary([16#10ffff]))), + %% solidus can be escaped but isn't unsafe by default + ?assertEqual( + <<"/">>, + decode(<<"\"\\/\"">>)), + ok. + +int_test() -> + ?assertEqual(0, decode("0")), + ?assertEqual(1, decode("1")), + ?assertEqual(11, decode("11")), + ok. + +large_int_test() -> + ?assertEqual(<<"-2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(-2147483649214748364921474836492147483649))), + ?assertEqual(<<"2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(2147483649214748364921474836492147483649))), + ok. + +float_test() -> + ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), + ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), + ok. + +handler_test() -> + ?assertEqual( + {'EXIT',{json_encode,{bad_term,{x,y}}}}, + catch encode({x,y})), + F = fun ({x,y}) -> [] end, + ?assertEqual( + <<"[]">>, + iolist_to_binary((encoder([{handler, F}]))({x, y}))), + ok. + +encode_empty_test_() -> + [{A, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(B)))} + || {A, B} <- [{"eep18 {}", {}}, + {"eep18 {[]}", {[]}}, + {"{struct, []}", {struct, []}}]]. 
+ +encode_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(JSON, iolist_to_binary(encode(decode(JSON, [{format, F}]))))} + || F <- [struct, eep18, proplist]]. + +format_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(A, decode(JSON, [{format, F}]))} + || {F, A} <- [{struct, {struct, P}}, + {eep18, {P}}, + {proplist, P}]]. + +array_test() -> + A = [<<"hello">>], + ?assertEqual(A, decode(encode({array, A}))). + +bad_char_test() -> + ?assertEqual( + {'EXIT', {json_encode, {bad_char, 16#110000}}}, + catch json_string_is_safe([16#110000])). + +utf8_roundtrip_test_() -> + %% These are the boundary cases for UTF8 encoding + Codepoints = [%% 7 bits -> 1 byte + 16#00, 16#7f, + %% 11 bits -> 2 bytes + 16#080, 16#07ff, + %% 16 bits -> 3 bytes + 16#0800, 16#ffff, + 16#d7ff, 16#e000, + %% 21 bits -> 4 bytes + 16#010000, 16#10ffff], + UTF8 = unicode:characters_to_binary(Codepoints), + Encode = encoder([{utf8, true}]), + [{"roundtrip escaped", + ?_assertEqual(UTF8, decode(encode(UTF8)))}, + {"roundtrip utf8", + ?_assertEqual(UTF8, decode(Encode(UTF8)))}]. + +utf8_non_character_test_() -> + S = unicode:characters_to_binary([16#ffff, 16#fffe]), + [{"roundtrip escaped", ?_assertEqual(S, decode(encode(S)))}, + {"roundtrip utf8", ?_assertEqual(S, decode((encoder([{utf8, true}]))(S)))}]. + +decode_map_test() -> + Json = "{\"var1\": 3, \"var2\": {\"var3\": 7}}", + M = #{<<"var1">> => 3,<<"var2">> => #{<<"var3">> => 7}}, + ?assertEqual(M, decode(Json, [{format, map}])). + +encode_map_test() -> + ?assertEqual(<<"{\"a\":1}">>, iolist_to_binary(encode(#{a => 1}))), + M = #{<<"a">> => 1, <<"b">> => #{<<"c">> => 2}}, + ?assertEqual(M, decode(iolist_to_binary(encode(#{a => 1, b => #{ c => 2}})), [{format, map}])). + +encode_empty_map_test() -> + ?assertEqual(<<"{}">>, encode(#{})). + +-endif. 
diff --git a/src/mochinum.erl b/src/mochinum.erl new file mode 100644 index 0000000..867d6c9 --- /dev/null +++ b/src/mochinum.erl @@ -0,0 +1,374 @@ +%% @copyright 2007 Mochi Media, Inc. +%% @author Bob Ippolito +%% +%% Permission is hereby granted, free of charge, to any person obtaining a +%% copy of this software and associated documentation files (the "Software"), +%% to deal in the Software without restriction, including without limitation +%% the rights to use, copy, modify, merge, publish, distribute, sublicense, +%% and/or sell copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following conditions: +%% +%% The above copyright notice and this permission notice shall be included in +%% all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +%% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +%% DEALINGS IN THE SOFTWARE. + +%% @doc Useful numeric algorithms for floats that cover some deficiencies +%% in the math module. More interesting is digits/1, which implements +%% the algorithm from: +%% http://www.cs.indiana.edu/~burger/fp/index.html +%% See also "Printing Floating-Point Numbers Quickly and Accurately" +%% in Proceedings of the SIGPLAN '96 Conference on Programming Language +%% Design and Implementation. + +-module(mochinum). +-author("Bob Ippolito "). +-export([digits/1, frexp/1, int_pow/2, int_ceil/1]). + +%% IEEE 754 Float exponent bias +-define(FLOAT_BIAS, 1022). +-define(MIN_EXP, -1074). +-define(BIG_POW, 4503599627370496). 
+ +%% External API + +%% @spec digits(number()) -> string() +%% @doc Returns a string that accurately represents the given integer or float +%% using a conservative amount of digits. Great for generating +%% human-readable output, or compact ASCII serializations for floats. +digits(N) when is_integer(N) -> + integer_to_list(N); +digits(Float) when Float == 0.0 -> + "0.0"; +digits(Float) -> + {Frac1, Exp1} = frexp_int(Float), + [Place0 | Digits0] = digits1(Float, Exp1, Frac1), + {Place, Digits} = transform_digits(Place0, Digits0), + R = insert_decimal(Place, Digits), + case Float < 0 of + true -> + [$- | R]; + _ -> + R + end. + +%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()} +%% @doc Return the fractional and exponent part of an IEEE 754 double, +%% equivalent to the libc function of the same name. +%% F = Frac * pow(2, Exp). +frexp(F) -> + frexp1(unpack(F)). + +%% @spec int_pow(X::integer(), N::integer()) -> Y::integer() +%% @doc Moderately efficient way to exponentiate integers. +%% int_pow(10, 2) = 100. +int_pow(_X, 0) -> + 1; +int_pow(X, N) when N > 0 -> + int_pow(X, N, 1). + +%% @spec int_ceil(F::float()) -> integer() +%% @doc Return the ceiling of F as an integer. The ceiling is defined as +%% F when F == trunc(F); +%% trunc(F) when F < 0; +%% trunc(F) + 1 when F > 0. +int_ceil(X) -> + T = trunc(X), + case (X - T) of + Pos when Pos > 0 -> T + 1; + _ -> T + end. + + +%% Internal API + +int_pow(X, N, R) when N < 2 -> + R * X; +int_pow(X, N, R) -> + int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end). + +insert_decimal(0, S) -> + "0." ++ S; +insert_decimal(Place, S) when Place > 0 -> + L = length(S), + case Place - L of + 0 -> + S ++ ".0"; + N when N < 0 -> + {S0, S1} = lists:split(L + N, S), + S0 ++ "." ++ S1; + N when N < 6 -> + %% More places than digits + S ++ lists:duplicate(N, $0) ++ ".0"; + _ -> + insert_decimal_exp(Place, S) + end; +insert_decimal(Place, S) when Place > -6 -> + "0." 
++ lists:duplicate(abs(Place), $0) ++ S; +insert_decimal(Place, S) -> + insert_decimal_exp(Place, S). + +insert_decimal_exp(Place, S) -> + [C | S0] = S, + S1 = case S0 of + [] -> + "0"; + _ -> + S0 + end, + Exp = case Place < 0 of + true -> + "e-"; + false -> + "e+" + end, + [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)). + + +digits1(Float, Exp, Frac) -> + Round = ((Frac band 1) =:= 0), + case Exp >= 0 of + true -> + BExp = 1 bsl Exp, + case (Frac =/= ?BIG_POW) of + true -> + scale((Frac * BExp * 2), 2, BExp, BExp, + Round, Round, Float); + false -> + scale((Frac * BExp * 4), 4, (BExp * 2), BExp, + Round, Round, Float) + end; + false -> + case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of + true -> + scale((Frac * 2), 1 bsl (1 - Exp), 1, 1, + Round, Round, Float); + false -> + scale((Frac * 4), 1 bsl (2 - Exp), 2, 1, + Round, Round, Float) + end + end. + +scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) -> + Est = int_ceil(math:log10(abs(Float)) - 1.0e-10), + %% Note that the scheme implementation uses a 326 element look-up table + %% for int_pow(10, N) where we do not. + case Est >= 0 of + true -> + fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est, + LowOk, HighOk); + false -> + Scale = int_pow(10, -Est), + fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est, + LowOk, HighOk) + end. + +fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) -> + TooLow = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TooLow of + true -> + [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)]; + false -> + [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)] + end. 
+ +generate(R0, S, MPlus, MMinus, LowOk, HighOk) -> + D = R0 div S, + R = R0 rem S, + TC1 = case LowOk of + true -> + R =< MMinus; + false -> + R < MMinus + end, + TC2 = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TC1 of + false -> + case TC2 of + false -> + [D | generate(R * 10, S, MPlus * 10, MMinus * 10, + LowOk, HighOk)]; + true -> + [D + 1] + end; + true -> + case TC2 of + false -> + [D]; + true -> + case R * 2 < S of + true -> + [D]; + false -> + [D + 1] + end + end + end. + +unpack(Float) -> + <> = <>, + {Sign, Exp, Frac}. + +frexp1({_Sign, 0, 0}) -> + {0.0, 0}; +frexp1({Sign, 0, Frac}) -> + Exp = log2floor(Frac), + <> = <>, + {Frac1, -(?FLOAT_BIAS) - 52 + Exp}; +frexp1({Sign, Exp, Frac}) -> + <> = <>, + {Frac1, Exp - ?FLOAT_BIAS}. + +log2floor(Int) -> + log2floor(Int, 0). + +log2floor(0, N) -> + N; +log2floor(Int, N) -> + log2floor(Int bsr 1, 1 + N). + + +transform_digits(Place, [0 | Rest]) -> + transform_digits(Place, Rest); +transform_digits(Place, Digits) -> + {Place, [$0 + D || D <- Digits]}. + + +frexp_int(F) -> + case unpack(F) of + {_Sign, 0, Frac} -> + {Frac, ?MIN_EXP}; + {_Sign, Exp, Frac} -> + {Frac + (1 bsl 52), Exp - 53 - ?FLOAT_BIAS} + end. + +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +int_ceil_test() -> + ?assertEqual(1, int_ceil(0.0001)), + ?assertEqual(0, int_ceil(0.0)), + ?assertEqual(1, int_ceil(0.99)), + ?assertEqual(1, int_ceil(1.0)), + ?assertEqual(-1, int_ceil(-1.5)), + ?assertEqual(-2, int_ceil(-2.0)), + ok. + +int_pow_test() -> + ?assertEqual(1, int_pow(1, 1)), + ?assertEqual(1, int_pow(1, 0)), + ?assertEqual(1, int_pow(10, 0)), + ?assertEqual(10, int_pow(10, 1)), + ?assertEqual(100, int_pow(10, 2)), + ?assertEqual(1000, int_pow(10, 3)), + ok. 
+ +digits_test() -> + ?assertEqual("0", + digits(0)), + ?assertEqual("0.0", + digits(0.0)), + ?assertEqual("0.0", + digits(-0.0)), + ?assertEqual("1.0", + digits(1.0)), + ?assertEqual("-1.0", + digits(-1.0)), + ?assertEqual("0.1", + digits(0.1)), + ?assertEqual("0.01", + digits(0.01)), + ?assertEqual("0.001", + digits(0.001)), + ?assertEqual("1.0e+6", + digits(1000000.0)), + ?assertEqual("0.5", + digits(0.5)), + ?assertEqual("4503599627370496.0", + digits(4503599627370496.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 =:= 5.0e-324 + <> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual("5.0e-324", + digits(SmallDenorm)), + ?assertEqual(SmallDenorm, + list_to_float(digits(SmallDenorm))), + %% large denormalized number + %% 2.22507385850720088902e-308 + <> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual("2.225073858507201e-308", + digits(BigDenorm)), + ?assertEqual(BigDenorm, + list_to_float(digits(BigDenorm))), + %% small normalized number + %% 2.22507385850720138309e-308 + <> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual("2.2250738585072014e-308", + digits(SmallNorm)), + ?assertEqual(SmallNorm, + list_to_float(digits(SmallNorm))), + %% large normalized number + %% 1.79769313486231570815e+308 + <> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual("1.7976931348623157e+308", + digits(LargeNorm)), + ?assertEqual(LargeNorm, + list_to_float(digits(LargeNorm))), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual("5.0e-324", + digits(math:pow(2, -1074))), + ok. 
+ +frexp_test() -> + %% zero + ?assertEqual({0.0, 0}, frexp(0.0)), + %% one + ?assertEqual({0.5, 1}, frexp(1.0)), + %% negative one + ?assertEqual({-0.5, 1}, frexp(-1.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 + <> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual({0.5, -1073}, frexp(SmallDenorm)), + %% large denormalized number + %% 2.22507385850720088902e-308 + <> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999978, -1022}, + frexp(BigDenorm)), + %% small normalized number + %% 2.22507385850720138309e-308 + <> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual({0.5, -1021}, frexp(SmallNorm)), + %% large normalized number + %% 1.79769313486231570815e+308 + <> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999989, 1024}, + frexp(LargeNorm)), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual( + {0.5, -1073}, + frexp(math:pow(2, -1074))), + ok. + +-endif. diff --git a/src/riak_api.app.src b/src/riak_api.app.src index 5ce5af7..69700be 100644 --- a/src/riak_api.app.src +++ b/src/riak_api.app.src @@ -9,9 +9,7 @@ stdlib, ssl, riak_core, - riak_pb, - webmachine, - mochiweb + riak_pb ]}, {registered, [riak_api_sup, riak_api_pb_sup]}, From a011b1d4cdaa0f6e921f394dabcd94964f35d1e2 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 1 Apr 2026 10:56:56 +0100 Subject: [PATCH 20/53] Remove mochijson2/mochinum - included in rhc --- src/mochijson2.erl | 976 --------------------------------------------- src/mochinum.erl | 374 ----------------- 2 files changed, 1350 deletions(-) delete mode 100644 src/mochijson2.erl delete mode 100644 src/mochinum.erl diff --git a/src/mochijson2.erl b/src/mochijson2.erl deleted file mode 100644 index 4a52437..0000000 --- a/src/mochijson2.erl +++ /dev/null @@ -1,976 +0,0 @@ -%% @author Bob Ippolito -%% @copyright 2007 Mochi Media, Inc. 
-%% -%% Permission is hereby granted, free of charge, to any person obtaining a -%% copy of this software and associated documentation files (the "Software"), -%% to deal in the Software without restriction, including without limitation -%% the rights to use, copy, modify, merge, publish, distribute, sublicense, -%% and/or sell copies of the Software, and to permit persons to whom the -%% Software is furnished to do so, subject to the following conditions: -%% -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. -%% -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -%% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -%% DEALINGS IN THE SOFTWARE. - -%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works -%% with binaries as strings, arrays as lists (without an {array, _}) -%% wrapper and it only knows how to decode UTF-8 (and ASCII). -%% -%% JSON terms are decoded as follows (javascript -> erlang): -%%
    -%%
  • {"key": "value"} -> -%% {struct, [{<<"key">>, <<"value">>}]}
  • -%%
  • ["array", 123, 12.34, true, false, null] -> -%% [<<"array">>, 123, 12.34, true, false, null] -%%
  • -%%
-%%
    -%%
  • Strings in JSON decode to UTF-8 binaries in Erlang
  • -%%
  • Objects decode to {struct, PropList}
  • -%%
  • Numbers decode to integer or float
  • -%%
  • true, false, null decode to their respective terms.
  • -%%
-%% The encoder will accept the same format that the decoder will produce, -%% but will also allow additional cases for leniency: -%%
    -%%
  • atoms other than true, false, null will be considered UTF-8 -%% strings (even as a proplist key) -%%
  • -%%
  • {json, IoList} will insert IoList directly into the output -%% with no validation -%%
  • -%%
  • {array, Array} will be encoded as Array -%% (legacy mochijson style) -%%
  • -%%
  • A non-empty raw proplist will be encoded as an object as long -%% as the first pair does not have an atom key of json, struct, -%% or array -%%
  • -%%
- --module(mochijson2). --author('bob@mochimedia.com'). --export([encoder/1, encode/1]). --export([decoder/1, decode/1, decode/2]). - -%% This is a macro to placate syntax highlighters.. --define(Q, $\"). --define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, - column=N+S#decoder.column}). --define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, - column=1+S#decoder.column}). --define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, - column=1, - line=1+S#decoder.line}). --define(INC_CHAR(S, C), - case C of - $\n -> - S#decoder{column=1, - line=1+S#decoder.line, - offset=1+S#decoder.offset}; - _ -> - S#decoder{column=1+S#decoder.column, - offset=1+S#decoder.offset} - end). --define(IS_WHITESPACE(C), - (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). - - -%% @type json_string() = atom | binary() -%% @type json_number() = integer() | float() -%% @type json_array() = [json_term()] -%% @type json_object() = {struct, [{json_string(), json_term()}]} -%% @type json_eep18_object() = {[{json_string(), json_term()}]} -%% @type json_iolist() = {json, iolist()} -%% @type json_term() = json_string() | json_number() | json_array() | -%% json_object() | json_eep18_object() | json_iolist() - --record(encoder, {handler=null, - utf8=false}). - --record(decoder, {object_hook=null, - offset=0, - line=1, - column=1, - state=null}). - -%% @spec encoder([encoder_option()]) -> function() -%% @doc Create an encoder/1 with the given options. -%% @type encoder_option() = handler_option() | utf8_option() -%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false) -encoder(Options) -> - State = parse_encoder_options(Options, #encoder{}), - fun (O) -> json_encode(O, State) end. - -%% @spec encode(json_term()) -> iolist() -%% @doc Encode the given as JSON to an iolist. -encode(Any) -> - json_encode(Any, #encoder{}). - -%% @spec decoder([decoder_option()]) -> function() -%% @doc Create a decoder/1 with the given options. 
-decoder(Options) -> - State = parse_decoder_options(Options, #decoder{}), - fun (O) -> json_decode(O, State) end. - -%% @spec decode(iolist(), [{format, proplist | eep18 | struct | map}]) -> json_term() -%% @doc Decode the given iolist to Erlang terms using the given object format -%% for decoding, where proplist returns JSON objects as [{binary(), json_term()}] -%% proplists, eep18 returns JSON objects as {[binary(), json_term()]}, -%% map returns JSON objects as #{binary() => json_term()}, and struct -%% returns them as-is. -decode(S, Options) -> - json_decode(S, parse_decoder_options(Options, #decoder{})). - -%% @spec decode(iolist()) -> json_term() -%% @doc Decode the given iolist to Erlang terms. -decode(S) -> - json_decode(S, #decoder{}). - -%% Internal API - -parse_encoder_options([], State) -> - State; -parse_encoder_options([{handler, Handler} | Rest], State) -> - parse_encoder_options(Rest, State#encoder{handler=Handler}); -parse_encoder_options([{utf8, Switch} | Rest], State) -> - parse_encoder_options(Rest, State#encoder{utf8=Switch}). - -parse_decoder_options([], State) -> - State; -parse_decoder_options([{object_hook, Hook} | Rest], State) -> - parse_decoder_options(Rest, State#decoder{object_hook=Hook}); -parse_decoder_options([{format, map} | Rest], State) -> - Hook = make_object_hook_for_map(), - parse_decoder_options(Rest, State#decoder{object_hook=Hook}); -parse_decoder_options([{format, Format} | Rest], State) - when Format =:= struct orelse Format =:= eep18 orelse Format =:= proplist -> - parse_decoder_options(Rest, State#decoder{object_hook=Format}). - -make_object_hook_for_map() -> - fun ({struct, P}) -> maps:from_list(P) end. 
- -json_encode(true, _State) -> - <<"true">>; -json_encode(false, _State) -> - <<"false">>; -json_encode(null, _State) -> - <<"null">>; -json_encode(I, _State) when is_integer(I) -> - integer_to_list(I); -json_encode(F, _State) when is_float(F) -> - mochinum:digits(F); -json_encode(S, State) when is_binary(S); is_atom(S) -> - json_encode_string(S, State); -json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso - K =/= array andalso - K =/= json) -> - json_encode_proplist(Props, State); -json_encode({struct, Props}, State) when is_list(Props) -> - json_encode_proplist(Props, State); -json_encode({Props}, State) when is_list(Props) -> - json_encode_proplist(Props, State); -json_encode({}, State) -> - json_encode_proplist([], State); -json_encode(Array, State) when is_list(Array) -> - json_encode_array(Array, State); -json_encode({array, Array}, State) when is_list(Array) -> - json_encode_array(Array, State); -json_encode(M, State) when is_map(M) -> - json_encode_map(M, State); -json_encode({json, IoList}, _State) -> - IoList; -json_encode(Bad, #encoder{handler=null}) -> - exit({json_encode, {bad_term, Bad}}); -json_encode(Bad, State=#encoder{handler=Handler}) -> - json_encode(Handler(Bad), State). - -json_encode_array([], _State) -> - <<"[]">>; -json_encode_array(L, State) -> - F = fun (O, Acc) -> - [$,, json_encode(O, State) | Acc] - end, - [$, | Acc1] = lists:foldl(F, "[", L), - lists:reverse([$\] | Acc1]). - -json_encode_proplist([], _State) -> - <<"{}">>; -json_encode_proplist(Props, State) -> - F = fun ({K, V}, Acc) -> - KS = json_encode_string(K, State), - VS = json_encode(V, State), - [$,, VS, $:, KS | Acc] - end, - [$, | Acc1] = lists:foldl(F, "{", Props), - lists:reverse([$\} | Acc1]). 
- -json_encode_map(Map, _State) when map_size(Map) =:= 0 -> - <<"{}">>; -json_encode_map(Map, State) -> - F = fun(K, V, Acc) -> - KS = json_encode_string(K, State), - VS = json_encode(V, State), - [$,, VS, $:, KS | Acc] - end, - [$, | Acc1] = maps:fold(F, "{", Map), - lists:reverse([$\} | Acc1]). - -json_encode_string(A, State) when is_atom(A) -> - json_encode_string(atom_to_binary(A, latin1), State); -json_encode_string(B, State) when is_binary(B) -> - case json_bin_is_safe(B) of - true -> - [?Q, B, ?Q]; - false -> - json_encode_string_unicode(unicode:characters_to_list(B), State, [?Q]) - end; -json_encode_string(I, _State) when is_integer(I) -> - [?Q, integer_to_list(I), ?Q]; -json_encode_string(L, State) when is_list(L) -> - case json_string_is_safe(L) of - true -> - [?Q, L, ?Q]; - false -> - json_encode_string_unicode(L, State, [?Q]) - end. - -json_string_is_safe([]) -> - true; -json_string_is_safe([C | Rest]) -> - case C of - ?Q -> - false; - $\\ -> - false; - $\b -> - false; - $\f -> - false; - $\n -> - false; - $\r -> - false; - $\t -> - false; - C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> - false; - C when C < 16#7f -> - json_string_is_safe(Rest); - _ -> - exit({json_encode, {bad_char, C}}) - end. - -json_bin_is_safe(<<>>) -> - true; -json_bin_is_safe(<>) -> - case C of - ?Q -> - false; - $\\ -> - false; - $\b -> - false; - $\f -> - false; - $\n -> - false; - $\r -> - false; - $\t -> - false; - C when C >= 0, C < $\s; C >= 16#7f -> - false; - C when C < 16#7f -> - json_bin_is_safe(Rest) - end. - -json_encode_string_unicode([], _State, Acc) -> - lists:reverse([$\" | Acc]); -json_encode_string_unicode([C | Cs], State, Acc) -> - Acc1 = case C of - ?Q -> - [?Q, $\\ | Acc]; - %% Escaping solidus is only useful when trying to protect - %% against "" injection attacks which are only - %% possible when JSON is inserted into a HTML document - %% in-line. 
mochijson2 does not protect you from this, so - %% if you do insert directly into HTML then you need to - %% uncomment the following case or escape the output of encode. - %% - %% $/ -> - %% [$/, $\\ | Acc]; - %% - $\\ -> - [$\\, $\\ | Acc]; - $\b -> - [$b, $\\ | Acc]; - $\f -> - [$f, $\\ | Acc]; - $\n -> - [$n, $\\ | Acc]; - $\r -> - [$r, $\\ | Acc]; - $\t -> - [$t, $\\ | Acc]; - C when C >= 0, C < $\s -> - [unihex(C) | Acc]; - C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> - [unicode:characters_to_binary([C]) | Acc]; - C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> - [unihex(C) | Acc]; - C when C < 16#7f -> - [C | Acc]; - _ -> - %% json_string_is_safe guarantees that this branch is dead - exit({json_encode, {bad_char, C}}) - end, - json_encode_string_unicode(Cs, State, Acc1). - -hexdigit(C) when C >= 0, C =< 9 -> - C + $0; -hexdigit(C) when C =< 15 -> - C + $a - 10. - -unihex(C) when C < 16#10000 -> - <> = <>, - Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], - [$\\, $u | Digits]; -unihex(C) when C =< 16#10FFFF -> - N = C - 16#10000, - S1 = 16#d800 bor ((N bsr 10) band 16#3ff), - S2 = 16#dc00 bor (N band 16#3ff), - [unihex(S1), unihex(S2)]. - -json_decode(L, S) when is_list(L) -> - json_decode(iolist_to_binary(L), S); -json_decode(B, S) -> - {Res, S1} = decode1(B, S), - {eof, _} = tokenize(B, S1#decoder{state=trim}), - Res. - -decode1(B, S=#decoder{state=null}) -> - case tokenize(B, S#decoder{state=any}) of - {{const, C}, S1} -> - {C, S1}; - {start_array, S1} -> - decode_array(B, S1); - {start_object, S1} -> - decode_object(B, S1) - end. - -make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct -> - V; -make_object({struct, P}, #decoder{object_hook=eep18}) -> - {P}; -make_object({struct, P}, #decoder{object_hook=proplist}) -> - P; -make_object(V, #decoder{object_hook=Hook}) -> - Hook(V). - -decode_object(B, S) -> - decode_object(B, S#decoder{state=key}, []). 
- -decode_object(B, S=#decoder{state=key}, Acc) -> - case tokenize(B, S) of - {end_object, S1} -> - V = make_object({struct, lists:reverse(Acc)}, S1), - {V, S1#decoder{state=null}}; - {{const, K}, S1} -> - {colon, S2} = tokenize(B, S1), - {V, S3} = decode1(B, S2#decoder{state=null}), - decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) - end; -decode_object(B, S=#decoder{state=comma}, Acc) -> - case tokenize(B, S) of - {end_object, S1} -> - V = make_object({struct, lists:reverse(Acc)}, S1), - {V, S1#decoder{state=null}}; - {comma, S1} -> - decode_object(B, S1#decoder{state=key}, Acc) - end. - -decode_array(B, S) -> - decode_array(B, S#decoder{state=any}, []). - -decode_array(B, S=#decoder{state=any}, Acc) -> - case tokenize(B, S) of - {end_array, S1} -> - {lists:reverse(Acc), S1#decoder{state=null}}; - {start_array, S1} -> - {Array, S2} = decode_array(B, S1), - decode_array(B, S2#decoder{state=comma}, [Array | Acc]); - {start_object, S1} -> - {Array, S2} = decode_object(B, S1), - decode_array(B, S2#decoder{state=comma}, [Array | Acc]); - {{const, Const}, S1} -> - decode_array(B, S1#decoder{state=comma}, [Const | Acc]) - end; -decode_array(B, S=#decoder{state=comma}, Acc) -> - case tokenize(B, S) of - {end_array, S1} -> - {lists:reverse(Acc), S1#decoder{state=null}}; - {comma, S1} -> - decode_array(B, S1#decoder{state=any}, Acc) - end. - -tokenize_string(B, S=#decoder{offset=O}) -> - case tokenize_string_fast(B, O) of - {escape, O1} -> - Length = O1 - O, - S1 = ?ADV_COL(S, Length), - <<_:O/binary, Head:Length/binary, _/binary>> = B, - tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); - O1 -> - Length = O1 - O, - <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, - {{const, String}, ?ADV_COL(S, Length + 1)} - end. 
- -tokenize_string_fast(B, O) -> - case B of - <<_:O/binary, ?Q, _/binary>> -> - O; - <<_:O/binary, $\\, _/binary>> -> - {escape, O}; - <<_:O/binary, C1, _/binary>> when C1 < 128 -> - tokenize_string_fast(B, 1 + O); - <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, - C2 >= 128, C2 =< 191 -> - tokenize_string_fast(B, 2 + O); - <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, - C2 >= 128, C2 =< 191, - C3 >= 128, C3 =< 191 -> - tokenize_string_fast(B, 3 + O); - <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, - C2 >= 128, C2 =< 191, - C3 >= 128, C3 =< 191, - C4 >= 128, C4 =< 191 -> - tokenize_string_fast(B, 4 + O); - _ -> - throw(invalid_utf8) - end. - -tokenize_string(B, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, ?Q, _/binary>> -> - {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; - <<_:O/binary, "\\\"", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); - <<_:O/binary, "\\\\", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); - <<_:O/binary, "\\/", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); - <<_:O/binary, "\\b", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); - <<_:O/binary, "\\f", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); - <<_:O/binary, "\\n", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); - <<_:O/binary, "\\r", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); - <<_:O/binary, "\\t", _/binary>> -> - tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); - <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> - C = erlang:list_to_integer([C3, C2, C1, C0], 16), - if C > 16#D7FF, C < 16#DC00 -> - %% coalesce UTF-16 surrogate pair - <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, - D = erlang:list_to_integer([D3,D2,D1,D0], 16), - Acc1 = [unicode:characters_to_binary( - <>, - utf16) - | Acc], - tokenize_string(B, ?ADV_COL(S, 12), Acc1); - true -> - Acc1 = 
[unicode:characters_to_binary([C]) | Acc], - tokenize_string(B, ?ADV_COL(S, 6), Acc1) - end; - <<_:O/binary, C1, _/binary>> when C1 < 128 -> - tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); - <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, - C2 >= 128, C2 =< 191 -> - tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); - <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, - C2 >= 128, C2 =< 191, - C3 >= 128, C3 =< 191 -> - tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); - <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, - C2 >= 128, C2 =< 191, - C3 >= 128, C3 =< 191, - C4 >= 128, C4 =< 191 -> - tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); - _ -> - throw(invalid_utf8) - end. - -tokenize_number(B, S) -> - case tokenize_number(B, sign, S, []) of - {{int, Int}, S1} -> - {{const, list_to_integer(Int)}, S1}; - {{float, Float}, S1} -> - {{const, list_to_float(Float)}, S1} - end. - -tokenize_number(B, sign, S=#decoder{offset=O}, []) -> - case B of - <<_:O/binary, $-, _/binary>> -> - tokenize_number(B, int, ?INC_COL(S), [$-]); - _ -> - tokenize_number(B, int, S, []) - end; -tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, $0, _/binary>> -> - tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); - <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> - tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) - end; -tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> - tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); - _ -> - tokenize_number(B, frac, S, Acc) - end; -tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> - tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); - <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> - tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. 
| Acc]); - _ -> - {{int, lists:reverse(Acc)}, S} - end; -tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> - tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); - <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> - tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); - _ -> - {{float, lists:reverse(Acc)}, S} - end; -tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> - tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); - _ -> - tokenize_number(B, eint, S, Acc) - end; -tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> - tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) - end; -tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> - case B of - <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> - tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); - _ -> - {{float, lists:reverse(Acc)}, S} - end. 
- -tokenize(B, S=#decoder{offset=O}) -> - case B of - <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> - tokenize(B, ?INC_CHAR(S, C)); - <<_:O/binary, "{", _/binary>> -> - {start_object, ?INC_COL(S)}; - <<_:O/binary, "}", _/binary>> -> - {end_object, ?INC_COL(S)}; - <<_:O/binary, "[", _/binary>> -> - {start_array, ?INC_COL(S)}; - <<_:O/binary, "]", _/binary>> -> - {end_array, ?INC_COL(S)}; - <<_:O/binary, ",", _/binary>> -> - {comma, ?INC_COL(S)}; - <<_:O/binary, ":", _/binary>> -> - {colon, ?INC_COL(S)}; - <<_:O/binary, "null", _/binary>> -> - {{const, null}, ?ADV_COL(S, 4)}; - <<_:O/binary, "true", _/binary>> -> - {{const, true}, ?ADV_COL(S, 4)}; - <<_:O/binary, "false", _/binary>> -> - {{const, false}, ?ADV_COL(S, 5)}; - <<_:O/binary, "\"", _/binary>> -> - tokenize_string(B, ?INC_COL(S)); - <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) - orelse C =:= $- -> - tokenize_number(B, S); - <<_:O/binary>> -> - trim = S#decoder.state, - {eof, S} - end. -%% -%% Tests -%% --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -%% testing constructs borrowed from the Yaws JSON implementation. - -%% Create an object from a list of Key/Value pairs. - -obj_new() -> - {struct, []}. - -is_obj({struct, Props}) -> - F = fun ({K, _}) when is_binary(K) -> true end, - lists:all(F, Props). - -obj_from_list(Props) -> - Obj = {struct, Props}, - ?assert(is_obj(Obj)), - Obj. - -%% Test for equivalence of Erlang terms. -%% Due to arbitrary order of construction, equivalent objects might -%% compare unequal as erlang terms, so we need to carefully recurse -%% through aggregates (tuples and objects). - -equiv({struct, Props1}, {struct, Props2}) -> - equiv_object(Props1, Props2); -equiv(L1, L2) when is_list(L1), is_list(L2) -> - equiv_list(L1, L2); -equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; -equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; -equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. 
- -%% Object representation and traversal order is unknown. -%% Use the sledgehammer and sort property lists. - -equiv_object(Props1, Props2) -> - L1 = lists:keysort(1, Props1), - L2 = lists:keysort(1, Props2), - Pairs = lists:zip(L1, L2), - true = lists:all(fun({{K1, V1}, {K2, V2}}) -> - equiv(K1, K2) and equiv(V1, V2) - end, Pairs). - -%% Recursively compare tuple elements for equivalence. - -equiv_list([], []) -> - true; -equiv_list([V1 | L1], [V2 | L2]) -> - equiv(V1, V2) andalso equiv_list(L1, L2). - -decode_test() -> - [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), - <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). - -e2j_vec_test() -> - test_one(e2j_test_vec(utf8), 1). - -test_one([], _N) -> - %% io:format("~p tests passed~n", [N-1]), - ok; -test_one([{E, J} | Rest], N) -> - %% io:format("[~p] ~p ~p~n", [N, E, J]), - true = equiv(E, decode(J)), - true = equiv(E, decode(encode(E))), - test_one(Rest, 1+N). - -e2j_test_vec(utf8) -> - [ - {1, "1"}, - {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes - {-1, "-1"}, - {-3.1416, "-3.14160"}, - {12.0e10, "1.20000e+11"}, - {1.234E+10, "1.23400e+10"}, - {-1.234E-10, "-1.23400e-10"}, - {10.0, "1.0e+01"}, - {123.456, "1.23456E+2"}, - {10.0, "1e1"}, - {<<"foo">>, "\"foo\""}, - {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, - {<<"">>, "\"\""}, - {<<"\n\n\n">>, "\"\\n\\n\\n\""}, - {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, - {obj_new(), "{}"}, - {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, - {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), - "{\"foo\":\"bar\",\"baz\":123}"}, - {[], "[]"}, - {[[]], "[[]]"}, - {[1, <<"foo">>], "[1,\"foo\"]"}, - - %% json array in a json object - {obj_from_list([{<<"foo">>, [123]}]), - "{\"foo\":[123]}"}, - - %% json object in a json object - {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), - "{\"foo\":{\"bar\":true}}"}, - - %% fold evaluation order - {obj_from_list([{<<"foo">>, []}, - 
{<<"bar">>, obj_from_list([{<<"baz">>, true}])}, - {<<"alice">>, <<"bob">>}]), - "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, - - %% json object in a json array - {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], - "[-123,\"foo\",{\"bar\":[]},null]"} - ]. - -%% test utf8 encoding -encoder_utf8_test() -> - %% safe conversion case (default) - <<"\"\\u0001\\u0442\\u0435\\u0441\\u0442\"">> = - iolist_to_binary(encode(<<1,"\321\202\320\265\321\201\321\202">>)), - - %% raw utf8 output (optional) - Enc = mochijson2:encoder([{utf8, true}]), - <<34,"\\u0001",209,130,208,181,209,129,209,130,34>> = - iolist_to_binary(Enc(<<1,"\321\202\320\265\321\201\321\202">>)). - -input_validation_test() -> - Good = [ - {16#00A3, <>}, %% pound - {16#20AC, <>}, %% euro - {16#10196, <>} %% denarius - ], - lists:foreach(fun({CodePoint, UTF8}) -> - Expect = unicode:characters_to_binary([CodePoint]), - Expect = decode(UTF8) - end, Good), - - Bad = [ - %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte - <>, - %% missing continuations, last byte in each should be 80-BF - <>, - <>, - <>, - %% we don't support code points > 10FFFF per RFC 3629 - <>, - %% escape characters trigger a different code path - <> - ], - lists:foreach( - fun(X) -> - ok = try decode(X) catch invalid_utf8 -> ok end, - %% could be {ucs,{bad_utf8_character_code}} or - %% {json_encode,{bad_char,_}} - {'EXIT', _} = (catch encode(X)) - end, Bad). - -inline_json_test() -> - ?assertEqual(<<"\"iodata iodata\"">>, - iolist_to_binary( - encode({json, [<<"\"iodata">>, " iodata\""]}))), - ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, - decode( - encode({struct, - [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), - ok. - -big_unicode_test() -> - UTF8Seq = unicode:characters_to_binary([16#0001d120]), - ?assertEqual( - <<"\"\\ud834\\udd20\"">>, - iolist_to_binary(encode(UTF8Seq))), - ?assertEqual( - UTF8Seq, - decode(iolist_to_binary(encode(UTF8Seq)))), - ok. 
- -custom_decoder_test() -> - ?assertEqual( - {struct, [{<<"key">>, <<"value">>}]}, - (decoder([]))("{\"key\": \"value\"}")), - F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, - ?assertEqual( - win, - (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), - ok. - -atom_test() -> - %% JSON native atoms - [begin - ?assertEqual(A, decode(atom_to_list(A))), - ?assertEqual(iolist_to_binary(atom_to_list(A)), - iolist_to_binary(encode(A))) - end || A <- [true, false, null]], - %% Atom to string - ?assertEqual( - <<"\"foo\"">>, - iolist_to_binary(encode(foo))), - ?assertEqual( - <<"\"\\ud834\\udd20\"">>, - iolist_to_binary( - encode( - binary_to_atom( - unicode:characters_to_binary([16#0001d120]), latin1)))), - ok. - -key_encode_test() -> - %% Some forms are accepted as keys that would not be strings in other - %% cases - ?assertEqual( - <<"{\"foo\":1}">>, - iolist_to_binary(encode({struct, [{foo, 1}]}))), - ?assertEqual( - <<"{\"foo\":1}">>, - iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), - ?assertEqual( - <<"{\"foo\":1}">>, - iolist_to_binary(encode({struct, [{"foo", 1}]}))), - ?assertEqual( - <<"{\"foo\":1}">>, - iolist_to_binary(encode([{foo, 1}]))), - ?assertEqual( - <<"{\"foo\":1}">>, - iolist_to_binary(encode([{<<"foo">>, 1}]))), - ?assertEqual( - <<"{\"foo\":1}">>, - iolist_to_binary(encode([{"foo", 1}]))), - ?assertEqual( - <<"{\"\\ud834\\udd20\":1}">>, - iolist_to_binary( - encode({struct, [{[16#0001d120], 1}]}))), - ?assertEqual( - <<"{\"1\":1}">>, - iolist_to_binary(encode({struct, [{1, 1}]}))), - ok. 
- -unsafe_chars_test() -> - Chars = "\"\\\b\f\n\r\t", - [begin - ?assertEqual(false, json_string_is_safe([C])), - ?assertEqual(false, json_bin_is_safe(<>)), - ?assertEqual(<>, decode(encode(<>))) - end || C <- Chars], - ?assertEqual( - false, - json_string_is_safe([16#0001d120])), - ?assertEqual( - false, - json_bin_is_safe(unicode:characters_to_binary([16#0001d120]))), - ?assertEqual( - [16#0001d120], - unicode:characters_to_list( - decode( - encode( - binary_to_atom( - unicode:characters_to_binary([16#0001d120]), - latin1))))), - ?assertEqual( - false, - json_string_is_safe([16#10ffff])), - ?assertEqual( - false, - json_bin_is_safe(unicode:characters_to_binary([16#10ffff]))), - %% solidus can be escaped but isn't unsafe by default - ?assertEqual( - <<"/">>, - decode(<<"\"\\/\"">>)), - ok. - -int_test() -> - ?assertEqual(0, decode("0")), - ?assertEqual(1, decode("1")), - ?assertEqual(11, decode("11")), - ok. - -large_int_test() -> - ?assertEqual(<<"-2147483649214748364921474836492147483649">>, - iolist_to_binary(encode(-2147483649214748364921474836492147483649))), - ?assertEqual(<<"2147483649214748364921474836492147483649">>, - iolist_to_binary(encode(2147483649214748364921474836492147483649))), - ok. - -float_test() -> - ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), - ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), - ok. - -handler_test() -> - ?assertEqual( - {'EXIT',{json_encode,{bad_term,{x,y}}}}, - catch encode({x,y})), - F = fun ({x,y}) -> [] end, - ?assertEqual( - <<"[]">>, - iolist_to_binary((encoder([{handler, F}]))({x, y}))), - ok. - -encode_empty_test_() -> - [{A, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(B)))} - || {A, B} <- [{"eep18 {}", {}}, - {"eep18 {[]}", {[]}}, - {"{struct, []}", {struct, []}}]]. 
- -encode_test_() -> - P = [{<<"k">>, <<"v">>}], - JSON = iolist_to_binary(encode({struct, P})), - [{atom_to_list(F), - ?_assertEqual(JSON, iolist_to_binary(encode(decode(JSON, [{format, F}]))))} - || F <- [struct, eep18, proplist]]. - -format_test_() -> - P = [{<<"k">>, <<"v">>}], - JSON = iolist_to_binary(encode({struct, P})), - [{atom_to_list(F), - ?_assertEqual(A, decode(JSON, [{format, F}]))} - || {F, A} <- [{struct, {struct, P}}, - {eep18, {P}}, - {proplist, P}]]. - -array_test() -> - A = [<<"hello">>], - ?assertEqual(A, decode(encode({array, A}))). - -bad_char_test() -> - ?assertEqual( - {'EXIT', {json_encode, {bad_char, 16#110000}}}, - catch json_string_is_safe([16#110000])). - -utf8_roundtrip_test_() -> - %% These are the boundary cases for UTF8 encoding - Codepoints = [%% 7 bits -> 1 byte - 16#00, 16#7f, - %% 11 bits -> 2 bytes - 16#080, 16#07ff, - %% 16 bits -> 3 bytes - 16#0800, 16#ffff, - 16#d7ff, 16#e000, - %% 21 bits -> 4 bytes - 16#010000, 16#10ffff], - UTF8 = unicode:characters_to_binary(Codepoints), - Encode = encoder([{utf8, true}]), - [{"roundtrip escaped", - ?_assertEqual(UTF8, decode(encode(UTF8)))}, - {"roundtrip utf8", - ?_assertEqual(UTF8, decode(Encode(UTF8)))}]. - -utf8_non_character_test_() -> - S = unicode:characters_to_binary([16#ffff, 16#fffe]), - [{"roundtrip escaped", ?_assertEqual(S, decode(encode(S)))}, - {"roundtrip utf8", ?_assertEqual(S, decode((encoder([{utf8, true}]))(S)))}]. - -decode_map_test() -> - Json = "{\"var1\": 3, \"var2\": {\"var3\": 7}}", - M = #{<<"var1">> => 3,<<"var2">> => #{<<"var3">> => 7}}, - ?assertEqual(M, decode(Json, [{format, map}])). - -encode_map_test() -> - ?assertEqual(<<"{\"a\":1}">>, iolist_to_binary(encode(#{a => 1}))), - M = #{<<"a">> => 1, <<"b">> => #{<<"c">> => 2}}, - ?assertEqual(M, decode(iolist_to_binary(encode(#{a => 1, b => #{ c => 2}})), [{format, map}])). - -encode_empty_map_test() -> - ?assertEqual(<<"{}">>, encode(#{})). - --endif. 
diff --git a/src/mochinum.erl b/src/mochinum.erl deleted file mode 100644 index 867d6c9..0000000 --- a/src/mochinum.erl +++ /dev/null @@ -1,374 +0,0 @@ -%% @copyright 2007 Mochi Media, Inc. -%% @author Bob Ippolito -%% -%% Permission is hereby granted, free of charge, to any person obtaining a -%% copy of this software and associated documentation files (the "Software"), -%% to deal in the Software without restriction, including without limitation -%% the rights to use, copy, modify, merge, publish, distribute, sublicense, -%% and/or sell copies of the Software, and to permit persons to whom the -%% Software is furnished to do so, subject to the following conditions: -%% -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. -%% -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -%% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -%% DEALINGS IN THE SOFTWARE. - -%% @doc Useful numeric algorithms for floats that cover some deficiencies -%% in the math module. More interesting is digits/1, which implements -%% the algorithm from: -%% http://www.cs.indiana.edu/~burger/fp/index.html -%% See also "Printing Floating-Point Numbers Quickly and Accurately" -%% in Proceedings of the SIGPLAN '96 Conference on Programming Language -%% Design and Implementation. - --module(mochinum). --author("Bob Ippolito "). --export([digits/1, frexp/1, int_pow/2, int_ceil/1]). - -%% IEEE 754 Float exponent bias --define(FLOAT_BIAS, 1022). --define(MIN_EXP, -1074). --define(BIG_POW, 4503599627370496). 
- -%% External API - -%% @spec digits(number()) -> string() -%% @doc Returns a string that accurately represents the given integer or float -%% using a conservative amount of digits. Great for generating -%% human-readable output, or compact ASCII serializations for floats. -digits(N) when is_integer(N) -> - integer_to_list(N); -digits(Float) when Float == 0.0 -> - "0.0"; -digits(Float) -> - {Frac1, Exp1} = frexp_int(Float), - [Place0 | Digits0] = digits1(Float, Exp1, Frac1), - {Place, Digits} = transform_digits(Place0, Digits0), - R = insert_decimal(Place, Digits), - case Float < 0 of - true -> - [$- | R]; - _ -> - R - end. - -%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()} -%% @doc Return the fractional and exponent part of an IEEE 754 double, -%% equivalent to the libc function of the same name. -%% F = Frac * pow(2, Exp). -frexp(F) -> - frexp1(unpack(F)). - -%% @spec int_pow(X::integer(), N::integer()) -> Y::integer() -%% @doc Moderately efficient way to exponentiate integers. -%% int_pow(10, 2) = 100. -int_pow(_X, 0) -> - 1; -int_pow(X, N) when N > 0 -> - int_pow(X, N, 1). - -%% @spec int_ceil(F::float()) -> integer() -%% @doc Return the ceiling of F as an integer. The ceiling is defined as -%% F when F == trunc(F); -%% trunc(F) when F < 0; -%% trunc(F) + 1 when F > 0. -int_ceil(X) -> - T = trunc(X), - case (X - T) of - Pos when Pos > 0 -> T + 1; - _ -> T - end. - - -%% Internal API - -int_pow(X, N, R) when N < 2 -> - R * X; -int_pow(X, N, R) -> - int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end). - -insert_decimal(0, S) -> - "0." ++ S; -insert_decimal(Place, S) when Place > 0 -> - L = length(S), - case Place - L of - 0 -> - S ++ ".0"; - N when N < 0 -> - {S0, S1} = lists:split(L + N, S), - S0 ++ "." ++ S1; - N when N < 6 -> - %% More places than digits - S ++ lists:duplicate(N, $0) ++ ".0"; - _ -> - insert_decimal_exp(Place, S) - end; -insert_decimal(Place, S) when Place > -6 -> - "0." 
++ lists:duplicate(abs(Place), $0) ++ S; -insert_decimal(Place, S) -> - insert_decimal_exp(Place, S). - -insert_decimal_exp(Place, S) -> - [C | S0] = S, - S1 = case S0 of - [] -> - "0"; - _ -> - S0 - end, - Exp = case Place < 0 of - true -> - "e-"; - false -> - "e+" - end, - [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)). - - -digits1(Float, Exp, Frac) -> - Round = ((Frac band 1) =:= 0), - case Exp >= 0 of - true -> - BExp = 1 bsl Exp, - case (Frac =/= ?BIG_POW) of - true -> - scale((Frac * BExp * 2), 2, BExp, BExp, - Round, Round, Float); - false -> - scale((Frac * BExp * 4), 4, (BExp * 2), BExp, - Round, Round, Float) - end; - false -> - case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of - true -> - scale((Frac * 2), 1 bsl (1 - Exp), 1, 1, - Round, Round, Float); - false -> - scale((Frac * 4), 1 bsl (2 - Exp), 2, 1, - Round, Round, Float) - end - end. - -scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) -> - Est = int_ceil(math:log10(abs(Float)) - 1.0e-10), - %% Note that the scheme implementation uses a 326 element look-up table - %% for int_pow(10, N) where we do not. - case Est >= 0 of - true -> - fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est, - LowOk, HighOk); - false -> - Scale = int_pow(10, -Est), - fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est, - LowOk, HighOk) - end. - -fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) -> - TooLow = case HighOk of - true -> - (R + MPlus) >= S; - false -> - (R + MPlus) > S - end, - case TooLow of - true -> - [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)]; - false -> - [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)] - end. 
- -generate(R0, S, MPlus, MMinus, LowOk, HighOk) -> - D = R0 div S, - R = R0 rem S, - TC1 = case LowOk of - true -> - R =< MMinus; - false -> - R < MMinus - end, - TC2 = case HighOk of - true -> - (R + MPlus) >= S; - false -> - (R + MPlus) > S - end, - case TC1 of - false -> - case TC2 of - false -> - [D | generate(R * 10, S, MPlus * 10, MMinus * 10, - LowOk, HighOk)]; - true -> - [D + 1] - end; - true -> - case TC2 of - false -> - [D]; - true -> - case R * 2 < S of - true -> - [D]; - false -> - [D + 1] - end - end - end. - -unpack(Float) -> - <> = <>, - {Sign, Exp, Frac}. - -frexp1({_Sign, 0, 0}) -> - {0.0, 0}; -frexp1({Sign, 0, Frac}) -> - Exp = log2floor(Frac), - <> = <>, - {Frac1, -(?FLOAT_BIAS) - 52 + Exp}; -frexp1({Sign, Exp, Frac}) -> - <> = <>, - {Frac1, Exp - ?FLOAT_BIAS}. - -log2floor(Int) -> - log2floor(Int, 0). - -log2floor(0, N) -> - N; -log2floor(Int, N) -> - log2floor(Int bsr 1, 1 + N). - - -transform_digits(Place, [0 | Rest]) -> - transform_digits(Place, Rest); -transform_digits(Place, Digits) -> - {Place, [$0 + D || D <- Digits]}. - - -frexp_int(F) -> - case unpack(F) of - {_Sign, 0, Frac} -> - {Frac, ?MIN_EXP}; - {_Sign, Exp, Frac} -> - {Frac + (1 bsl 52), Exp - 53 - ?FLOAT_BIAS} - end. - -%% -%% Tests -%% --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -int_ceil_test() -> - ?assertEqual(1, int_ceil(0.0001)), - ?assertEqual(0, int_ceil(0.0)), - ?assertEqual(1, int_ceil(0.99)), - ?assertEqual(1, int_ceil(1.0)), - ?assertEqual(-1, int_ceil(-1.5)), - ?assertEqual(-2, int_ceil(-2.0)), - ok. - -int_pow_test() -> - ?assertEqual(1, int_pow(1, 1)), - ?assertEqual(1, int_pow(1, 0)), - ?assertEqual(1, int_pow(10, 0)), - ?assertEqual(10, int_pow(10, 1)), - ?assertEqual(100, int_pow(10, 2)), - ?assertEqual(1000, int_pow(10, 3)), - ok. 
- -digits_test() -> - ?assertEqual("0", - digits(0)), - ?assertEqual("0.0", - digits(0.0)), - ?assertEqual("0.0", - digits(-0.0)), - ?assertEqual("1.0", - digits(1.0)), - ?assertEqual("-1.0", - digits(-1.0)), - ?assertEqual("0.1", - digits(0.1)), - ?assertEqual("0.01", - digits(0.01)), - ?assertEqual("0.001", - digits(0.001)), - ?assertEqual("1.0e+6", - digits(1000000.0)), - ?assertEqual("0.5", - digits(0.5)), - ?assertEqual("4503599627370496.0", - digits(4503599627370496.0)), - %% small denormalized number - %% 4.94065645841246544177e-324 =:= 5.0e-324 - <> = <<0,0,0,0,0,0,0,1>>, - ?assertEqual("5.0e-324", - digits(SmallDenorm)), - ?assertEqual(SmallDenorm, - list_to_float(digits(SmallDenorm))), - %% large denormalized number - %% 2.22507385850720088902e-308 - <> = <<0,15,255,255,255,255,255,255>>, - ?assertEqual("2.225073858507201e-308", - digits(BigDenorm)), - ?assertEqual(BigDenorm, - list_to_float(digits(BigDenorm))), - %% small normalized number - %% 2.22507385850720138309e-308 - <> = <<0,16,0,0,0,0,0,0>>, - ?assertEqual("2.2250738585072014e-308", - digits(SmallNorm)), - ?assertEqual(SmallNorm, - list_to_float(digits(SmallNorm))), - %% large normalized number - %% 1.79769313486231570815e+308 - <> = <<127,239,255,255,255,255,255,255>>, - ?assertEqual("1.7976931348623157e+308", - digits(LargeNorm)), - ?assertEqual(LargeNorm, - list_to_float(digits(LargeNorm))), - %% issue #10 - mochinum:frexp(math:pow(2, -1074)). - ?assertEqual("5.0e-324", - digits(math:pow(2, -1074))), - ok. 
- -frexp_test() -> - %% zero - ?assertEqual({0.0, 0}, frexp(0.0)), - %% one - ?assertEqual({0.5, 1}, frexp(1.0)), - %% negative one - ?assertEqual({-0.5, 1}, frexp(-1.0)), - %% small denormalized number - %% 4.94065645841246544177e-324 - <> = <<0,0,0,0,0,0,0,1>>, - ?assertEqual({0.5, -1073}, frexp(SmallDenorm)), - %% large denormalized number - %% 2.22507385850720088902e-308 - <> = <<0,15,255,255,255,255,255,255>>, - ?assertEqual( - {0.99999999999999978, -1022}, - frexp(BigDenorm)), - %% small normalized number - %% 2.22507385850720138309e-308 - <> = <<0,16,0,0,0,0,0,0>>, - ?assertEqual({0.5, -1021}, frexp(SmallNorm)), - %% large normalized number - %% 1.79769313486231570815e+308 - <> = <<127,239,255,255,255,255,255,255>>, - ?assertEqual( - {0.99999999999999989, 1024}, - frexp(LargeNorm)), - %% issue #10 - mochinum:frexp(math:pow(2, -1074)). - ?assertEqual( - {0.5, -1073}, - frexp(math:pow(2, -1074))), - ok. - --endif. From 3a1714fc3ab21674e23cbc3a0a66c2ad9605c1d9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 1 Apr 2026 13:03:39 +0100 Subject: [PATCH 21/53] Remove leading empty bin if it occurs Where URI starts "/" will add a leading <<>> to the split path - which is confusing and easy to forget about. 
--- src/riak_api_web_acceptor.erl | 8 +++++++- test/riak_api_web_ets_store.erl | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index aafb324..d7a5039 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -257,7 +257,13 @@ split_path(URIPath) -> case uri_string:normalize(URIPath, [return_map]) of URIMap when is_map(URIMap) -> Path = maps:get(path, URIMap, <<"">>), - SplitPath = string:split(Path, <<"/">>, all), + SplitPath = + case string:split(Path, <<"/">>, all) of + [<<>>|Rest] -> + Rest; + PathList when is_list(PathList) -> + PathList + end, case uri_string:dissect_query(maps:get(query, URIMap, <<"">>)) of QueryParams when is_list(QueryParams) -> {ok, {Path, SplitPath, QueryParams}}; diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index 06112f2..a7c6b88 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -72,7 +72,7 @@ no_match | {method_not_allowed, list(riak_api_web_acceptor:method())} | {ok, context(), riak_api_web_handler:limits()}. 
-match_route(Method, _P, [<<>>, <<"ets_object">>, <<"key">>, Key]) when +match_route(Method, _P, [<<"ets_object">>, <<"key">>, Key]) when Method == 'GET'; Method == 'PUT' -> { @@ -80,9 +80,9 @@ match_route(Method, _P, [<<>>, <<"ets_object">>, <<"key">>, Key]) when #context{key = Key, method = Method, type = object}, {10, 1024, 16 * 1024} }; -match_route(_, _, [<<>>, <<"ets_object">>, <<"key">>, _Key]) -> +match_route(_, _, [<<"ets_object">>, <<"key">>, _Key]) -> {method_not_allowed, ['GET', 'PUT']}; -match_route(Method, _P, [<<>>, <<"ets_file">>, <<"filename">>, Key]) when +match_route(Method, _P, [<<"ets_file">>, <<"filename">>, Key]) when Method == 'GET'; Method == 'PUT' -> { From 01ddd3f04c5133c4cf2ec831e7c3033b6c317ca1 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 1 Apr 2026 13:19:31 +0100 Subject: [PATCH 22/53] Use nomatch, and context to be last not first Address initial feedback --- src/riak_api_web.erl | 10 ++--- src/riak_api_web_acceptor.erl | 26 +++++------ src/riak_api_web_handler.erl | 28 ++++++------ test/riak_api_web_ets_store.erl | 76 ++++++++++++++++---------------- test/riak_api_web_get_random.erl | 56 +++++++++++------------ 5 files changed, 98 insertions(+), 98 deletions(-) diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index 2881895..c35a588 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -54,8 +54,8 @@ add_routes(Routes) -> { ok, module(), - any(), - {pos_integer(), pos_integer(), pos_integer()} + {pos_integer(), pos_integer(), pos_integer()}, + any() } | riak_api_web_acceptor:halt_response(). 
get_route(Method, Path, SplitPath) -> @@ -66,7 +66,7 @@ get_route([], _Method, _Path, _SP) -> {halt, 404, [], <<>>, []}; get_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> case CallbackMod:match_route(Method, Path, SplitPath) of - no_match -> + nomatch -> get_route(Rest, Method, Path, SplitPath); {method_not_allowed, AllowedMethods} -> AllowHdrVal = @@ -77,8 +77,8 @@ get_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> ) ), {halt, 405, [{'Allow', AllowHdrVal}], <<>>, []}; - {ok, Context, {MaxHdrCount, MaxHdrSize, MaxBodySize}} -> - {ok, CallbackMod, Context, {MaxHdrCount, MaxHdrSize, MaxBodySize}} + {ok, {MaxHdrCount, MaxHdrSize, MaxBodySize}, Context} -> + {ok, CallbackMod, {MaxHdrCount, MaxHdrSize, MaxBodySize}, Context} end. get_listeners() -> diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index d7a5039..5d25f42 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -148,8 +148,8 @@ handle_request(Socket, InitBuffer) -> { ok, CallbackMod, - InitModCtx, - {MaxHdrCount, MaxHdrSize, MaxBodySize} + {MaxHdrCount, MaxHdrSize, MaxBodySize}, + InitModCtx } ?= riak_api_web:get_route(Method, Path, SplitPath), {ok, ReqHeaders, BdyBuffer} ?= @@ -160,15 +160,15 @@ handle_request(Socket, InitBuffer) -> ), {ok, ModCtx1} ?= CallbackMod:check_permissions( - InitModCtx, ReqHeaders, element(1, Socket), - Peer + Peer, + InitModCtx ), {ok, ModCtx2} ?= - CallbackMod:parse_query_params(ModCtx1, QueryParams), + CallbackMod:parse_query_params(QueryParams, ModCtx1), {ok, ModCtx3} ?= - CallbackMod:parse_request_headers(ModCtx2, ReqHeaders), + CallbackMod:parse_request_headers(ReqHeaders, ModCtx2), {ok, {CLorChunk, UseGzip}} ?= expect_body(ReqHeaders), {ok, InitReqBdy} ?= riak_api_web_body:initiate_body( @@ -179,10 +179,10 @@ handle_request(Socket, InitBuffer) -> MaxBodySize ), ok ?= send_continue(Socket, ReqHeaders), - {ok, ModCtx4, {Code, RspHeaders, RspBody, KeepAliveOK, ReqBdy1}} ?= + {ok, {Code, 
RspHeaders, RspBody, KeepAliveOK, ReqBdy1}, ModCtx4} ?= CallbackMod:process_request( - ModCtx3, - InitReqBdy + InitReqBdy, + ModCtx3 ), Keepalive = request_prefers_keepalive(Version, ReqHeaders) andalso @@ -509,9 +509,9 @@ handle_response( ResponseCompleteTime = os:system_time(microsecond), ok = CallbackMod:record_request( - Context, {StartTime, RequestCompleteTime, ResponseCompleteTime}, - stream_complete + stream_complete, + Context ), {Keepalive, BufferIn}; handle_response( @@ -532,9 +532,9 @@ handle_response( ResponseCompleteTime = os:system_time(microsecond), ok = CallbackMod:record_request( - Context, {StartTime, RequestCompleteTime, ResponseCompleteTime}, - send_complete + send_complete, + Context ), {Keepalive, BufferIn}; handle_response({halt, RspCode, RspHeaders, RspBody, Socket}) -> diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 29109c2..df5a710 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -63,16 +63,16 @@ %% @doc match_route for the module %% When called each route handled by this module must be checked, and either -%% `no_match` returned should none match - or the initial context with the +%% `nomatch` returned should none match - or the initial context with the %% limits for that route. -callback match_route( riak_api_web_acceptor:method(), unicode:chardata(), list(unicode:chardata()) ) -> - no_match | + nomatch | {method_not_allowed, list(riak_api_web_acceptor:method())} | - {ok, context(), limits()}. + {ok, limits(), context()}. -type peer() :: inet:ip_address(). %% The IP address of the client device connected to the socket @@ -85,10 +85,10 @@ %% On failure return a halt_response with e.g. 401 /403 response codes -callback check_permissions( - context(), riak_api_web_headers:headers(), riak_api_web_socket:scheme(), - peer() + peer(), + context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). 
@@ -100,16 +100,16 @@ %% parameter had no value - in which case the value will be the atom `true` -callback parse_query_params( - context(), - query_params() + query_params(), + context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). %% @doc parse and validate the request headers -callback parse_request_headers( - context(), - riak_api_web_headers:headers() + riak_api_web_headers:headers(), + context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). @@ -153,19 +153,19 @@ %% the buffer is available to the acceptor. -callback process_request( - context(), - riak_api_web_body:req_body() + riak_api_web_body:req_body(), + context() ) -> { ok, - context(), { riak_api_web_acceptor:response_code(), riak_api_web_headers:header_list(), response_body(), boolean(), riak_api_web_body:req_body() - } + }, + context() } | riak_api_web_acceptor:halt_response(). -type timings() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}. @@ -179,4 +179,4 @@ % was the output sent chunk encoded, or sent as a whole body %% @doc Record the output of the interaction --callback record_request(context(), timings(), completion()) -> ok. \ No newline at end of file +-callback record_request(timings(), completion(), context()) -> ok. \ No newline at end of file diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index a7c6b88..d3b693a 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -69,16 +69,16 @@ unicode:chardata(), list(unicode:chardata()) ) -> - no_match + nomatch | {method_not_allowed, list(riak_api_web_acceptor:method())} - | {ok, context(), riak_api_web_handler:limits()}. + | {ok, riak_api_web_handler:limits(), context()}. 
match_route(Method, _P, [<<"ets_object">>, <<"key">>, Key]) when Method == 'GET'; Method == 'PUT' -> { ok, - #context{key = Key, method = Method, type = object}, - {10, 1024, 16 * 1024} + {10, 1024, 16 * 1024}, + #context{key = Key, method = Method, type = object} }; match_route(_, _, [<<"ets_object">>, <<"key">>, _Key]) -> {method_not_allowed, ['GET', 'PUT']}; @@ -87,98 +87,98 @@ match_route(Method, _P, [<<"ets_file">>, <<"filename">>, Key]) when -> { ok, - #context{key = Key, method = Method, type = file}, - {10, 1024, 1024 * 1024} + {10, 1024, 1024 * 1024}, + #context{key = Key, method = Method, type = object} }; match_route(_, _, _) -> - no_match. + nomatch. %% @doc check_permissions for using this module or route -spec check_permissions( - context(), riak_api_web_headers:headers(), riak_api_web_socket:scheme(), - riak_api_web_handler:peer() + riak_api_web_handler:peer(), + context() ) -> {ok, context()}. -check_permissions(Ctx, _Hdrs, _Scheme, _Peer) -> +check_permissions(_Hdrs, _Scheme, _Peer, Ctx) -> {ok, Ctx}. %% @doc parse and validate query params, passed as a map -spec parse_query_params( - context(), - riak_api_web_handler:query_params() + riak_api_web_handler:query_params(), + context() ) -> {ok, context()} | riak_api_web_acceptor:halt_response(). -parse_query_params(Ctx, _Params) -> +parse_query_params(_Params, Ctx) -> {ok, Ctx}. %% @doc parse and validate the request headers -spec parse_request_headers( - context(), - riak_api_web_headers:headers() + riak_api_web_headers:headers(), + context() ) -> {ok, context()} | riak_api_web_acceptor:halt_response(). -parse_request_headers(Ctx, _ReqHeaders) -> +parse_request_headers(_ReqHeaders, Ctx) -> {ok, Ctx}. 
%% @doc Process the request and produce a response -spec process_request( - context(), - riak_api_web_body:req_body() + riak_api_web_body:req_body(), + context() ) -> { ok, - context(), { riak_api_web_acceptor:response_code(), riak_api_web_headers:header_list(), riak_api_web_handler:response_body(), boolean(), riak_api_web_body:req_body() - } + }, + context() }. process_request( - Ctx = #context{key = Key, method = 'GET', type = object}, RqBdy + RqBdy, Ctx = #context{key = Key, method = 'GET', type = object} ) -> case ets:lookup(?MODULE, {object, Key}) of [{{object, Key}, Value}] -> - {ok, Ctx, {200, [], Value, true, RqBdy}}; + {ok, {200, [], Value, true, RqBdy}, Ctx}; [] -> - {ok, Ctx, {404, [], <<>>, true, RqBdy}} + {ok, {404, [], <<>>, true, RqBdy}, Ctx} end; process_request( - Ctx = #context{key = Key, method = 'PUT', type = object}, RqBdy + RqBdy, Ctx = #context{key = Key, method = 'PUT', type = object} ) -> case riak_api_web_body:get_body(RqBdy, all, 10000) of {Value, UpdRqBdy} when is_binary(Value) -> ets:insert(?MODULE, {{object, Key}, Value}), ETag = base64:encode(crypto:hash(md5, Value), #{mode => urlsafe}), - {ok, Ctx, {204, [{'Etag', ETag}], <<>>, true, UpdRqBdy}}; + {ok, {204, [{'Etag', ETag}], <<>>, true, UpdRqBdy}, Ctx}; {error, content_too_large} -> - {ok, Ctx, {413, [], <<>>, false, RqBdy}} + {ok, {413, [], <<>>, false, RqBdy}, Ctx} end; process_request( - Ctx = #context{key = Key, method = 'GET', type = file}, RqBdy + RqBdy, Ctx = #context{key = Key, method = 'GET', type = file} ) -> case ets:lookup(?MODULE, {file, Key}) of [{{file, Key}, SliceList}] -> { ok, - Ctx, { 200, [], {stream, slice_stream_fun(lists:sort(SliceList))}, true, RqBdy - } + }, + Ctx }; [] -> - {ok, Ctx, {404, [], <<>>, true, RqBdy}} + {ok, {404, [], <<>>, true, RqBdy}, Ctx} end; process_request( - Ctx = #context{key = Key, method = 'PUT', type = file}, RqBdy + RqBdy, Ctx = #context{key = Key, method = 'PUT', type = file} ) -> case riak_api_web_body:get_body(RqBdy, ?SLICE_SIZE, 
10000) of {Slice, UpdRqBdy} when is_binary(Slice) -> @@ -186,6 +186,7 @@ process_request( SliceSize = byte_size(Slice), ets:insert_new(?MODULE, {{slice, SliceKey}, Slice}), process_request( + UpdRqBdy, Ctx#context{ slice_list = [ @@ -196,8 +197,7 @@ process_request( | Ctx#context.slice_list ], last_slice_end = Ctx#context.last_slice_end + SliceSize - }, - UpdRqBdy + } ); {done, UpdRqBdy} -> ets:insert(?MODULE, {{file, Key}, Ctx#context.slice_list}), @@ -206,9 +206,9 @@ process_request( crypto:hash(md5, term_to_binary(Ctx#context.slice_list)), #{mode => urlsafe} ), - {ok, Ctx, {204, [{'Etag', ETag}], <<>>, true, UpdRqBdy}}; + {ok, {204, [{'Etag', ETag}], <<>>, true, UpdRqBdy}, Ctx}; {error, content_too_large} -> - {ok, Ctx, {413, [], <<>>, false, RqBdy}} + {ok, {413, [], <<>>, false, RqBdy}, Ctx} end. generate_uuid() -> @@ -230,12 +230,12 @@ slice_stream_fun(List) -> %% @doc Record the output of the interaction -spec record_request( - context(), riak_api_web_handler:timings(), - riak_api_web_handler:completion() + riak_api_web_handler:completion(), + context() ) -> ok. -record_request(Ctx, Timings, Completion) -> +record_request(Timings, Completion, Ctx) -> {A, B, C} = Timings, io:format( user, diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index 4d401f0..f9199cb 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -66,44 +66,44 @@ unicode:chardata(), list(unicode:chardata()) ) -> - no_match | + nomatch | {method_not_allowed, list(riak_api_web_acceptor:method())} | - {ok, context(), riak_api_web_handler:limits()}. + {ok, riak_api_web_handler:limits(), context()}. match_route('GET', <<"/random_data">>, _SP) -> - {ok, #context{}, {10, 1024, 128 * 1024}}; + {ok, {10, 1024, 128 * 1024}, #context{}}; match_route(_, <<"/random_data">>, _SP) -> {method_not_allowed, ['GET']}; match_route(_, _, _) -> - no_match. + nomatch. 
%% @doc check_permissions for using this module or route -spec check_permissions( - context(), riak_api_web_headers:headers(), riak_api_web_socket:scheme(), - riak_api_web_handler:peer() + riak_api_web_handler:peer(), + context() ) -> {ok, context()}. -check_permissions(Ctx, _Hdrs, _Scheme, _Peer) -> +check_permissions(_Hdrs, _Scheme, _Peer, Ctx) -> {ok, Ctx}. %% @doc parse and validate query params, passed as a map -spec parse_query_params( - context(), - riak_api_web_handler:query_params() + riak_api_web_handler:query_params(), + context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). -parse_query_params(#context{required_size = undefined}, []) -> +parse_query_params([], #context{required_size = undefined}) -> {halt, 400, [], <<"no required_size parameter">>, []}; -parse_query_params(Ctx, []) -> +parse_query_params([], Ctx) -> {ok, Ctx}; -parse_query_params(Ctx, [{<<"required_size">>, RS}|Rest]) -> +parse_query_params([{<<"required_size">>, RS}|Rest], Ctx) -> try case binary_to_integer(RS) of RSI when is_integer(RSI), RSI >= 0 -> - parse_query_params(Ctx#context{required_size = RSI}, Rest); + parse_query_params(Rest, Ctx#context{required_size = RSI}); _BadRS -> {halt, 400, [], <<"invalid required_size ~0p">>, [RS]} end @@ -111,17 +111,17 @@ parse_query_params(Ctx, [{<<"required_size">>, RS}|Rest]) -> _ : _ -> {halt, 400, [], <<"invalid required_size ~0p">>, [RS]} end; -parse_query_params(Ctx,[_Other|Rest]) -> - parse_query_params(Ctx, Rest). +parse_query_params([_Other|Rest], Ctx) -> + parse_query_params(Rest, Ctx). %% @doc parse and validate the request headers -spec parse_request_headers( - context(), - riak_api_web_headers:headers() + riak_api_web_headers:headers(), + context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). 
-parse_request_headers(Ctx, ReqHeaders) -> +parse_request_headers(ReqHeaders, Ctx) -> case riak_api_web_headers:lookup(?ID_HEADER_LWR, ReqHeaders, true) of undefined -> ErrorMsg = <<"request requires x-riak-request_id header">>, @@ -144,39 +144,39 @@ parse_request_headers(Ctx, ReqHeaders) -> %% @doc Process the request and produce a response -spec process_request( - context(), - riak_api_web_body:req_body() + riak_api_web_body:req_body(), + context() ) -> { ok, - context(), { riak_api_web_acceptor:response_code(), riak_api_web_headers:header_list(), riak_api_web_handler:response_body(), boolean(), riak_api_web_body:req_body() - } + }, + context() }. -process_request(Ctx = #context{request_id = RqID, required_size = RS}, RqBdy) +process_request(RqBdy, Ctx = #context{request_id = RqID, required_size = RS}) when is_integer(RqID), is_integer(RS), RS > 0 -> Body = crypto:strong_rand_bytes(RS), RspHdr = {<<"X-Riak-request_id">>, integer_to_binary(RqID)}, { ok, - Ctx, - {200, [RspHdr], Body, true, RqBdy} + {200, [RspHdr], Body, true, RqBdy}, + Ctx }. %% @doc Record the output of the interaction -spec record_request( - context(), riak_api_web_handler:timings(), - riak_api_web_handler:completion() + riak_api_web_handler:completion(), + context() ) -> ok. -record_request(_Ctx, Timings, Completion) -> +record_request(Timings, Completion, _Ctx) -> {A, B, C} = Timings, io:format( user, From 81beed1d6b2332aded6356d583db6a922076ede2 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 2 Apr 2026 21:20:31 +0100 Subject: [PATCH 23/53] Optimise clock management Also provides functions for converting Last Modified Date in KV GET. 
--- src/riak_api_web.erl | 174 +++++++++++++++++++++++++++++++++- src/riak_api_web_acceptor.erl | 43 ++------- src/riak_api_web_socket.erl | 9 +- 3 files changed, 183 insertions(+), 43 deletions(-) diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index c35a588..f5ddaca 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -30,7 +30,11 @@ binding_config/2, add_routes/1, get_route/3, - spec_name/3 + spec_name/3, + rfc1123_date/1, + rfc1123_date/2, + rfc1123_date_now/0, + cache_today/0 ] ). @@ -40,6 +44,10 @@ -type route() :: {1..100, module()}. +%%%============================================================================ +%%% Routing +%%%============================================================================ + -spec add_routes(list(route())) -> ok. add_routes(Routes) -> CurrentRoutes = persistent_term:get(?ROUTE_KEY, []), @@ -81,6 +89,10 @@ get_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> {ok, CallbackMod, {MaxHdrCount, MaxHdrSize, MaxBodySize}, Context} end. +%%%============================================================================ +%%% Configure and Initiate Listeners +%%%============================================================================ + get_listeners() -> get_listeners(http) ++ get_listeners(https). @@ -179,3 +191,163 @@ common_config() -> )}, {backlog, 128} ]. + +%%%============================================================================ +%%% RFC1123 Clock Management +%%%============================================================================ + +-spec cache_today() -> ok. +cache_today() -> + {Date, Time} = calendar:now_to_universal_time(os:timestamp()), + case persistent_term:get({?MODULE, cache_today}, undefined) of + {Date, _DateBin} -> + ok; + _ -> + <<DateBin:17/binary, _/binary>> = rfc1123_date(Date, Time), + persistent_term:put({?MODULE, cache_today}, {Date, DateBin}) + end. + +-spec rfc1123_date_now() -> binary(). 
+rfc1123_date_now() -> + {Date, Time} = calendar:now_to_universal_time(os:timestamp()), + case persistent_term:get({?MODULE, cache_today}, undefined) of + {Date, DateBin} -> + rfc1123_date(DateBin, Time); + _ -> + spawn(fun cache_today/0), + rfc1123_date(Date, Time) + end. + +-spec rfc1123_date(erlang:timestamp()) -> binary(). +rfc1123_date(TS) -> + {Date, Time} = calendar:now_to_universal_time(TS), + rfc1123_date(Date, Time). + +rfc1123_date({YYYY,MM,DD},{Hr,Mn,Sc}) -> + DateBin = + << + (day_bin(calendar:day_of_the_week({YYYY,MM,DD})))/binary, + (i2_bin(DD))/binary, + (mon_bin(MM))/binary, + (integer_to_binary(YYYY))/binary, + <<" ">>/binary + >>, + rfc1123_date(DateBin, {Hr, Mn, Sc}); +rfc1123_date(DateBin, {Hr, Mn, Sc}) when is_binary(DateBin) -> + << + DateBin/binary, + (i2_bin(Hr))/binary, + $:, + (i2_bin(Mn))/binary, + $:, + (i2_bin(Sc))/binary, + <<" GMT">>/binary + >>. + +i2_bin(I) when I < 10 -> + << $0, (integer_to_binary(I))/binary >>; +i2_bin(I) -> + integer_to_binary(I). + +day_bin(1) -> + <<"Mon, ">>; +day_bin(2) -> + <<"Tue, ">>; +day_bin(3) -> + <<"Wed, ">>; +day_bin(4) -> + <<"Thu, ">>; +day_bin(5) -> + <<"Fri, ">>; +day_bin(6) -> + <<"Sat, ">>; +day_bin(7) -> + <<"Sun, ">>. + +mon_bin(1) -> + <<" Jan ">>; +mon_bin(2) -> + <<" Feb ">>; +mon_bin(3) -> + <<" Mar ">>; +mon_bin(4) -> + <<" Apr ">>; +mon_bin(5) -> + <<" May ">>; +mon_bin(6) -> + <<" Jun ">>; +mon_bin(7) -> + <<" Jul ">>; +mon_bin(8) -> + <<" Aug ">>; +mon_bin(9) -> + <<" Sep ">>; +mon_bin(10) -> + <<" Oct ">>; +mon_bin(11) -> + <<" Nov ">>; +mon_bin(12) -> + <<" Dec ">>. + + +%%%============================================================================ +%%% Eunit tests +%%%============================================================================ + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). 
+ +wm_rfc1123_date(TS) -> + {{YYYY, MM, DD}, {Hour, Min, Sec}} = calendar:now_to_universal_time(TS), + DayNumber = calendar:day_of_the_week({YYYY, MM, DD}), + iolist_to_binary( + lists:flatten( + io_lib:format( + "~s, ~2.2.0w ~3.s ~4.4.0w ~2.2.0w:~2.2.0w:~2.2.0w GMT", + [ + httpd_util:day(DayNumber), + DD, + httpd_util:month(MM), + YYYY, + Hour, + Min, + Sec + ] + ) + ) + ). + +date_speed_test() -> + {_, S, MicroS} = os:timestamp(), + Dates = lists:map(fun(I) -> {775 + I, S, MicroS} end, lists:seq(1, 1000)), + {TC1, DL1} = + timer:tc( + fun() -> lists:map(fun(TS) -> rfc1123_date(TS) end, Dates) end + ), + {TC2, DL2} = + timer:tc( + fun() -> lists:map(fun(TS) -> wm_rfc1123_date(TS) end, Dates) end + ), + io:format(user, "Timing for ours ~w vs wm ~w~n", [TC1, TC2]), + ?assert(DL1 == DL2), + + PreCalcDates = lists:map(fun(<<D:17/binary, _/binary>>) -> D end, DL1), + NewInputs = lists:zip(PreCalcDates, Dates), + {TC3, DL3} = + timer:tc( + fun() -> + lists:map( + fun({CachedDate, TS}) -> + rfc1123_date( + CachedDate, + element(2, calendar:now_to_universal_time(TS)) + ) + end, + NewInputs + ) + end + ), + io:format(user, "With pre-cached dates ~w~n", [TC3]), + ?assert(DL1 == DL3). + +-endif. \ No newline at end of file diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 5d25f42..63d03c2 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -28,7 +28,7 @@ -export([start_link/1, init/2]). --export([extend_buffer/4, start_clock/0, stop_clock/0]). +-export([extend_buffer/4]). -include_lib("kernel/include/logger.hrl"). @@ -673,35 +673,12 @@ get_response_line({1, 1}, Code) -> ] ). --spec start_clock() -> ok. -start_clock() -> - ?MODULE = - ets:new( - ?MODULE, - [named_table, public, {read_concurrency, true}] - ), - ok. - --spec stop_clock() -> true. -stop_clock() -> - ets:delete(?MODULE). - -spec default_response_headers( boolean() ) -> riak_api_web_headers:headers(). 
default_response_headers(KeepAlive) -> - DateHeader = - case {os:system_time(second), ets:lookup(?MODULE, rfc1123)} of - {Now, [{rfc1123, {CachedTime, CachedHdr}}]} when - Now == CachedTime - -> - CachedHdr; - {Now, _} -> - Hdr = {'Date', list_to_binary(httpd_util:rfc1123_date())}, - ets:insert(?MODULE, {rfc1123, {Now, Hdr}}), - Hdr - end, + DateHeader = {'Date', riak_api_web:rfc1123_date_now()}, ServerHeader = {'Server', <<"RiakAPI/4.0 SilverMachine">>}, ConnectionHeader = case KeepAlive of @@ -732,7 +709,7 @@ reason_phrase(N) -> httpd_util:reason_phrase(N). -include_lib("eunit/include/eunit.hrl"). clock_test() -> - ok = start_clock(), + ok = riak_api_web:cache_today(), {TC1, _Hdrs1} = timer:tc(fun() -> default_response_headers(true) end), {TC2, _Hdrs2} = @@ -744,7 +721,6 @@ clock_test() -> timer:sleep(1000), {TC5, _Hdrs5} = timer:tc(fun() -> default_response_headers(true) end), - ?assertMatch(1, ets:info(?MODULE, size)), MeanUnCached = (TC1 + TC5) div 2, MeanCached = (TC2 + TC3 + TC4) div 3, io:format( @@ -752,11 +728,10 @@ clock_test() -> "Cached ~w micros vs uncached ~w~n", [MeanCached, MeanUnCached] ), - ?assert(MeanCached < MeanUnCached), - ets:delete(?MODULE). + ?assert(MeanCached < MeanUnCached). simple_response_test() -> - ok = start_clock(), + ok = riak_api_web:cache_today(), set_version({1, 1}), FullResponse = generate_binary_response( @@ -777,11 +752,10 @@ simple_response_test() -> <<"\r\n">>/binary, <<"OutputOK">>/binary >>, - ?assertMatch(ExpectedResponse, FullResponse), - ets:delete(?MODULE). + ?assertMatch(ExpectedResponse, FullResponse). simple_stream_test() -> - ok = start_clock(), + ok = riak_api_web:cache_today(), SendFun = fun(Bin) when is_binary(Bin) -> case get({?MODULE, ?TEST, send_buffer}) of @@ -828,8 +802,7 @@ simple_stream_test() -> "\r\nA\r\nin chunks!\r\n0\r\n\r\n" >>/binary >>, - ?assertMatch(ExpectedResponse, Response), - ets:delete(?MODULE). + ?assertMatch(ExpectedResponse, Response). 
stream_fun() -> fun() -> diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 050f8e1..060b56e 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -64,8 +64,7 @@ init/1, handle_call/3, handle_cast/2, - handle_info/2, - terminate/2 + handle_info/2 ] ). @@ -229,12 +228,12 @@ init(Options) -> end, SocketOpts = default_socket_options(IP), {ok, Listener} = listen(Protocol, Port, SocketOpts, BufferOpts, SSLOpts), - riak_api_web_acceptor:start_clock(), {AcceptorPool, StartSize, MaxSize} = get_acceptor_pool(Listener, Options), ?LOG_INFO( "Acceptor pool for web started on IP ~0p port ~w of size ~w", [IP, Port, StartSize] ), + riak_api_web:cache_today(), { ok, #socket_state{ @@ -301,10 +300,6 @@ handle_info({'EXIT', Pid, Reason}, State) -> ?LOG_ERROR("Acceptor ~p unexpectedly crashed: ~0p", [Pid, Reason]), handle_info({'EXIT', Pid, normal}, State). -terminate(_Reason, _State) -> - riak_api_web_acceptor:stop_clock(), - ok. - %%%============================================================================ %%% Internal Functions %%%============================================================================ From c553fe21d48aede5d6ee5a552e9174fa299c8252 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 3 Apr 2026 10:37:34 +0100 Subject: [PATCH 24/53] Tidy-up initial configuration --- src/riak_api_web.erl | 68 ++++++++++++------------------------- src/riak_api_web_socket.erl | 5 ++- 2 files changed, 25 insertions(+), 48 deletions(-) diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index f5ddaca..acb5b71 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -38,8 +38,6 @@ ] ). --include_lib("kernel/include/logger.hrl"). - -define(ROUTE_KEY, {?MODULE, web_routes}). -type route() :: {1..100, module()}. 
@@ -99,22 +97,10 @@ get_listeners() -> get_listeners(Scheme) -> Listeners = case - app_helper:try_envs( - [ - {riak_api, Scheme}, - {riak_core, Scheme} - ], - [] - ) + app_helper:try_envs([{riak_api, Scheme}], []) of {riak_api, Scheme, List} when is_list(List) -> List; - {riak_core, Scheme, List} when is_list(List) -> - ?LOG_WARNING( - "Setting riak_core/~s is deprecated, please use riak_api/~s", - [Scheme, Scheme] - ), - List; _ -> [] end, @@ -135,39 +121,27 @@ binding_config(Scheme, Binding) -> }. spec_from_binding(http, Name, {Ip, Port}) -> - Options = - lists:flatten( - [ - {name, Name}, - {ip, Ip}, - {port, Port}, - {nodelay, true} - ], - common_config() - ), - add_recbuf(Options); + lists:flatten( + [ + {name, Name}, + {ip, Ip}, + {port, Port}, + {nodelay, true} + ], + common_config() + ); spec_from_binding(https, Name, {Ip, Port}) -> - Options = - lists:flatten( - [ - {name, Name}, - {ip, Ip}, - {port, Port}, - {ssl, true}, - {ssl_opts, riak_api_ssl:options()}, - {nodelay, true} - ], - common_config() - ), - add_recbuf(Options). - -add_recbuf(Options) -> - case application:get_env(webmachine, recbuf) of - {ok, RecBuf} -> - [{recbuf, RecBuf} | Options]; - _ -> - Options - end. + lists:flatten( + [ + {name, Name}, + {ip, Ip}, + {port, Port}, + {ssl, true}, + {ssl_opts, riak_api_ssl:options()}, + {nodelay, true} + ], + common_config() + ). spec_name(Scheme, Ip, Port) -> FormattedIP = diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 060b56e..24a566e 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -86,6 +86,9 @@ -define(POOL_SIZE_DEFAULT, 16). -define(POOL_SIZE_MAX_DEFAULT, 2048). +-define(DEFAULT_RECV_BUFFER, 131072). 
+ % Setting the receive buffer will also change the buffer + % https://github.com/erlang/otp/issues/9355 -record(socket_state, { port :: inet:port_number(), @@ -208,7 +211,7 @@ init(Options) -> BufferOpts = case get_tcp_buffer_options() of [] -> - []; + [{recbuf, ?DEFAULT_RECV_BUFFER}]; NonDefaultOpts -> ?LOG_INFO( "Non-default TCP buffer options configured for web ~0p", From 697631e2be8501e648c338ad95089410c58a332d Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 3 Apr 2026 11:43:38 +0100 Subject: [PATCH 25/53] Fix return tuple from security function --- src/riak_api_web.erl | 15 ++++++--------- src/riak_api_web_acceptor.erl | 2 +- src/riak_api_web_security.erl | 23 +++++++++++++++-------- src/riak_api_web_socket.erl | 4 ++-- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index acb5b71..1b5ffa7 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -96,9 +96,7 @@ get_listeners() -> get_listeners(Scheme) -> Listeners = - case - app_helper:try_envs([{riak_api, Scheme}], []) - of + case app_helper:try_envs([{riak_api, Scheme}], []) of {riak_api, Scheme, List} when is_list(List) -> List; _ -> @@ -197,10 +195,10 @@ rfc1123_date(TS) -> {Date, Time} = calendar:now_to_universal_time(TS), rfc1123_date(Date, Time). -rfc1123_date({YYYY,MM,DD},{Hr,Mn,Sc}) -> +rfc1123_date({YYYY, MM, DD}, {Hr, Mn, Sc}) -> DateBin = << - (day_bin(calendar:day_of_the_week({YYYY,MM,DD})))/binary, + (day_bin(calendar:day_of_the_week({YYYY, MM, DD})))/binary, (i2_bin(DD))/binary, (mon_bin(MM))/binary, (integer_to_binary(YYYY))/binary, @@ -219,7 +217,7 @@ rfc1123_date(DateBin, {Hr, Mn, Sc}) when is_binary(DateBin) -> >>. i2_bin(I) when I < 10 -> - << $0, (integer_to_binary(I))/binary >>; + <<$0, (integer_to_binary(I))/binary>>; i2_bin(I) -> integer_to_binary(I). @@ -263,7 +261,6 @@ mon_bin(11) -> mon_bin(12) -> <<" Dec ">>. 
- %%%============================================================================ %%% Eunit tests %%%============================================================================ @@ -304,7 +301,7 @@ date_speed_test() -> ), io:format(user, "Timing for ours ~w vs wm ~w~n", [TC1, TC2]), ?assert(DL1 == DL2), - + PreCalcDates = lists:map(fun(<>) -> D end, DL1), NewInputs = lists:zip(PreCalcDates, Dates), {TC3, DL3} = @@ -324,4 +321,4 @@ date_speed_test() -> io:format(user, "With pre-cached dates ~w~n", [TC3]), ?assert(DL1 == DL3). --endif. \ No newline at end of file +-endif. diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 63d03c2..2e6c8ab 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -259,7 +259,7 @@ split_path(URIPath) -> Path = maps:get(path, URIMap, <<"">>), SplitPath = case string:split(Path, <<"/">>, all) of - [<<>>|Rest] -> + [<<>> | Rest] -> Rest; PathList when is_list(PathList) -> PathList diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index 946820e..e9b739a 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -27,6 +27,7 @@ -export([is_authorised/4]). -define(AUTH_PREFIX, "Basic "). +-define(ERR_HEADER, {'Content-Type', <<"text/plain">>}). 
-spec is_authorised( boolean(), @@ -57,21 +58,27 @@ is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> {ok, SecContext} -> {ok, SecContext}; {error, Error} -> - {halt, 401, <<"~0p">>, [Error]} + {halt, 401, [?ERR_HEADER], <<"~0p">>, [Error]} end catch _:ExError -> ?LOG_WARNING("Error decoding credentials ~0p", [ExError]), - {halt, 400, [], <<"Error decoding credentials">>, []} + { + halt, + 400, + [?ERR_HEADER], + <<"Error decoding credentials">>, + [] + } end; Unexpected -> ?LOG_WARNING("Error decoding credentials ~0p", [Unexpected]), - {halt, 400, [], <<"Error decoding credentials">>, []} + {halt, 400, [?ERR_HEADER], <<"Error decoding credentials">>, []} end; is_authorised(true, http, _ReqHeaders, _Peer, _AuthFun) -> - {halt, 426, [], <<"Upgrade required to https">>}; + {halt, 426, [?ERR_HEADER], <<"Upgrade required to https">>, []}; is_authorised(false, _, _ReqHeaders, _Peer, _AuthFun) -> - {true, undefined}. + {ok, undefined}. %%%============================================================================ %%% Eunit tests @@ -109,7 +116,7 @@ simple_security_test() -> ) ), ?assertMatch( - {halt, 400, [], <<"Error decoding credentials">>, []}, + {halt, 400, [?ERR_HEADER], <<"Error decoding credentials">>, []}, is_authorised( true, https, @@ -120,7 +127,7 @@ simple_security_test() -> ), BadCombo = base64:encode(iolist_to_binary([User2, <<":">>, Pass1])), ?assertMatch( - {halt, 401, <<"~0p">>, [invalid_credentials]}, + {halt, 401, [?ERR_HEADER], <<"~0p">>, [invalid_credentials]}, is_authorised( true, https, @@ -140,7 +147,7 @@ simple_security_test() -> ] ), ?assertMatch( - {halt, 400, [], <<"Error decoding credentials">>, []}, + {halt, 400, [?ERR_HEADER], <<"Error decoding credentials">>, []}, is_authorised( true, https, diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 24a566e..27047ee 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -87,8 +87,8 @@ -define(POOL_SIZE_DEFAULT, 16). 
-define(POOL_SIZE_MAX_DEFAULT, 2048). -define(DEFAULT_RECV_BUFFER, 131072). - % Setting the receive buffer will also change the buffer - % https://github.com/erlang/otp/issues/9355 +% Setting the receive buffer will also change the buffer +% https://github.com/erlang/otp/issues/9355 -record(socket_state, { port :: inet:port_number(), From ee81ccc5b776e852c4c7adc12a0ba68ff581a2b3 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 3 Apr 2026 11:44:16 +0100 Subject: [PATCH 26/53] Update riak_api_web_security.erl --- src/riak_api_web_security.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index e9b739a..532d9e9 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -27,7 +27,7 @@ -export([is_authorised/4]). -define(AUTH_PREFIX, "Basic "). --define(ERR_HEADER, {'Content-Type', <<"text/plain">>}). +-define(TXT_HEADER, {'Content-Type', <<"text/plain">>}). -spec is_authorised( boolean(), @@ -58,7 +58,7 @@ is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> {ok, SecContext} -> {ok, SecContext}; {error, Error} -> - {halt, 401, [?ERR_HEADER], <<"~0p">>, [Error]} + {halt, 401, [?TXT_HEADER], <<"~0p">>, [Error]} end catch _:ExError -> @@ -66,17 +66,17 @@ is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> { halt, 400, - [?ERR_HEADER], + [?TXT_HEADER], <<"Error decoding credentials">>, [] } end; Unexpected -> ?LOG_WARNING("Error decoding credentials ~0p", [Unexpected]), - {halt, 400, [?ERR_HEADER], <<"Error decoding credentials">>, []} + {halt, 400, [?TXT_HEADER], <<"Error decoding credentials">>, []} end; is_authorised(true, http, _ReqHeaders, _Peer, _AuthFun) -> - {halt, 426, [?ERR_HEADER], <<"Upgrade required to https">>, []}; + {halt, 426, [?TXT_HEADER], <<"Upgrade required to https">>, []}; is_authorised(false, _, _ReqHeaders, _Peer, _AuthFun) -> {ok, undefined}. 
@@ -116,7 +116,7 @@ simple_security_test() -> ) ), ?assertMatch( - {halt, 400, [?ERR_HEADER], <<"Error decoding credentials">>, []}, + {halt, 400, [?TXT_HEADER], <<"Error decoding credentials">>, []}, is_authorised( true, https, @@ -127,7 +127,7 @@ simple_security_test() -> ), BadCombo = base64:encode(iolist_to_binary([User2, <<":">>, Pass1])), ?assertMatch( - {halt, 401, [?ERR_HEADER], <<"~0p">>, [invalid_credentials]}, + {halt, 401, [?TXT_HEADER], <<"~0p">>, [invalid_credentials]}, is_authorised( true, https, @@ -147,7 +147,7 @@ simple_security_test() -> ] ), ?assertMatch( - {halt, 400, [?ERR_HEADER], <<"Error decoding credentials">>, []}, + {halt, 400, [?TXT_HEADER], <<"Error decoding credentials">>, []}, is_authorised( true, https, From adf5b51eb22dc18fe983d48a3bdfd6eb98bf4347 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 3 Apr 2026 12:14:34 +0100 Subject: [PATCH 27/53] Be explicit peer is IP --- src/riak_api_web_acceptor.erl | 6 +++--- src/riak_api_web_handler.erl | 4 ++-- test/riak_api_web_ets_store.erl | 2 +- test/riak_api_web_get_random.erl | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 2e6c8ab..fba6ca4 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -60,7 +60,7 @@ response_code(), riak_api_web_headers:header_list(), binary(), - list() + list(term()) }. 
-type halt_result() :: @@ -140,7 +140,7 @@ handle_request(Socket, InitBuffer) -> reset_version(), RequestResult = maybe - {ok, Peer} = riak_api_web_socket:get_peer(Socket), + {ok, PeerIP} = riak_api_web_socket:get_peer(Socket), {ok, {Method, RawPath, Version, HdrBuffer}} ?= get_request_line(Socket, InitBuffer), set_version(Version), @@ -162,7 +162,7 @@ handle_request(Socket, InitBuffer) -> CallbackMod:check_permissions( ReqHeaders, element(1, Socket), - Peer, + PeerIP, InitModCtx ), {ok, ModCtx2} ?= diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index df5a710..505355e 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -52,7 +52,7 @@ -export_type( [ limits/0, - peer/0, + peer_ip/0, query_params/0, stream_fun/0, response_body/0, @@ -74,7 +74,7 @@ {method_not_allowed, list(riak_api_web_acceptor:method())} | {ok, limits(), context()}. --type peer() :: inet:ip_address(). +-type peer_ip() :: inet:ip_address(). %% The IP address of the client device connected to the socket %% @doc check_permissions for using this module or route diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index d3b693a..c191bc3 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -97,7 +97,7 @@ match_route(_, _, _) -> -spec check_permissions( riak_api_web_headers:headers(), riak_api_web_socket:scheme(), - riak_api_web_handler:peer(), + riak_api_web_handler:peer_ip(), context() ) -> {ok, context()}. diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index f9199cb..3535a61 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -81,7 +81,7 @@ match_route(_, _, _) -> check_permissions( riak_api_web_headers:headers(), riak_api_web_socket:scheme(), - riak_api_web_handler:peer(), + riak_api_web_handler:peer_ip(), context() ) -> {ok, context()}. 
From fa6a1944a3005b73a97585fb903c69eb21a05e75 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 3 Apr 2026 12:19:32 +0100 Subject: [PATCH 28/53] Missed peer() --- src/riak_api_web_handler.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 505355e..36dff22 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -87,7 +87,7 @@ check_permissions( riak_api_web_headers:headers(), riak_api_web_socket:scheme(), - peer(), + peer_ip(), context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). From 94ec8f7d83dad5b54fac5c000187056263be8786 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 3 Apr 2026 12:29:24 +0100 Subject: [PATCH 29/53] Correct security type for peer --- src/riak_api_web_security.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index 532d9e9..dc7cb96 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -33,7 +33,7 @@ boolean(), http | https, riak_api_web_headers:headers(), - {ip, inet:ip_address()} + inet:ip_address() ) -> {ok, riak_core_security:context() | undefined} | riak_api_web_acceptor:halt_response(). @@ -43,7 +43,7 @@ is_authorised(Enabled, Scheme, ReqHeaders, Peer) -> Scheme, ReqHeaders, Peer, - fun(User, Pass, {ip, Pip}) -> + fun(User, Pass, Pip) -> riak_core_security:authenticate(User, Pass, [{ip, Pip}]) end ). 
From 9d666501d8f416c48397aa1fe893bdba70f03a0d Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 3 Apr 2026 12:42:25 +0100 Subject: [PATCH 30/53] Align params type with uri_string:dissect_query --- src/riak_api_web_handler.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 36dff22..daa3e0d 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -93,7 +93,7 @@ {ok, context()}|riak_api_web_acceptor:halt_response(). --type query_params() :: [{binary(), binary()}]. +-type query_params() :: [{unicode:chardata(), unicode:chardata()|true}]. %% @doc parse and validate query params, passed as a map %% Any parameter will have both key and value as a binary, except if the From 746f98fbe3f7f74ad0d45fd039c3c1e6326b975f Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sun, 12 Apr 2026 11:58:21 +0100 Subject: [PATCH 31/53] Unit test request line error handling --- src/riak_api_web_acceptor.erl | 79 +++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index fba6ca4..a39adc0 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -49,7 +49,7 @@ | 500..508. -type method() :: - 'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE'. + 'OPTIONS' | 'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE' | 'TRACE'. -type http_version() :: {1, 0} | {1, 1}. 
@@ -390,32 +390,23 @@ get_request_line(Socket, Buffer) -> extend_buffer(Socket, Buffer, 0, undefined) ); {ok, {http_request, Method, {abs_path, Path}, Version}, Rest} when - is_binary(Path) + is_binary(Path), is_atom(Method) -> case Version of SV when SV == {1, 0}; SV == {1, 1} -> - case Method of - SM when - SM == 'GET'; - SM == 'HEAD'; - SM == 'POST'; - SM == 'PUT'; - SM == 'DELETE' - -> - {ok, {SM, Path, SV, Rest}}; - _USM -> - {halt, 405, [], <<>>, []} - end; + {ok, {Method, Path, SV, Rest}}; _USV -> USVError = <<"Only HTTP 1.0 and 1.1 supported">>, {halt, 505, [], USVError, []} end; + {ok, {http_request, Method, _, _}, _Rest} when is_atom(Method) -> + bad_request(<<"Absolute path required not full or relative">>, []); {ok, {http_error, Error}, _} -> bad_request(<<"HTTP error on inbound request ~0p">>, [Error]); - {ok, Unexpected, _} -> + {ok, _Unexpected, _} -> bad_request( - <<"Unexpected error on inbound request ~0p">>, - [Unexpected] + <<"Unexpected request line ~0p">>, + [Buffer] ) end. @@ -707,6 +698,60 @@ reason_phrase(N) -> httpd_util:reason_phrase(N). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("stdlib/include/assert.hrl"). 
+ +request_line_decode_test() -> + ?assertMatch( + {halt, 400, [], <<"Absolute path required not full or relative">>, []}, + get_request_line( + test_socket, + <<"GET no-leading-slash/relative HTTP/1.1\r\n">> + ) + ), + ?assertMatch( + {halt, 400, [], <<"Absolute path required not full or relative">>, []}, + get_request_line( + test_socket, + <<"GET http://localhost:8000/full-path HTTP/1.1\r\n">> + ) + ), + ?assertMatch( + {halt, 400, [], <<"Absolute path required not full or relative">>, []}, + get_request_line( + test_socket, + <<"GET @ref HTTP/1.1\r\n">> + ) + ), + ?assertMatch( + { + halt, + 400, + [], + <<"HTTP error on inbound request ~0p">>, + [<<"GET @ref HTP/1.1\r\n">>] + }, + get_request_line( + test_socket, + <<"GET @ref HTP/1.1\r\n">> + ) + ), + ?assertMatch( + {halt, 505, [], <<"Only HTTP 1.0 and 1.1 supported">>, []}, + get_request_line(test_socket, <<"GET /stats HTTP/2.0\r\n">>) + ), + % If the method is not supported at all, then give general error - as it is + % not possible to know what methods are allowed on the URL - this can only + % be determined when matching routes + ?assertMatch( + { + halt, + 400, + [], + <<"Unexpected request line ~0p">>, + [<<"PATCH /stats HTTP/1.0\r\n">>] + }, + get_request_line(test_socket, <<"PATCH /stats HTTP/1.0\r\n">>) + ). 
clock_test() -> ok = riak_api_web:cache_today(), From 95b53141863155ec0a9d1f3aeac4de564060c25b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sun, 12 Apr 2026 22:10:22 +0100 Subject: [PATCH 32/53] Trim both ends of path --- src/riak_api_web.erl | 2 +- src/riak_api_web_acceptor.erl | 7 +------ src/riak_api_web_socket.erl | 9 ++++++++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index 1b5ffa7..3439256 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -111,7 +111,7 @@ binding_config(Scheme, Binding) -> { Name, - {riak_api_web_socket, start, [Config]}, + {riak_api_web_socket, start_link, [Config]}, permanent, 5000, worker, diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index a39adc0..fb60b53 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -258,12 +258,7 @@ split_path(URIPath) -> URIMap when is_map(URIMap) -> Path = maps:get(path, URIMap, <<"">>), SplitPath = - case string:split(Path, <<"/">>, all) of - [<<>> | Rest] -> - Rest; - PathList when is_list(PathList) -> - PathList - end, + string:split(string:trim(Path, both, "/"), <<"/">>, all), case uri_string:dissect_query(maps:get(query, URIMap, <<"">>)) of QueryParams when is_list(QueryParams) -> {ok, {Path, SplitPath, QueryParams}}; diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 27047ee..aee1e0d 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -219,7 +219,14 @@ init(Options) -> ), NonDefaultOpts end, - {ip, IP} = lists:keyfind(ip, 1, Options), + {ip, IP} = + case lists:keyfind(ip, 1, Options) of + {ip, IPString} when is_list(IPString) -> + {ok, IPAddr} = inet:parse_address(IPString), + {ip, IPAddr}; + {ip, IPAddr} -> + {ip, IPAddr} + end, {port, Port} = lists:keyfind(port, 1, Options), {Protocol, SSLOpts} = case lists:keyfind(ssl, 1, Options) of From b826e4868053d846a3cdd8919d394a5d822dea9b Mon Sep 17 00:00:00 2001 From: 
Martin Sumner Date: Tue, 14 Apr 2026 10:49:39 +0100 Subject: [PATCH 33/53] Use binary split not string split --- src/riak_api_web_acceptor.erl | 3 +-- src/riak_api_web_headers.erl | 7 ++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index fb60b53..5d58d11 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -257,8 +257,7 @@ split_path(URIPath) -> case uri_string:normalize(URIPath, [return_map]) of URIMap when is_map(URIMap) -> Path = maps:get(path, URIMap, <<"">>), - SplitPath = - string:split(string:trim(Path, both, "/"), <<"/">>, all), + SplitPath = binary:split(Path, <<"/">>, [global, trim_all]), case uri_string:dissect_query(maps:get(query, URIMap, <<"">>)) of QueryParams when is_list(QueryParams) -> {ok, {Path, SplitPath, QueryParams}}; diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 4bd8038..8e4e147 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -220,10 +220,7 @@ get_unique_value(K, H) -> %% secondary information is `;` separated list -spec parse_primary_header_value(binary()) -> unicode:chardata(). parse_primary_header_value(HeaderValue) -> - string:trim( - hd(string:split(HeaderValue, [$;])), - both - ). + binary:split(HeaderValue, <<";">>, [global, trim_all]). %% @doc %% Fetch the {original key, values} for a binary (non-standard) header key. @@ -404,7 +401,7 @@ normalize_value(MultipleValues) when is_list(MultipleValues) -> normalize_value(FieldValue) when is_binary(FieldValue) -> lists:map( fun(V) -> string:trim(V, both) end, - string:split(FieldValue, ?V_SEPARATOR, all) + binary:split(FieldValue, ?V_SEPARATOR, [global]) ). 
%%%============================================================================ From c9139dddc3427e89a09fc697d803e0d1d9aee392 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 Apr 2026 11:30:07 +0100 Subject: [PATCH 34/53] RFC requires only leading whitespace to be removed from header Also compile the binary pattern, and cache in a persistent term to speed up parsing --- src/riak_api_web_headers.erl | 50 +++++++++++++++++++++++++++++++----- src/riak_api_web_socket.erl | 1 + 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 8e4e147..6fc3ea8 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -35,6 +35,7 @@ -export([get_value/2, get_unique_value/2, lookup/3, prefix_fold/3]). -export([parse_primary_header_value/1]). -export([output_response_block/1, parse_request_block/3]). +-export([compile_separator/0]). -define(KV_SEPARATOR, <<": ">>). -define(V_SEPARATOR, <<", ">>). @@ -399,11 +400,18 @@ normalize_key(KBin) when is_binary(KBin) -> normalize_value(MultipleValues) when is_list(MultipleValues) -> lists:filter(fun is_binary/1, MultipleValues); normalize_value(FieldValue) when is_binary(FieldValue) -> + CP = persistent_term:get({?MODULE, ?V_SEPARATOR}, ?V_SEPARATOR), lists:map( - fun(V) -> string:trim(V, both) end, - binary:split(FieldValue, ?V_SEPARATOR, [global]) + fun(V) -> string:trim(V, leading) end, + binary:split(FieldValue, CP, [global]) ). +%% @doc Call this function when initialising API +-spec compile_separator() -> ok. +compile_separator() -> + CP = binary:compile_pattern(?V_SEPARATOR), + persistent_term:put({?MODULE, ?V_SEPARATOR}, CP). + %%%============================================================================ %%% Eunit tests %%%============================================================================ @@ -411,12 +419,40 @@ normalize_value(FieldValue) when is_binary(FieldValue) -> -ifdef(TEST). 
-include_lib("eunit/include/eunit.hrl"). +split_perf_test() -> + HV1 = <<"SOME-INDEX|HEADER|NOTSPLIT">>, + HV2 = <<"HDR1, HDR2, HDR3">>, + L = [HV1, HV1, HV1, HV1, HV2], + FullL = lists:flatten(lists:map(fun(_I) -> L end, lists:seq(1, 1000))), + {TS1, L1} = + timer:tc( + fun() -> + lists:map( + fun(HV) -> binary:split(HV, ?V_SEPARATOR, [global]) end, + FullL + ) + end + ), + CPVS = binary:compile_pattern(?V_SEPARATOR), + {TS2, L2} = + timer:tc( + fun() -> + lists:map( + fun(HV) -> binary:split(HV, CPVS, [global]) end, + FullL + ) + end + ), + ?assertMatch(L1, L2), + io:format(user, "No-compile ~w compile ~w microseconds", [TS1, TS2]). + + parse_block_test() -> RequestHeader1 = << "content-length: 1024\r\n" "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n" - "x-riak-index-Field1_bin: NAME3|DOB1 \r\n" + "x-riak-index-Field1_bin: NAME3|DOB1\r\n" "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n" >>, RequestHeader2 = @@ -431,7 +467,7 @@ parse_splitblock_test() -> << "content-length: 1024\r\n" "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n" - "x-riak-index-Field1_bin: NAME3|DOB1 \r\n" + "x-riak-index-Field1_bin: NAME3|DOB1\r\n" "X-Riak-Index-field2_bin: POSTCODE1" >>, RequestHeader2 = @@ -473,7 +509,7 @@ riak_metadata_test() -> << "content-length: 1024\r\n" "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n" - "x-riak-index-Field1_bin: NAME3|DOB1 \r\n" + "x-riak-index-Field1_bin: NAME3|DOB1\r\n" "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n" >>, RequestHeader2 = @@ -505,7 +541,7 @@ content_smuggling_test() -> << "content-length: 1024\r\n" "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n" - "x-riak-index-Field1_bin: NAME3|DOB1 \r\n" + "x-riak-index-Field1_bin: NAME3|DOB1\r\n" "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n" "content-length: 16384\r\n" "\r\n" @@ -526,7 +562,7 @@ response_header_test() -> {<<"X-Riak-Index-field1_bin">>, [ <<"NAME1|DOB1">>, <<"NAME2|DOB1">> ]}, - {<<"X-Riak-Index-field1_bin">>, <<"NAME3|DOB1 ">>}, + {<<"X-Riak-Index-field1_bin">>, 
<<"NAME3|DOB1">>}, {<<"X-Riak-Index-field2_bin">>, <<"POSTCODE1|DOB1">>} ], RespHeaders1 = make_rsp_header(InitHeaders), diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index aee1e0d..7d278c9 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -244,6 +244,7 @@ init(Options) -> [IP, Port, StartSize] ), riak_api_web:cache_today(), + riak_api_web_headers:compile_separator(), { ok, #socket_state{ From 1793961daf33e8be3ee00ebcca30c607b8b9bfb9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 Apr 2026 12:59:20 +0100 Subject: [PATCH 35/53] RFC requires trimming both leading and trailing whitespace https://greenbytes.de/tech/webdav/rfc7230.html#header.fields --- src/riak_api_web_headers.erl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 6fc3ea8..34637f2 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -40,6 +40,7 @@ -define(KV_SEPARATOR, <<": ">>). -define(V_SEPARATOR, <<", ">>). -define(L_SEPARATOR, <<"\r\n">>). +-define(OWS, [<<" ">>, <<"\t">>]). -record(headers, { type = request :: request | response, @@ -400,9 +401,20 @@ normalize_key(KBin) when is_binary(KBin) -> normalize_value(MultipleValues) when is_list(MultipleValues) -> lists:filter(fun is_binary/1, MultipleValues); normalize_value(FieldValue) when is_binary(FieldValue) -> - CP = persistent_term:get({?MODULE, ?V_SEPARATOR}, ?V_SEPARATOR), + {CP, WS} = + persistent_term:get( + {?MODULE, ?V_SEPARATOR}, + {?V_SEPARATOR, ?OWS} + ), lists:map( - fun(V) -> string:trim(V, leading) end, + fun(V) -> + case binary:split(V, WS, [global, trim_all]) of + [V0] when is_binary(V0) -> + V0; + _ -> + string:trim(V, both) + end + end, binary:split(FieldValue, CP, [global]) ). @@ -410,7 +422,8 @@ normalize_value(FieldValue) when is_binary(FieldValue) -> -spec compile_separator() -> ok. 
compile_separator() -> CP = binary:compile_pattern(?V_SEPARATOR), - persistent_term:put({?MODULE, ?V_SEPARATOR}, CP). + WS = binary:compile_pattern(?OWS), + persistent_term:put({?MODULE, ?V_SEPARATOR}, {CP, WS}). %%%============================================================================ %%% Eunit tests @@ -509,7 +522,7 @@ riak_metadata_test() -> << "content-length: 1024\r\n" "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n" - "x-riak-index-Field1_bin: NAME3|DOB1\r\n" + "x-riak-index-Field1_bin: NAME3|DOB1 \r\n" "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n" >>, RequestHeader2 = @@ -541,7 +554,7 @@ content_smuggling_test() -> << "content-length: 1024\r\n" "x-riak-Index-field1_bin: NAME1|DOB1, NAME2|DOB1\r\n" - "x-riak-index-Field1_bin: NAME3|DOB1\r\n" + "x-riak-index-Field1_bin: NAME3|DOB1 \t \r\n" "X-Riak-Index-field2_bin: POSTCODE1|DOB1\r\n" "content-length: 16384\r\n" "\r\n" From 261afdc22c8141d769b1930189d9b68ca68eca8d Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 Apr 2026 13:18:51 +0100 Subject: [PATCH 36/53] compile URL separator --- src/riak_api_web_acceptor.erl | 14 ++++++++++++-- src/riak_api_web_headers.erl | 12 ++++++------ src/riak_api_web_socket.erl | 3 ++- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 5d58d11..4c45ab4 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -28,7 +28,7 @@ -export([start_link/1, init/2]). --export([extend_buffer/4]). +-export([extend_buffer/4, compile_separators/0]). -include_lib("kernel/include/logger.hrl"). 
@@ -257,7 +257,12 @@ split_path(URIPath) -> case uri_string:normalize(URIPath, [return_map]) of URIMap when is_map(URIMap) -> Path = maps:get(path, URIMap, <<"">>), - SplitPath = binary:split(Path, <<"/">>, [global, trim_all]), + SplitPath = + binary:split( + Path, + persistent_term:get({?MODULE, separators}, <<"/">>), + [global, trim_all] + ), case uri_string:dissect_query(maps:get(query, URIMap, <<"">>)) of QueryParams when is_list(QueryParams) -> {ok, {Path, SplitPath, QueryParams}}; @@ -274,6 +279,11 @@ split_path(URIPath) -> ) end. +-spec compile_separators() -> ok. +compile_separators() -> + SS = binary:compile_pattern(<<"/">>), + persistent_term:put({?MODULE, separators}, SS). + -spec extend_buffer( riak_api_web_socket:socket(), binary(), diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 34637f2..8331a6c 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -35,7 +35,7 @@ -export([get_value/2, get_unique_value/2, lookup/3, prefix_fold/3]). -export([parse_primary_header_value/1]). -export([output_response_block/1, parse_request_block/3]). --export([compile_separator/0]). +-export([compile_separators/0]). -define(KV_SEPARATOR, <<": ">>). -define(V_SEPARATOR, <<", ">>). @@ -403,7 +403,7 @@ normalize_value(MultipleValues) when is_list(MultipleValues) -> normalize_value(FieldValue) when is_binary(FieldValue) -> {CP, WS} = persistent_term:get( - {?MODULE, ?V_SEPARATOR}, + {?MODULE, separators}, {?V_SEPARATOR, ?OWS} ), lists:map( @@ -419,11 +419,11 @@ normalize_value(FieldValue) when is_binary(FieldValue) -> ). %% @doc Call this function when initialising API --spec compile_separator() -> ok. -compile_separator() -> - CP = binary:compile_pattern(?V_SEPARATOR), +-spec compile_separators() -> ok. +compile_separators() -> + CS = binary:compile_pattern(?V_SEPARATOR), WS = binary:compile_pattern(?OWS), - persistent_term:put({?MODULE, ?V_SEPARATOR}, {CP, WS}). + persistent_term:put({?MODULE, separators}, {CS, WS}). 
%%%============================================================================ %%% Eunit tests diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 7d278c9..08a557a 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -244,7 +244,8 @@ init(Options) -> [IP, Port, StartSize] ), riak_api_web:cache_today(), - riak_api_web_headers:compile_separator(), + riak_api_web_headers:compile_separators(), + riak_api_web_acceptor:compile_separators(), { ok, #socket_state{ From b45d244ef12027b55f51cb404423792112522343 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 Apr 2026 13:27:51 +0100 Subject: [PATCH 37/53] Revert "compile URL separator" This reverts commit 261afdc22c8141d769b1930189d9b68ca68eca8d. --- src/riak_api_web_acceptor.erl | 14 ++------------ src/riak_api_web_headers.erl | 12 ++++++------ src/riak_api_web_socket.erl | 3 +-- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 4c45ab4..5d58d11 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -28,7 +28,7 @@ -export([start_link/1, init/2]). --export([extend_buffer/4, compile_separators/0]). +-export([extend_buffer/4]). -include_lib("kernel/include/logger.hrl"). @@ -257,12 +257,7 @@ split_path(URIPath) -> case uri_string:normalize(URIPath, [return_map]) of URIMap when is_map(URIMap) -> Path = maps:get(path, URIMap, <<"">>), - SplitPath = - binary:split( - Path, - persistent_term:get({?MODULE, separators}, <<"/">>), - [global, trim_all] - ), + SplitPath = binary:split(Path, <<"/">>, [global, trim_all]), case uri_string:dissect_query(maps:get(query, URIMap, <<"">>)) of QueryParams when is_list(QueryParams) -> {ok, {Path, SplitPath, QueryParams}}; @@ -279,11 +274,6 @@ split_path(URIPath) -> ) end. --spec compile_separators() -> ok. -compile_separators() -> - SS = binary:compile_pattern(<<"/">>), - persistent_term:put({?MODULE, separators}, SS). 
- -spec extend_buffer( riak_api_web_socket:socket(), binary(), diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 8331a6c..34637f2 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -35,7 +35,7 @@ -export([get_value/2, get_unique_value/2, lookup/3, prefix_fold/3]). -export([parse_primary_header_value/1]). -export([output_response_block/1, parse_request_block/3]). --export([compile_separators/0]). +-export([compile_separator/0]). -define(KV_SEPARATOR, <<": ">>). -define(V_SEPARATOR, <<", ">>). @@ -403,7 +403,7 @@ normalize_value(MultipleValues) when is_list(MultipleValues) -> normalize_value(FieldValue) when is_binary(FieldValue) -> {CP, WS} = persistent_term:get( - {?MODULE, separators}, + {?MODULE, ?V_SEPARATOR}, {?V_SEPARATOR, ?OWS} ), lists:map( @@ -419,11 +419,11 @@ normalize_value(FieldValue) when is_binary(FieldValue) -> ). %% @doc Call this function when initialising API --spec compile_separators() -> ok. -compile_separators() -> - CS = binary:compile_pattern(?V_SEPARATOR), +-spec compile_separator() -> ok. +compile_separator() -> + CP = binary:compile_pattern(?V_SEPARATOR), WS = binary:compile_pattern(?OWS), - persistent_term:put({?MODULE, separators}, {CS, WS}). + persistent_term:put({?MODULE, ?V_SEPARATOR}, {CP, WS}). 
%%%============================================================================ %%% Eunit tests diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 08a557a..7d278c9 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -244,8 +244,7 @@ init(Options) -> [IP, Port, StartSize] ), riak_api_web:cache_today(), - riak_api_web_headers:compile_separators(), - riak_api_web_acceptor:compile_separators(), + riak_api_web_headers:compile_separator(), { ok, #socket_state{ From 5d7bb8324b205a17a7ecb51946253a574e49f9d6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 Apr 2026 13:29:38 +0100 Subject: [PATCH 38/53] Only compile separators if not single character --- src/riak_api_web_headers.erl | 10 +++++----- src/riak_api_web_socket.erl | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index 34637f2..a41173f 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -35,7 +35,7 @@ -export([get_value/2, get_unique_value/2, lookup/3, prefix_fold/3]). -export([parse_primary_header_value/1]). -export([output_response_block/1, parse_request_block/3]). --export([compile_separator/0]). +-export([compile_separators/0]). -define(KV_SEPARATOR, <<": ">>). -define(V_SEPARATOR, <<", ">>). @@ -403,7 +403,7 @@ normalize_value(MultipleValues) when is_list(MultipleValues) -> normalize_value(FieldValue) when is_binary(FieldValue) -> {CP, WS} = persistent_term:get( - {?MODULE, ?V_SEPARATOR}, + {?MODULE, compile_patterns}, {?V_SEPARATOR, ?OWS} ), lists:map( @@ -419,11 +419,11 @@ normalize_value(FieldValue) when is_binary(FieldValue) -> ). %% @doc Call this function when initialising API --spec compile_separator() -> ok. -compile_separator() -> +-spec compile_separators() -> ok. +compile_separators() -> CP = binary:compile_pattern(?V_SEPARATOR), WS = binary:compile_pattern(?OWS), - persistent_term:put({?MODULE, ?V_SEPARATOR}, {CP, WS}). 
+ persistent_term:put({?MODULE, compile_patterns}, {CP, WS}). %%%============================================================================ %%% Eunit tests diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 7d278c9..2da7cf2 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -244,7 +244,7 @@ init(Options) -> [IP, Port, StartSize] ), riak_api_web:cache_today(), - riak_api_web_headers:compile_separator(), + riak_api_web_headers:compile_separators(), { ok, #socket_state{ From 2af1af0e53425fbdc98fdc5a6cb3fc70d9eaa57d Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 15 Apr 2026 14:58:56 +0100 Subject: [PATCH 39/53] Extend testing of errors --- rebar.config | 1 + src/riak_api_web.erl | 30 +- src/riak_api_web_acceptor.erl | 80 +++-- src/riak_api_web_body.erl | 26 +- src/riak_api_web_handler.erl | 16 +- src/riak_api_web_headers.erl | 9 +- test/riak_api_web_ets_store.erl | 5 +- test/riak_api_web_trigger.erl | 539 ++++++++++++++++++++++++++++++++ 8 files changed, 671 insertions(+), 35 deletions(-) create mode 100644 test/riak_api_web_trigger.erl diff --git a/rebar.config b/rebar.config index b78713a..a4a67e6 100644 --- a/rebar.config +++ b/rebar.config @@ -17,6 +17,7 @@ "src/riak_api_web_handler.erl" "test/riak_api_web_get_random.erl", "test/riak_api_web_ets_store.erl", + "test/riak_api_web_trigger.erl", "rebar.config" ]}, {exclude_files, []} diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index 3439256..b05660c 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -60,7 +60,7 @@ add_routes(Routes) -> { ok, module(), - {pos_integer(), pos_integer(), pos_integer()}, + {pos_integer(), pos_integer(), non_neg_integer()}, any() } | riak_api_web_acceptor:halt_response(). 
@@ -83,7 +83,9 @@ get_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> ) ), {halt, 405, [{'Allow', AllowHdrVal}], <<>>, []}; - {ok, {MaxHdrCount, MaxHdrSize, MaxBodySize}, Context} -> + {ok, {MaxHdrCount, MaxHdrSize, MaxBodySize}, Context} when + MaxHdrCount > 0, MaxHdrSize > 0, MaxBodySize >= 0 + -> {ok, CallbackMod, {MaxHdrCount, MaxHdrSize, MaxBodySize}, Context} end. @@ -183,7 +185,7 @@ cache_today() -> rfc1123_date_now() -> {Date, Time} = calendar:now_to_universal_time(os:timestamp()), case persistent_term:get({?MODULE, cache_today}, undefined) of - {Date, DateBin} -> + {CachedDate, DateBin} when CachedDate == Date -> rfc1123_date(DateBin, Time); _ -> spawn(fun cache_today/0), @@ -321,4 +323,26 @@ date_speed_test() -> io:format(user, "With pre-cached dates ~w~n", [TC3]), ?assert(DL1 == DL3). +check_date_is_autocached_test() -> + persistent_term:erase({?MODULE, cache_today}), + rfc1123_date_now(), + true = + lists:foldl( + fun(I, Acc) -> + case Acc of + true -> + true; + false -> + timer:sleep(I), + not_cached =/= + persistent_term:get( + {?MODULE, cache_today}, not_cached + ) + end + end, + false, + lists:seq(1, 100) + ), + rfc1123_date_now(). + -endif. 
diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 5d58d11..c769d7d 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -170,7 +170,7 @@ handle_request(Socket, InitBuffer) -> {ok, ModCtx3} ?= CallbackMod:parse_request_headers(ReqHeaders, ModCtx2), {ok, {CLorChunk, UseGzip}} ?= expect_body(ReqHeaders), - {ok, InitReqBdy} ?= + {ok, InitReqBody} ?= riak_api_web_body:initiate_body( extend_buffer_fun(Socket), BdyBuffer, @@ -179,11 +179,48 @@ handle_request(Socket, InitBuffer) -> MaxBodySize ), ok ?= send_continue(Socket, ReqHeaders), - {ok, {Code, RspHeaders, RspBody, KeepAliveOK, ReqBdy1}, ModCtx4} ?= + {ok, NextReqBody, CallbackReqBody} ?= + case MaxBodySize of + N when N == 0 -> + case riak_api_web_body:confirm_empty(InitReqBody) of + {ok, RemBody} -> + {ok, RemBody, none}; + {error, content_too_large} -> + { + halt, + 413, + [{'Content-Type', <<"text/plain">>}], + <<>>, + [] + } + end; + _N -> + {ok, none, InitReqBody} + end, + {ok, {Code, RspHeaders, RspBody, KeepAliveOK, RetBody}, ModCtx4} ?= CallbackMod:process_request( - InitReqBdy, + CallbackReqBody, ModCtx3 ), + {ok, BufferNext} ?= + case {NextReqBody, RetBody} of + {NextReqBody, none} when NextReqBody =/= none -> + {ok, riak_api_web_body:get_buffer(NextReqBody)}; + {none, RetBody} when RetBody =/= none -> + {ok, riak_api_web_body:get_buffer(RetBody)}; + _ -> + WarnText = + "Incorrect handling of request body buffer in" + " callback module ~w", + ?LOG_WARNING(WarnText, [CallbackMod]), + { + halt, + 500, + [{'Content-Type', <<"text/plain">>}], + <<"Error handling request body">>, + [] + } + end, Keepalive = request_prefers_keepalive(Version, ReqHeaders) andalso KeepAliveOK, @@ -200,7 +237,7 @@ handle_request(Socket, InitBuffer) -> RspBody, {CallbackMod, ModCtx4}, Socket, - riak_api_web_body:get_buffer(ReqBdy1), + BufferNext, StartTime } else @@ -285,13 +322,11 @@ extend_buffer(Socket, Buffer, Needed, Timeout) when is_integer(Needed) -> case 
riak_api_web_socket:recv(Socket, Needed, get_timeout(Timeout)) of {ok, Data} when is_binary(Data) -> <>; - {error, Reason} -> - ?LOG_WARNING( - "Unexpected failure to read data from client " - "~w for socket ~0p", - [Reason, Socket] - ), + {error, closed} -> riak_api_web_socket:close(Socket), + exit(normal); + {error, Reason} -> + log_unexpected_recv(Socket, Reason), exit(normal) end; extend_buffer(Socket, Buffer, line, Timeout) -> @@ -299,15 +334,20 @@ extend_buffer(Socket, Buffer, line, Timeout) -> {ok, Data} when is_binary(Data) -> <>; {error, Reason} -> - ?LOG_WARNING( - "Unexpected failure to read data from client " - "~w for socket ~0p", - [Reason, Socket] - ), - riak_api_web_socket:close(Socket), + log_unexpected_recv(Socket, Reason), exit(normal) end. +-spec log_unexpected_recv( + riak_api_web_socket:socket(), + term() +) -> + ok | {error, term()}. +log_unexpected_recv(Socket, Reason) -> + LogText = "Unexpected failure to read data from client ~w for socket ~0p", + ?LOG_WARNING(LogText, [Reason, Socket]), + riak_api_web_socket:close(Socket). + -spec extend_buffer_fun( riak_api_web_socket:socket() ) -> @@ -632,11 +672,15 @@ generate_binary_response(RspCode, RspHeaders, RspBody) -> get_response_line({1, 0}, 200) -> <<"HTTP/1.0 200 OK\r\n">>; get_response_line({1, 0}, 201) -> - <<"HTTP/1.0 201 Accepted\r\n">>; + <<"HTTP/1.0 201 Created\r\n">>; +get_response_line({1, 0}, 204) -> + <<"HTTP/1.0 204 No Content\r\n">>; get_response_line({1, 1}, 200) -> <<"HTTP/1.1 200 OK\r\n">>; get_response_line({1, 1}, 201) -> - <<"HTTP/1.1 201 Accepted\r\n">>; + <<"HTTP/1.1 201 Created\r\n">>; +get_response_line({1, 1}, 204) -> + <<"HTTP/1.1 204 No Content\r\n">>; get_response_line({1, 0}, Code) -> iolist_to_binary( [ diff --git a/src/riak_api_web_body.erl b/src/riak_api_web_body.erl index 0f8cc6f..6406f11 100644 --- a/src/riak_api_web_body.erl +++ b/src/riak_api_web_body.erl @@ -30,7 +30,15 @@ -module(riak_api_web_body). 
--export([get_buffer/1, initiate_body/5, get_body/3, is_gzip/1]). +-export( + [ + get_buffer/1, + initiate_body/5, + get_body/3, + confirm_empty/1, + is_gzip/1 + ] +). -record(req_body, { buffer :: binary(), @@ -77,7 +85,7 @@ get_buffer(ReqBody) -> binary(), chunked | non_neg_integer(), boolean(), - pos_integer() + non_neg_integer() ) -> {ok, req_body()}. initiate_body(BufferFun, BdyBuffer, CLorChunk, UseGzip, MaxBodySize) -> @@ -92,6 +100,20 @@ initiate_body(BufferFun, BdyBuffer, CLorChunk, UseGzip, MaxBodySize) -> } }. +-spec confirm_empty( + riak_api_web_body:req_body() +) -> + {ok, riak_api_web_body:req_body()} | {error, content_too_large}. +confirm_empty(ReqBody) -> + case riak_api_web_body:get_body(ReqBody, all, 10000) of + {done, UpdBody} -> + {ok, UpdBody}; + {<<>>, UpdBody} -> + confirm_empty(UpdBody); + {error, content_too_large} -> + {error, content_too_large} + end. + -spec get_body( req_body(), all | pos_integer(), pos_integer() | undefined ) -> diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index daa3e0d..779e9a0 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -44,9 +44,10 @@ -type max_header_size() :: pos_integer(). %% The maximum size of a single header value. If concatenating multiple %% values causes issues with this limit - the may be split across headers. --type max_body_size() :: pos_integer(). +-type max_body_size() :: non_neg_integer(). %% The maximum size of the body (on the wire) i.e. prior to being unzipped - %% if compression is allowed + %% if compression is allowed. Should be set to 0 if no request body is + %% expected -type limits() :: {max_header_count(), max_header_size(), max_body_size()}. -export_type( @@ -113,7 +114,7 @@ ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). --type stream_fun() :: fun(() -> {binary(), done|stream_fun()}). +-type stream_fun() :: fun(() -> {binary(), stream_fun()}|done). -type response_body() :: binary() | {stream, stream_fun()}. 
@@ -150,10 +151,13 @@ %% acceptable. %% %% The final req_body() must also be returned, so that any remaining data on -%% the buffer is available to the acceptor. +%% the buffer is available to the acceptor. If the size_limit on the request +%% is set to 0, then a req_body() of none will be sent and should be returned. +%% If a non-zero request body is expected the whole body should be read from +%% the buffer before returning the updated request body object. -callback process_request( - riak_api_web_body:req_body(), + riak_api_web_body:req_body()|none, context() ) -> { @@ -163,7 +167,7 @@ riak_api_web_headers:header_list(), response_body(), boolean(), - riak_api_web_body:req_body() + riak_api_web_body:req_body()|none }, context() } | riak_api_web_acceptor:halt_response(). diff --git a/src/riak_api_web_headers.erl b/src/riak_api_web_headers.erl index a41173f..1ecc6b4 100644 --- a/src/riak_api_web_headers.erl +++ b/src/riak_api_web_headers.erl @@ -274,8 +274,8 @@ output_response_block(#headers{type = T, header_map = HM}) when T == response -> ) ). --define(COUNT_EXCEEDED, <<"Header count exceed ~w">>). --define(SIZE_EXCEEDED, <<"Header exceeded maximum size of ~w">>). +-define(COUNT_EXCEEDED, <<"Headers exceeded maximum count of ~w">>). +-define(SIZE_EXCEEDED, <<"Header ~s exceeded maximum size of ~w">>). 
%% @doc %% Parse a binary block representing the start of a block of request headers, @@ -293,8 +293,8 @@ parse_request_block(_B, _BFun, {MaxCount, _MS}, {_H, C}) when C > MaxCount -> {halt, 431, [], ?COUNT_EXCEEDED, [MaxCount]}; parse_request_block(Buffer, BufferFun, {MaxCount, MaxSize}, {HeaderAcc, C}) -> case erlang:decode_packet(httph_bin, Buffer, []) of - {ok, {http_header, _, _, _, V}, _} when byte_size(V) > MaxSize -> - {halt, 431, [], ?SIZE_EXCEEDED, [MaxSize]}; + {ok, {http_header, _, _, OrigKey, V}, _} when byte_size(V) > MaxSize -> + {halt, 431, [], ?SIZE_EXCEEDED, [OrigKey, MaxSize]}; {ok, {http_header, _, Key, _OrigKey, Value}, Rest} when is_atom(Key) -> parse_request_block( Rest, @@ -459,7 +459,6 @@ split_perf_test() -> ?assertMatch(L1, L2), io:format(user, "No-compile ~w compile ~w microseconds", [TS1, TS2]). - parse_block_test() -> RequestHeader1 = << diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index c191bc3..16a0839 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -544,7 +544,10 @@ put_big_header({A, B, C, D}, Port) -> [{body_format, binary}], test_client ), - ?assertMatch(<<"Header exceeded maximum size of 1024">>, RspBdy) + ?assertMatch( + <<"Header x-riak-vclock exceeded maximum size of 1024">>, + RspBdy + ) end. find_available_port([]) -> diff --git a/test/riak_api_web_trigger.erl b/test/riak_api_web_trigger.erl new file mode 100644 index 0000000..d0d3584 --- /dev/null +++ b/test/riak_api_web_trigger.erl @@ -0,0 +1,539 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2026 Martin Sumner +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% @doc Test handler that is used for triggering error conditions + +-module(riak_api_web_trigger). + +-behaviour(riak_api_web_handler). + +-export( + [ + match_route/3, + check_permissions/4, + parse_query_params/2, + parse_request_headers/2, + process_request/2, + record_request/3 + ] +). + +-ifdef(TEST). +-export( + [ + setup/0, + generator/1, + cleanup/1 + ] +). +-endif. + +-record(context, { + mishandle_nonzero_body = false :: boolean(), + response_code = 200 :: 200 | 201 | 204 +}). + +-type context() :: #context{}. + +%% @doc match_route for the module +-spec match_route( + riak_api_web_acceptor:method(), + unicode:chardata(), + list(unicode:chardata()) +) -> + nomatch + | {method_not_allowed, list(riak_api_web_acceptor:method())} + | {ok, riak_api_web_handler:limits(), context()}. +match_route('PUT', _P, [<<"with_limits">>, HC, HS, BS]) when + is_binary(HC), is_binary(HS), is_binary(BS) +-> + { + ok, + { + binary_to_integer(HC), + binary_to_integer(HS), + binary_to_integer(BS) + }, + #context{} + }; +match_route(_, _, _) -> + nomatch. + +%% @doc check_permissions for using this module or route +-spec check_permissions( + riak_api_web_headers:headers(), + riak_api_web_socket:scheme(), + riak_api_web_handler:peer_ip(), + context() +) -> + {ok, context()}. +check_permissions(_Hdrs, _Scheme, _Peer, Ctx) -> + {ok, Ctx}. 
+ +%% @doc parse and validate query params, passed as a map +-spec parse_query_params( + riak_api_web_handler:query_params(), + context() +) -> + {ok, context()} | riak_api_web_acceptor:halt_response(). +parse_query_params(QueryParams, Ctx) -> + {ok, Ctx1} = + case lists:keyfind(<<"mishandle_nonzero_body">>, 1, QueryParams) of + {<<"mishandle_nonzero_body">>, true} -> + {ok, Ctx#context{mishandle_nonzero_body = true}}; + _ -> + {ok, Ctx} + end, + case lists:keyfind(<<"response_code">>, 1, QueryParams) of + {<<"response_code">>, RC} when is_binary(RC) -> + {ok, Ctx1#context{response_code = binary_to_integer(RC)}}; + _ -> + {ok, Ctx1} + end. + +%% @doc parse and validate the request headers +-spec parse_request_headers( + riak_api_web_headers:headers(), + context() +) -> + {ok, context()} | riak_api_web_acceptor:halt_response(). +parse_request_headers(_ReqHeaders, Ctx) -> + {ok, Ctx}. + +%% @doc Process the request and produce a response +-spec process_request( + riak_api_web_body:req_body() | none, + context() +) -> + { + ok, + { + riak_api_web_acceptor:response_code(), + riak_api_web_headers:header_list(), + riak_api_web_handler:response_body(), + boolean(), + riak_api_web_body:req_body() | none + }, + context() + }. +process_request(RqBdy, Ctx) -> + case {Ctx#context.mishandle_nonzero_body, RqBdy} of + {true, RqBdy} when RqBdy =/= none -> + {ok, {200, [], <<>>, true, none}, Ctx}; + {false, none} -> + {ok, {Ctx#context.response_code, [], <<>>, true, RqBdy}, Ctx}; + {false, RqBdy} -> + case riak_api_web_body:get_body(RqBdy, all, 10000) of + {Buffer, UpdBdy} when Buffer =/= error -> + {ok, {Ctx#context.response_code, [], <<>>, true, UpdBdy}, + Ctx} + end + end. + +%% @doc Record the output of the interaction +-spec record_request( + riak_api_web_handler:timings(), + riak_api_web_handler:completion(), + context() +) -> + ok. +record_request(_Timings, _Completion, _Ctx) -> + ok. 
+ +%%%============================================================================ +%%% Eunit tests +%%%============================================================================ + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +basic_handler_test_() -> + {setup, fun setup/0, fun cleanup/1, fun generator/1}. + +setup() -> + inets:start(), + TestPort = find_available_port(lists:seq(8000, 8999)), + IPAddr = {127, 0, 0, 1}, + SpecName = riak_api_web:spec_name(http, IPAddr, TestPort), + Options = + [ + {name, SpecName}, + {ip, IPAddr}, + {port, TestPort}, + {web_acceptor_pool_start_size, 4} + ], + {ok, _Pid} = riak_api_web_socket:start_link(Options), + riak_api_web:add_routes([{10, ?MODULE}]), + {ok, _HTTPC} = inets:start(httpc, [{profile, test_client}]), + ok = httpc:set_options([{verbose, false}], test_client), + {SpecName, IPAddr, TestPort}. + +generator({_SpecName, IPAddr, Port}) -> + [ + too_many_headers(IPAddr, Port), + header_too_large(IPAddr, Port), + non_zero_body(IPAddr, Port), + zero_body(IPAddr, Port), + mishandle_nonzero_body(IPAddr, Port), + handle_bad_uri(IPAddr, Port), + handle_bad_content_length(IPAddr, Port), + handle_connection_header_confusion(IPAddr, Port), + trigger_alternative_response_code( + IPAddr, + Port, + <<"1.0">>, + 201, + <<"HTTP/1.0 201 Created\r\n">> + ), + trigger_alternative_response_code( + IPAddr, + Port, + <<"1.0">>, + 204, + <<"HTTP/1.0 204 No Content\r\n">> + ), + trigger_alternative_response_code( + IPAddr, + Port, + <<"1.0">>, + 202, + <<"HTTP/1.0 202 Accepted\r\n">> + ), + trigger_alternative_response_code( + IPAddr, + Port, + <<"1.1">>, + 201, + <<"HTTP/1.1 201 Created\r\n">> + ), + trigger_alternative_response_code( + IPAddr, + Port, + <<"1.1">>, + 204, + <<"HTTP/1.1 204 No Content\r\n">> + ), + trigger_alternative_response_code( + IPAddr, + Port, + <<"1.1">>, + 202, + <<"HTTP/1.1 202 Accepted\r\n">> + ) + ]. 
+ +request_bin(HC, HS, BS, HeaderSize, BodySize) -> + request_bin(HC, HS, BS, <<"">>, HeaderSize, BodySize). + +request_bin(HC, HS, BS, QP, HeaderSize, BodySize) -> + <> = + base64:encode(crypto:strong_rand_bytes(HeaderSize)), + Body = + case BodySize of + "A" -> + crypto:strong_rand_bytes(10); + _ -> + crypto:strong_rand_bytes(BodySize) + end, + Rq = + io_lib:format( + << + "PUT /with_limits/~w/~w/~w?~s HTTP/1.1\r\n" + "Connection: close\r\n" + "Content-Length: ~w\r\n" + "X-Riak-BigHeader: ~s\r\n" + "Content-Type: application/octet-stream\r\n" + "\r\n" + "~w" + >>, + [HC, HS, BS, QP, BodySize, Header, Body] + ), + iolist_to_binary(Rq). + +request_bin_cc(HC, HS, BS, HeaderSize, BodySize, Version) -> + <> = + base64:encode(crypto:strong_rand_bytes(HeaderSize)), + Body = crypto:strong_rand_bytes(BodySize), + Rq = + io_lib:format( + << + "PUT /with_limits/~w/~w/~w?QP HTTP/~s\r\n" + "Connection: close\r\n" + "Connection: keep-alive\r\n" + "Content-Length: ~w\r\n" + "X-Riak-BigHeader: ~s\r\n" + "Content-Type: application/octet-stream\r\n" + "\r\n" + "~w" + >>, + [HC, HS, BS, Version, BodySize, Header, Body] + ), + iolist_to_binary(Rq). + +request_bin_rc(HC, HS, BS, RC, HeaderSize, BodySize, Version) -> + <> = + base64:encode(crypto:strong_rand_bytes(HeaderSize)), + Body = crypto:strong_rand_bytes(BodySize), + Rq = + io_lib:format( + << + "PUT /with_limits/~w/~w/~w?response_code=~w HTTP/~s\r\n" + "Connection: keep-alive\r\n" + "Content-Length: ~w\r\n" + "X-Riak-BigHeader: ~s\r\n" + "Content-Type: application/octet-stream\r\n" + "\r\n" + "~w" + >>, + [HC, HS, BS, RC, Version, BodySize, Header, Body] + ), + iolist_to_binary(Rq). 
+ +too_many_headers(IPAddr, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = request_bin(1, 1024, 1024, 32, 64), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + <<"HTTP/1.1 431 Request Header Fields Too Large\r\n">>, + L1 + ), + ?assertNotMatch( + nomatch, + string:find(R1, <<"Headers exceeded maximum count of 1">>) + ), + ok = gen_tcp:close(Socket) + end. + +header_too_large(IPAddr, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = request_bin(16, 64, 1024, 256, 64), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + <<"HTTP/1.1 431 Request Header Fields Too Large\r\n">>, + L1 + ), + ?assertNotMatch( + nomatch, + string:find( + R1, + <<"Header X-Riak-BigHeader exceeded maximum size of 64">> + ) + ), + ok = gen_tcp:close(Socket) + end. + +non_zero_body(IPAddr, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = request_bin(16, 2048, 0, 32, 64), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, _R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + <<"HTTP/1.1 413 Content Too Large\r\n">>, + L1 + ), + ok = gen_tcp:close(Socket) + end. + +zero_body(IPAddr, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = request_bin(16, 2048, 0, 32, 0), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, _R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + <<"HTTP/1.1 200 OK\r\n">>, + L1 + ), + ok = gen_tcp:close(Socket) + end. 
+ +mishandle_nonzero_body(IPAddr, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + QP = <<"mishandle_nonzero_body">>, + Request = request_bin(16, 2048, 1024, QP, 32, 64), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, _R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + <<"HTTP/1.1 500 Internal Server Error\r\n">>, + L1 + ), + ok = gen_tcp:close(Socket) + end. + +handle_bad_uri(IPAddr, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = request_bin(<<"badly_encoded_con%0tent">>, 2048, 0, 32, 64), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, _R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + <<"HTTP/1.1 400 Bad Request\r\n">>, + L1 + ), + ok = gen_tcp:close(Socket) + end. + +handle_bad_content_length(IPAddr, Port) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = request_bin(8, 512, 96, 32, "A"), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, _R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + <<"HTTP/1.1 400 Bad Request\r\n">>, + L1 + ), + ok = gen_tcp:close(Socket) + end. 
+ +handle_connection_header_confusion(IPAddr, Port) -> + fun() -> + {ok, Socket10} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request10 = request_bin_cc(8, 512, 96, 32, 32, <<"1.0">>), + ok = gen_tcp:send(Socket10, Request10), + {ok, Data10} = gen_tcp:recv(Socket10, 0), + {ok, L1, R1} = erlang:decode_packet(line, Data10, []), + ?assertMatch( + <<"HTTP/1.0 200 OK\r\n">>, + L1 + ), + ?assertMatch( + nomatch, + string:find(R1, <<"Connection: keep-alive">>) + ), + ?assertNotMatch( + nomatch, + string:find(R1, <<"Connection: close">>) + ), + ok = gen_tcp:close(Socket10), + {ok, Socket11} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request11 = request_bin_cc(8, 512, 96, 32, 32, <<"1.1">>), + ok = gen_tcp:send(Socket11, Request11), + {ok, Data11} = gen_tcp:recv(Socket11, 0), + {ok, L2, R2} = erlang:decode_packet(line, Data11, []), + ?assertMatch( + <<"HTTP/1.1 200 OK\r\n">>, + L2 + ), + ?assertNotMatch( + nomatch, + string:find(R2, <<"Connection: keep-alive">>) + ), + ?assertMatch( + nomatch, + string:find(R2, <<"Connection: close">>) + ), + ok = gen_tcp:close(Socket10) + end. + +trigger_alternative_response_code(IPAddr, Port, Version, RC, RM) -> + fun() -> + {ok, Socket} = + gen_tcp:connect( + IPAddr, + Port, + [binary, {packet, raw}, {active, false}] + ), + Request = request_bin_rc(16, 512, 256, RC, 32, 64, Version), + ok = gen_tcp:send(Socket, Request), + {ok, Data} = gen_tcp:recv(Socket, 0), + {ok, L1, _R1} = erlang:decode_packet(line, Data, []), + ?assertMatch( + RM, + L1 + ), + ok = gen_tcp:close(Socket) + end. + +cleanup({SpecName, _IPAddr, _Port}) -> + ok = inets:stop(), + ?assertMatch(4, riak_api_web_socket:get_active_pool_size(SpecName)), + riak_api_web_socket:stop(SpecName), + ok. 
+ +find_available_port([]) -> + no_port_found; +find_available_port([Port | Rest]) -> + case gen_tcp:listen(Port, []) of + {ok, Sock} -> + ok = gen_tcp:close(Sock), + Port; + _ -> + find_available_port(Rest) + end. + +-endif. From d5645098d250b7af53eb4054d9a6f48066c00428 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 17 Apr 2026 23:35:21 +0100 Subject: [PATCH 40/53] Allow for neither content-length or transfer-encoding Just assume that it is empty --- src/riak_api_web_acceptor.erl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index c769d7d..0fb5f0d 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -397,6 +397,9 @@ expect_body(Headers) -> {ok, {chunked, false}}; {undefined, [<<"chunked">>, <<"gzip">>]} -> {ok, {chunked, true}}; + {undefined, undefined} -> + % Assume no content - and set content-length to 0 + {ok, {0, false}}; {undefined, UnexpectedEncoding} -> UEWarn = <<"Received encoding ~0p without content length">>, bad_request(UEWarn, [UnexpectedEncoding]); @@ -905,6 +908,9 @@ expect_test() -> ] ), ?assertMatch({ok, {1024, false}}, expect_body(FixedLength)), + Empty = riak_api_web_headers:make([]), + % e.g. just curl GET from command line - no encoding or content-length + ?assertMatch({ok, {0, false}}, expect_body(Empty)), FixedLengthGZ = riak_api_web_headers:make( [ From 07d49b50b2da529fee2c9e27d78c63887b154a82 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 18 Apr 2026 01:13:39 +0100 Subject: [PATCH 41/53] Hack improvement to uri parse The uri_string:normalize is going to decode that path and the query params (resolve any percent encoding, then check the outcome is unicode). It will also handle any dot segments. If there are no dots, or percent encoding - this is unnecessary - the parsed outcome will be the same. This will normally be the case - so bypass normalization if possible. 
Paths can be quite long (with long bucket names, keys etc) - so avoiding this step has value. --- src/riak_api_web_acceptor.erl | 35 +++++++++++++++++++++++++++++------ src/riak_api_web_socket.erl | 1 + 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 0fb5f0d..b59ef8c 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -28,7 +28,7 @@ -export([start_link/1, init/2]). --export([extend_buffer/4]). +-export([extend_buffer/4, compile_detectors/0]). -include_lib("kernel/include/logger.hrl"). @@ -278,6 +278,29 @@ reset_version() -> bad_request(Error, Subs) -> {halt, 400, [], Error, Subs}. +%% @doc %% @doc Call this function when initialising API +-spec compile_detectors() -> ok. +compile_detectors() -> + CP = binary:compile_pattern([<<"%">>, <<".">>]), + persistent_term:put({?MODULE, compile_patterns}, CP). + +-spec check_normalised(uri_string:uri_map()) -> {binary(), binary()}. +check_normalised(URIMap) -> + CP = persistent_term:get({?MODULE, compile_patterns}), + Path = normalise_string(maps:get(path, URIMap, <<>>), CP), + QueryParams = normalise_string(maps:get(query, URIMap, <<>>), CP), + {Path, QueryParams}. + +normalise_string(<<>>, _CP) -> + <<>>; +normalise_string(Bin, CP) -> + case binary:match(Bin, CP) of + nomatch -> + Bin; + _ -> + uri_string:normalize(Bin) + end. + -spec split_path( iodata() ) -> @@ -291,13 +314,13 @@ bad_request(Error, Subs) -> } | halt_response(). 
split_path(URIPath) -> - case uri_string:normalize(URIPath, [return_map]) of + case uri_string:parse(URIPath) of URIMap when is_map(URIMap) -> - Path = maps:get(path, URIMap, <<"">>), - SplitPath = binary:split(Path, <<"/">>, [global, trim_all]), - case uri_string:dissect_query(maps:get(query, URIMap, <<"">>)) of + {PathN, QueryParamsN} = check_normalised(URIMap), + SplitPath = binary:split(PathN, <<"/">>, [global, trim_all]), + case uri_string:dissect_query(QueryParamsN) of QueryParams when is_list(QueryParams) -> - {ok, {Path, SplitPath, QueryParams}}; + {ok, {PathN, SplitPath, QueryParams}}; {error, QTerm, QReason} -> bad_request( <<"Query parameters not parsed ~w - ~0p">>, diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 2da7cf2..c878626 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -245,6 +245,7 @@ init(Options) -> ), riak_api_web:cache_today(), riak_api_web_headers:compile_separators(), + riak_api_web_acceptor:compile_detectors(), { ok, #socket_state{ From 6bc535c0fe474e76d03f943748d528e1538b5c7b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 18 Apr 2026 11:15:52 +0100 Subject: [PATCH 42/53] Tidy-up changes to parse vs normalise --- src/riak_api_web_acceptor.erl | 40 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index b59ef8c..12eb941 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -284,25 +284,20 @@ compile_detectors() -> CP = binary:compile_pattern([<<"%">>, <<".">>]), persistent_term:put({?MODULE, compile_patterns}, CP). --spec check_normalised(uri_string:uri_map()) -> {binary(), binary()}. -check_normalised(URIMap) -> +-spec normalise_path(binary()) -> uri_string:uri_map() | uri_string:error(). 
+normalise_path(URI) -> CP = persistent_term:get({?MODULE, compile_patterns}), - Path = normalise_string(maps:get(path, URIMap, <<>>), CP), - QueryParams = normalise_string(maps:get(query, URIMap, <<>>), CP), - {Path, QueryParams}. - -normalise_string(<<>>, _CP) -> - <<>>; -normalise_string(Bin, CP) -> - case binary:match(Bin, CP) of + case binary:match(URI, CP) of nomatch -> - Bin; + % There is no percent-encoded content, or no path reversing, and + % so it is safe to parse rather than normalise + uri_string:parse(URI); _ -> - uri_string:normalize(Bin) + uri_string:normalize(URI, [return_map]) end. -spec split_path( - iodata() + binary() ) -> { ok, @@ -314,9 +309,10 @@ normalise_string(Bin, CP) -> } | halt_response(). split_path(URIPath) -> - case uri_string:parse(URIPath) of + case normalise_path(URIPath) of URIMap when is_map(URIMap) -> - {PathN, QueryParamsN} = check_normalised(URIMap), + {PathN, QueryParamsN} = + {maps:get(path, URIMap, <<>>), maps:get(query, URIMap, <<>>)}, SplitPath = binary:split(PathN, <<"/">>, [global, trim_all]), case uri_string:dissect_query(QueryParamsN) of QueryParams when is_list(QueryParams) -> @@ -329,7 +325,7 @@ split_path(URIPath) -> end; {error, NTerm, NReason} -> bad_request( - <<"Path cannot be normalized ~w - ~0p">>, + <<"Path cannot be normalized ~w - ~0p">>, [NTerm, NReason] ) end. @@ -923,6 +919,18 @@ stream_fun() -> end end. +normalise_path_test() -> + compile_detectors(), + URI1 = <<"types/BT/buckets/B/keys/K?return_terms">>, + URI2 = <<"types/BT/buckets/../buckets/B/key%73/K?return_term%73">>, + {ok, Output1} = split_path(URI1), + {ok, Output2} = split_path(URI2), + ?assertMatch(Output1, Output2), + URI3 = <<"types/T/buckets/Swedes/keys/%C3%85berg?return_terms">>, + {ok, {_, SP, _}} = split_path(URI3), + [<<"types">>, <<"T">>, <<"buckets">>, <<"Swedes">>, <<"keys">>, Name] = SP, + ?assertMatch(<<"Åberg"/utf8>>, uri_string:unquote(Name)). 
+ expect_test() -> FixedLength = riak_api_web_headers:make( From aa0441e691cac334edb1efbf8a944e49df2ca9dd Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 22 Apr 2026 18:16:26 +0100 Subject: [PATCH 43/53] Don't leave URI elements quoted --- src/riak_api_web_acceptor.erl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 12eb941..9ac7ab1 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -293,7 +293,12 @@ normalise_path(URI) -> % so it is safe to parse rather than normalise uri_string:parse(URI); _ -> - uri_string:normalize(URI, [return_map]) + case uri_string:normalize(URI, [return_map]) of + URIMap when is_map(URIMap) -> + uri_string:percent_decode(URIMap); + {error, Type, Detail} -> + {error, Type, Detail} + end end. -spec split_path( @@ -311,8 +316,8 @@ normalise_path(URI) -> split_path(URIPath) -> case normalise_path(URIPath) of URIMap when is_map(URIMap) -> - {PathN, QueryParamsN} = - {maps:get(path, URIMap, <<>>), maps:get(query, URIMap, <<>>)}, + PathN = maps:get(path, URIMap, <<>>), + QueryParamsN = maps:get(query, URIMap, <<>>), SplitPath = binary:split(PathN, <<"/">>, [global, trim_all]), case uri_string:dissect_query(QueryParamsN) of QueryParams when is_list(QueryParams) -> @@ -929,7 +934,7 @@ normalise_path_test() -> URI3 = <<"types/T/buckets/Swedes/keys/%C3%85berg?return_terms">>, {ok, {_, SP, _}} = split_path(URI3), [<<"types">>, <<"T">>, <<"buckets">>, <<"Swedes">>, <<"keys">>, Name] = SP, - ?assertMatch(<<"Åberg"/utf8>>, uri_string:unquote(Name)). + ?assertMatch(<<"Åberg"/utf8>>, Name). 
expect_test() -> FixedLength = From 24c3964a932f5cef32b94f9436886bfcc9c03ff7 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 23 Apr 2026 14:21:01 +0100 Subject: [PATCH 44/53] Handle no auth-headers --- src/riak_api_web_security.erl | 38 +++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index dc7cb96..1c4820b 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -54,7 +54,7 @@ is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> try UserPass = base64:decode(Base64UP), [User, Pass] = string:lexemes(UserPass, ":"), - case AuthFun(User, Pass, [Peer]) of + case AuthFun(User, Pass, Peer) of {ok, SecContext} -> {ok, SecContext}; {error, Error} -> @@ -62,24 +62,23 @@ is_authorised(true, https, ReqHeaders, Peer, AuthFun) -> end catch _:ExError -> - ?LOG_WARNING("Error decoding credentials ~0p", [ExError]), - { - halt, - 400, - [?TXT_HEADER], - <<"Error decoding credentials">>, - [] - } + error_decoding_credentials(ExError) end; + undefined -> + {halt, 401, [?TXT_HEADER], <<"No credentials provided">>, []}; Unexpected -> - ?LOG_WARNING("Error decoding credentials ~0p", [Unexpected]), - {halt, 400, [?TXT_HEADER], <<"Error decoding credentials">>, []} + error_decoding_credentials(Unexpected) end; is_authorised(true, http, _ReqHeaders, _Peer, _AuthFun) -> {halt, 426, [?TXT_HEADER], <<"Upgrade required to https">>, []}; is_authorised(false, _, _ReqHeaders, _Peer, _AuthFun) -> {ok, undefined}. +error_decoding_credentials(ErrorTerm) -> + ?LOG_WARNING("Error decoding credentials ~0p", [ErrorTerm]), + {halt, 400, [?TXT_HEADER], <<"Error decoding credentials">>, []}. 
+ + %%%============================================================================ %%% Eunit tests %%%============================================================================ @@ -155,6 +154,23 @@ simple_security_test() -> {ip, {127, 0, 0, 1}}, AuthFun ) + ), + NoAuthHeaders = + riak_api_web_headers:make( + [ + {'Content-Length', <<"1024">>}, + {<<"X-Riak-VClock">>, <<"ABC123==">>} + ] + ), + ?assertMatch( + {halt, 401, [?TXT_HEADER], <<"No credentials provided">>, []}, + is_authorised( + true, + https, + NoAuthHeaders, + {ip, {127, 0, 0, 1}}, + AuthFun + ) ). make_request_headers(Combo) -> From c9dbccf9e225c2947e6197c37dc579418b13da87 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 24 Apr 2026 13:03:19 +0100 Subject: [PATCH 45/53] Set recv buffer Defaults to 1460 on OTP26 - and this means that not all data available in buffer --- test/riak_api_web_get_random.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index 3535a61..81d92f2 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -350,7 +350,7 @@ request_single_value(IPAddr, Port, Size) -> gen_tcp:connect( IPAddr, Port, - [binary, {packet, raw}, {active, false}] + [binary, {packet, raw}, {active, false}, {recbuf, 64 * 1024}] ), Request = ?REQUEST_BIN(1, Size, <<"close">>), ok = gen_tcp:send(Socket, Request), From 7e0e92ccd4699498fa4f60c744245ef20485a59f Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 24 Apr 2026 13:13:29 +0100 Subject: [PATCH 46/53] fmt issue --- src/riak_api_web_security.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/riak_api_web_security.erl b/src/riak_api_web_security.erl index 1c4820b..97c312c 100644 --- a/src/riak_api_web_security.erl +++ b/src/riak_api_web_security.erl @@ -78,7 +78,6 @@ error_decoding_credentials(ErrorTerm) -> ?LOG_WARNING("Error decoding credentials ~0p", [ErrorTerm]), {halt, 400, [?TXT_HEADER], <<"Error decoding 
credentials">>, []}. - %%%============================================================================ %%% Eunit tests %%%============================================================================ From 211af58bd9f974a5644095dd033c873f7ec80be1 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 24 Apr 2026 13:31:54 +0100 Subject: [PATCH 47/53] Remove cached vs uncached clock test Nature of clock implementation changed - and so all tests may use cached clock - and so no longer a valid comparison. --- src/riak_api_web_acceptor.erl | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 9ac7ab1..a3549aa 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -818,28 +818,6 @@ request_line_decode_test() -> get_request_line(test_socket, <<"PATCH /stats HTTP/1.0\r\n">>) ). -clock_test() -> - ok = riak_api_web:cache_today(), - {TC1, _Hdrs1} = - timer:tc(fun() -> default_response_headers(true) end), - {TC2, _Hdrs2} = - timer:tc(fun() -> default_response_headers(true) end), - {TC3, _Hdrs3} = - timer:tc(fun() -> default_response_headers(false) end), - {TC4, _Hdrs4} = - timer:tc(fun() -> default_response_headers(true) end), - timer:sleep(1000), - {TC5, _Hdrs5} = - timer:tc(fun() -> default_response_headers(true) end), - MeanUnCached = (TC1 + TC5) div 2, - MeanCached = (TC2 + TC3 + TC4) div 3, - io:format( - user, - "Cached ~w micros vs uncached ~w~n", - [MeanCached, MeanUnCached] - ), - ?assert(MeanCached < MeanUnCached). - simple_response_test() -> ok = riak_api_web:cache_today(), set_version({1, 1}), From 6ec0469bc2d69a8ec775a8fe455e5ae11bc5ac0a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 24 Apr 2026 15:34:23 +0100 Subject: [PATCH 48/53] Get Peer and Cert at start of connection Reuse Peer/Cert in all check_permissions callback on the connection. 
--- src/riak_api_web_acceptor.erl | 25 +++++++++++++++++-------- src/riak_api_web_handler.erl | 1 + src/riak_api_web_socket.erl | 14 +++++++++++--- test/riak_api_web_ets_store.erl | 5 +++-- test/riak_api_web_get_random.erl | 5 +++-- test/riak_api_web_trigger.erl | 5 +++-- 6 files changed, 38 insertions(+), 17 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index a3549aa..8789d11 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -102,7 +102,8 @@ init(Server, Listener) -> case riak_api_web_socket:accept(Listener, ?ACCEPT_TIMEOUT) of {ok, Socket} -> ok = riak_api_web_socket:acceptor_accepted(Server), - loop(Socket, <<>>); + {ok, PeerIP, Cert} = riak_api_web_socket:get_peer(Socket), + loop(Socket, <<>>, PeerIP, Cert); {error, timeout} -> init(Server, Listener); {error, {tls_alert, Alert}} -> @@ -118,13 +119,19 @@ init(Server, Listener) -> %%% Primary Loop %%%============================================================================ --spec loop(riak_api_web_socket:socket(), binary()) -> ok. -loop(Socket, InitBuffer) -> +-spec loop( + riak_api_web_socket:socket(), + binary(), + inet:ip_address(), + public_key:cert()|undefined +) -> + ok. +loop(Socket, InitBuffer, PeerIP, Cert) -> %% In the keepalive loop, the send buffer is assumed to be empty %% An so pipelining of requests (in parallel) is explicitly not supported - case handle_request(Socket, InitBuffer) of + case handle_request(Socket, InitBuffer, PeerIP, Cert) of {KeepAlive, Buffer} when KeepAlive == true -> - loop(Socket, Buffer); + loop(Socket, Buffer, PeerIP, Cert); _Close -> riak_api_web_socket:close(Socket), ok @@ -132,15 +139,16 @@ loop(Socket, InitBuffer) -> -spec handle_request( riak_api_web_socket:socket(), - binary() + binary(), + inet:ip_address(), + public_key:cert()|undefined ) -> {boolean(), binary()} | close. 
-handle_request(Socket, InitBuffer) -> +handle_request(Socket, InitBuffer, PeerIP, Cert) -> StartTime = os:system_time(microsecond), reset_version(), RequestResult = maybe - {ok, PeerIP} = riak_api_web_socket:get_peer(Socket), {ok, {Method, RawPath, Version, HdrBuffer}} ?= get_request_line(Socket, InitBuffer), set_version(Version), @@ -163,6 +171,7 @@ handle_request(Socket, InitBuffer) -> ReqHeaders, element(1, Socket), PeerIP, + Cert, InitModCtx ), {ok, ModCtx2} ?= diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 779e9a0..63f4894 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -89,6 +89,7 @@ riak_api_web_headers:headers(), riak_api_web_socket:scheme(), peer_ip(), + public_key:cert() | undefined, context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index c878626..8eb8d0d 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -520,18 +520,26 @@ close({http, Socket}) -> close({https, Socket}) -> ssl:close(Socket). --spec get_peer(socket()) -> {ok, inet:ip_address()} | {error, any()}. +-spec get_peer( + socket() +) -> + {ok, inet:ip_address(), public_key:cert()|undefined} | {error, any()}. get_peer({http, Socket}) -> case inet:peername(Socket) of {ok, {Addr, _Port}} when is_tuple(Addr) -> - {ok, Addr}; + {ok, Addr, undefined}; {error, Error} -> {error, Error} end; get_peer({https, Socket}) -> case ssl:peername(Socket) of {ok, {Addr, _Port}} when is_tuple(Addr) -> - {ok, Addr}; + case ssl:peercert(Socket) of + {ok, Cert} -> + {ok, Addr, Cert}; + _ -> + {ok, Addr, undefined} + end; {error, Error} -> {error, Error} end. 
diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index 16a0839..ee846c3 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -30,7 +30,7 @@ -export( [ match_route/3, - check_permissions/4, + check_permissions/5, parse_query_params/2, parse_request_headers/2, process_request/2, @@ -98,10 +98,11 @@ match_route(_, _, _) -> riak_api_web_headers:headers(), riak_api_web_socket:scheme(), riak_api_web_handler:peer_ip(), + public_key:cert() | undefined, context() ) -> {ok, context()}. -check_permissions(_Hdrs, _Scheme, _Peer, Ctx) -> +check_permissions(_Hdrs, _Scheme, _Peer, _Cert, Ctx) -> {ok, Ctx}. %% @doc parse and validate query params, passed as a map diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index 81d92f2..c59dabe 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -30,7 +30,7 @@ -export( [ match_route/3, - check_permissions/4, + check_permissions/5, parse_query_params/2, parse_request_headers/2, process_request/2, @@ -82,10 +82,11 @@ match_route(_, _, _) -> riak_api_web_headers:headers(), riak_api_web_socket:scheme(), riak_api_web_handler:peer_ip(), + public_key:cert() | undefined, context() ) -> {ok, context()}. -check_permissions(_Hdrs, _Scheme, _Peer, Ctx) -> +check_permissions(_Hdrs, _Scheme, _Peer, _Cert, Ctx) -> {ok, Ctx}. %% @doc parse and validate query params, passed as a map diff --git a/test/riak_api_web_trigger.erl b/test/riak_api_web_trigger.erl index d0d3584..c8d25b9 100644 --- a/test/riak_api_web_trigger.erl +++ b/test/riak_api_web_trigger.erl @@ -26,7 +26,7 @@ -export( [ match_route/3, - check_permissions/4, + check_permissions/5, parse_query_params/2, parse_request_headers/2, process_request/2, @@ -80,10 +80,11 @@ match_route(_, _, _) -> riak_api_web_headers:headers(), riak_api_web_socket:scheme(), riak_api_web_handler:peer_ip(), + public_key:cert() | undefined, context() ) -> {ok, context()}. 
-check_permissions(_Hdrs, _Scheme, _Peer, Ctx) -> +check_permissions(_Hdrs, _Scheme, _Peer, _Cert, Ctx) -> {ok, Ctx}. %% @doc parse and validate query params, passed as a map From 917f6cd71dca207537cf0a4be4d89c922798e905 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 24 Apr 2026 15:40:25 +0100 Subject: [PATCH 49/53] Format fix --- src/riak_api_web_acceptor.erl | 6 +++--- src/riak_api_web_socket.erl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 8789d11..3374200 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -123,8 +123,8 @@ init(Server, Listener) -> riak_api_web_socket:socket(), binary(), inet:ip_address(), - public_key:cert()|undefined -) -> + public_key:cert() | undefined +) -> ok. loop(Socket, InitBuffer, PeerIP, Cert) -> %% In the keepalive loop, the send buffer is assumed to be empty @@ -141,7 +141,7 @@ loop(Socket, InitBuffer, PeerIP, Cert) -> riak_api_web_socket:socket(), binary(), inet:ip_address(), - public_key:cert()|undefined + public_key:cert() | undefined ) -> {boolean(), binary()} | close. handle_request(Socket, InitBuffer, PeerIP, Cert) -> diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 8eb8d0d..4b0c349 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -523,7 +523,7 @@ close({https, Socket}) -> -spec get_peer( socket() ) -> - {ok, inet:ip_address(), public_key:cert()|undefined} | {error, any()}. + {ok, inet:ip_address(), public_key:cert() | undefined} | {error, any()}. 
get_peer({http, Socket}) -> case inet:peername(Socket) of {ok, {Addr, _Port}} when is_tuple(Addr) -> From ffa7553a42a6007612c955dcbfa4266d699136cf Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 24 Apr 2026 15:44:19 +0100 Subject: [PATCH 50/53] Make type definition more consistent --- src/riak_api_web_handler.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 63f4894..563547c 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -77,6 +77,7 @@ -type peer_ip() :: inet:ip_address(). %% The IP address of the client device connected to the socket +-type peer_cert() :: public_key:cert() | undefined. %% @doc check_permissions for using this module or route %% The context() passed will be the context() returned from match_route/2 - so @@ -89,7 +90,7 @@ riak_api_web_headers:headers(), riak_api_web_socket:scheme(), peer_ip(), - public_key:cert() | undefined, + peer_cert(), context() ) -> {ok, context()}|riak_api_web_acceptor:halt_response(). 
From a417d2e63e6f1b28107f7e019972a530d22c45b6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 24 Apr 2026 15:45:05 +0100 Subject: [PATCH 51/53] And export type --- src/riak_api_web_handler.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/riak_api_web_handler.erl b/src/riak_api_web_handler.erl index 563547c..ba88ec1 100644 --- a/src/riak_api_web_handler.erl +++ b/src/riak_api_web_handler.erl @@ -54,6 +54,7 @@ [ limits/0, peer_ip/0, + peer_cert/0, query_params/0, stream_fun/0, response_body/0, From 99b9bab9178786cfd89f37cd60314413b355a4e1 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 29 Apr 2026 16:23:01 +0100 Subject: [PATCH 52/53] Increase default backlog 128 aligns with mochiweb - otherwise some tests with riak_test that use large bursts of connections may fail --- src/riak_api_web_socket.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index 4b0c349..fe993e1 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -107,8 +107,11 @@ %% when the previous process has not completed the close | {packet, raw} | {active, boolean()} -%% After a connection is accepted the socket is manually read to be -%% decoded + %% After a connection is accepted the socket is manually read to be + %% decoded + | {backlog, pos_integer()} +%% If this is too low it may result in some requests being reset when +%% there is a burst of new connections . -type buffer_option() :: @@ -323,7 +326,8 @@ default_socket_options(IPAddr) -> binary, {reuseaddr, true}, {packet, raw}, - {active, false} + {active, false}, + {backlog, 128} ]. 
-spec get_acceptor_pool(socket(), list(option())) -> From 1ce27c596fc860233f9ec166fd812024827a0401 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 1 May 2026 11:19:42 +0100 Subject: [PATCH 53/53] Basic doc added, with doc-inspired tidying --- README.md | 39 +------ docs/silverMachine.md | 178 +++++++++++++++++++++++++++++++ src/riak_api_web.erl | 44 ++++---- src/riak_api_web_acceptor.erl | 34 +++--- src/riak_api_web_socket.erl | 37 ++++--- test/riak_api_web_ets_store.erl | 2 +- test/riak_api_web_get_random.erl | 5 +- test/riak_api_web_trigger.erl | 2 +- 8 files changed, 254 insertions(+), 87 deletions(-) create mode 100644 docs/silverMachine.md diff --git a/README.md b/README.md index 4f71c37..2beec6f 100644 --- a/README.md +++ b/README.md @@ -2,40 +2,11 @@ ![Riak API OpenRiak Status](https://github.com/OpenRiak/riak_api/actions/workflows/erlang.yml/badge.svg?branch=openriak-4.0) -This OTP application encapsulates services for presenting Riak's -public-facing interfaces. Currently this means a generic interface for -exposing Protocol Buffers-based services; HTTP services via Webmachine -will be moved here at a later time. +This OTP application encapsulates services for presenting Riak's public-facing interfaces. -## Contributing +There two APIs: -We encourage contributions to `riak_api` from the community. +- An API using protocol buffers, with a codec defined in [riak_pb](https://github.com/OpenRiak/riak_pb), with the handling of messages managed using `riak_kv_pb_*` modules within Riak KV. +- A HTTP REST-based API (code-named Silver Machine), with the handling of requests defined using `riak_kv_ag_*` modules that implement the callbacks defined in the `riak_api_web_handler` behaviour. -1. Fork the [`riak_api`](https://github.com/basho/riak_api) repository - on Github. -2. Clone your fork or add the remote if you already have a clone of - the repository. 
- - ``` - git clone git@github.com:yourusername/riak_api.git - # or - git remote add mine git@github.com:yourusername/riak_api.git - ``` - -3. Create a topic branch for your change. - - ``` - git checkout -b some-topic-branch - ``` - -4. Make your change and commit. Use a clear and descriptive commit - message, spanning multiple lines if detailed explanation is needed. -5. Push to your fork of the repository and then send a pull-request - through Github. - - ``` - git push mine some-topic-branch - ``` - -6. A Basho engineer or community maintainer will review your patch and - merge it into the main repository or send you feedback. +For further information on using [Silver Machine see the provided document](/docs/silverMachine.md). diff --git a/docs/silverMachine.md b/docs/silverMachine.md new file mode 100644 index 0000000..3483e8e --- /dev/null +++ b/docs/silverMachine.md @@ -0,0 +1,178 @@ +# Silver Machine + +## Overview + +Silver Machine is a HTTP/REST request handler. It is designed to be simpler and more performant than Webmachine/Mochiweb, with the trade-off that it provides less complete compliance with standards within the framework: + +- "simpler" means reduced volume of code within the framework (less than half), better use of dialyzer specs to clarify safe usage, and a behaviour module with a smaller and fixed number of callbacks. +- "performant" means less CPU overhead when handling Riak requests, especially those carrying a large volume of information via HTTP request headers. + +Silver Machine took direct inspiration from the [Elli HTTP server](https://github.com/elli-lib/elli), using it as a source of ideas for improving performance. + +Silver Machine is not intended to be used outside of Riak. It is a framework developed specifically for the Riak use-case, and may have breaking changes within the framework at any time if such a change is required to support efficiency in Riak.
+ +Using Silver Machine requires three actions: + +- configuration to start [listeners](#listeners); +- the [loading of routes](#adding-routes), a prioritised list of modules that will provide endpoints via the listener; +- the definition of those modules to handle requests, implemented following the `riak_api_web_handler` [behaviour](#the-riak_api_web_handler-behaviour). + +### Listeners + +A listener is started using `riak_api_web_socket:start_link/1`, where the function takes as its argument a list of options: + +```erlang +-type option() :: + {acceptor_pool_start_size, pos_integer()} + | {acceptor_pool_max_size, pos_integer()} + | {ssl, boolean()} + | {ssl_opts, [ssl:tls_server_option()]} + | {ip, inet:ip_address()} + | {port, inet:port_number()} + | {name, server_name()}. +``` + +Within Riak the `riak_api_sup` supervisor is used to discover the bindings (IP and Port pairs) from the configuration, and start a listener for each binding. + +In addition to the passed-in options, three further options can be set using environment variables: + +- `riak_api/web_kernel_buffer` - which will set the TCP `buffer`; +- `riak_api/web_receive_buffer` - which will set the TCP `recbuf`; +- `riak_api/web_send_buffer` - which will set the TCP `sndbuf`. + +If no environment variables are set, then the `recbuf` will be changed from its default setting to `131072`, and this will automatically [change the `buffer` setting](https://github.com/erlang/otp/issues/9355). + +Each listener is a socket (SSL or TCP), with a pool of acceptors. The acceptors will listen on the socket, and when new connections are made the listen results in the connection being managed by an available acceptor. The acceptor will live for the duration of the connection, but only for the duration of the connection. When an acceptor is assigned a connection (at the start), a new acceptor is started and added to the pool to replace the busy acceptor.
There should always be a pool of acceptors ready; however due to the potential timing delays in the assignment of connections to acceptors the `backlog` on the socket (i.e. the backlog of unhandled connections) is configured to 128 (normal OTP default is 5) to avoid unnecessary connection resets. + +The acceptor pool maximum and starting size is defined at startup via the passed in options. If no such options are passed for that listener the defaults are taken from environment variables `riak_api/web_acceptor_pool_start_size` and `riak_api/web_acceptor_pool_max_size`. + +### Adding routes + +There are multiple routing tables - one for `default` routes, and one for each Port. Routes are lists of {1..100, module()} tuples. When a request is processed each module in the routing table will be matched against the request (using the `match_route/3` callback function) until a match is found. If port-specific routes are provided these will be used, default routes will only be used if no port-specific routes have been added. + +Routes can be added using `riak_api_web:add_routes/1` or `riak_api_web:add_routes/2`. + +Note that when implementing the `match_route/3` callback of a module, care needs to be taken when different modules support the same path but with different methods. The routes will be checked until the first `ok` match or `method_not_allowed` match, and then no further routes will be checked. Other, lower priority, routes will only be checked when `nomatch` is returned. + +In the current `riak_kv` implementation only `default` routes are set, so all HTTP/HTTPS listeners have the same functionality.
+ +### The `riak_api_web_handler` behaviour + +The acceptor has a standard workflow of functions for handling a request (`riak_api_web_acceptor:handle_request/5`), and included in that workflow are the calls to the six callbacks required in the `riak_api_web_handler` behaviour: + +- [`match_route/3`](#match_route); + - Passed path information, to be potentially matched against the routing needs for the module. +- [`check_permissions/5`](#check_permissions); + - Passed credential information (request headers and peer details), used to potentially screen requests based on authentication and authorisation needs. +- [`parse_query_params/2`](#parse_query_params) + - Passed any parsed query parameters included in the request for validation. +- [`parse_request_headers/2`](#parse_request_headers) + - Passed all request headers included in the request for validation. +- [`process_request/2`](#process_request) + - Passed a `riak_api_web_body:req_body/0` object (or `none`) so that the value may be fetched, and the request processed and the response returned (either as a binary or a streaming function that will incrementally generate the binary). +- [`record_request/3`](#record_request) + - Passed timing information about the request to be recorded as required. + +At each callback a `context` object is required to be returned. The format of this object is opaque to the acceptor, but the object will be forwarded as an attribute as-is to the next callback in the list. So as the request is parsed and validated through its callback functions, the module should update the `context` with any information that might be relevant to its own callback functions later in the handling of the request. + +For `check_permissions/5`, `parse_query_params/2`, `parse_request_headers/2` and `process_request/2` the workflow can be terminated by returning a `riak_api_web_acceptor:halt_response()` rather than a positive response.
This will prompt the workflow to be immediately terminated, and a response returned with the information contained within the `halt_response()` (e.g. response code, headers and message body). + +When a `halt_response()` is returned the connection will be closed, even where a `keepalive` request has been made. + +#### match_route + +This callback function should attempt to match the module to the path, and either return: + +- an `ok` process with the size limits for the module (count of headers, maximum byte-size of an individual header, and the maximum size of the body of the request), and an initial context object for the request. +- a `nomatch` response indicating the module does not support that path (and so the next module in the route priority list should be tried). +- a `method_not_allowed` response to indicate that the path matched but the method is not in the supported list of methods for this module. + +The `match_route` callback will receive 'Method' (an atom representing the HTTP request method), 'Path' (the full path as a binary string) and `Split Path` (the full path split into a list of individual elements separated by "/"). + +e.g. `GET /types/T/buckets/B/keys/K?returnbody=true HTTP/1.1` will lead to call to: + +```erlang +match_route('GET', <<"/types/T/buckets/B/keys/K">>, [<<"types">>, <<"T">>, <<"buckets">>, <<"B">>, <<"keys">>, <<"K">>]) +``` + +The split path (list) is trimmed of any leading or trailing empty elements e.g. "/stats/" and "/stats" will be equivalent. The URL will be normalised and unquoted before calling `match_route/3` - e.g. handling any "\..\"-style directory traversal and % encoding of non-standard characters. + +All modules are tried until either an `ok` response or a `method_not_allowed` response is returned. The `method_not_allowed` response will trigger a [HTTP 405](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/405) error response. 
+ +#### check_permissions + +The check_permissions callback function will be passed: + +- All request headers as a `riak_api_web_headers:headers()` object which can be managed via the `riak_api_web_headers` module. +- The scheme for the listener (e.g. http or https). +- The IP address of the peer making the request. +- The client certificate used in any TLS negotiation (or `undefined` if no certificate used). +- The context object returned from the `match_route` function call. The context object should have been initiated with any details necessary to make a permission check from the path (e.g. in Riak the object Bucket). + +The check_permission should return either an `ok` response with a potentially updated context or a `halt_response()`. + +Within Riak, most check_permissions implementation should use the `riak_kv_web_common:check_permissions/5` function, to standardise the application of security controls. + +#### parse_query_params + +The query parameters will be passed as list of `{Key, Value}` tuples with the Key and Value both being binaries as they were presented in the URI (following percent decoding). If a key is provided as a parameter within the query parameters without value, the value will be the atom `true`. + +As with other callbacks, valid responses are either an `ok` with updated context object, or a `halt_response()` (for example if an invalid query parameter has been provided). + +#### parse_request_headers + +The request headers will be passed as `riak_api_web_headers:headers()` object which can be managed via the `riak_api_web_headers` module. Note this will be the same information as passed into the `check_permissions` callback. It is recommended to defer parsing non-security request headers until this stage (when permissions have already been checked), to reduce the workload undertaken on unverified requests. 
+ +As with other callbacks, valid responses are either an `ok` with updated context object, or a `halt_response()` (for example if an invalid request header has been provided). + +The `riak_api_web_headers` module requires knowledge of whether the header has an `atom()` or a `binary()` as a key. The module has a `standard_header_key()` type which lists all header keys which will be atoms and not binaries. For binary keys, as well as fetching individual headers by key, it is also possible to fold to return all headers with a given prefix. When fetching binary header keys, it can be specified that the header key being requested has already been lower-cased (using `string:casefold/1`) so that lower-casing does not need to be repeated within the function. + +Note that headers may have single values or multiple values, check the function spec and ensure both cases are handled if required. Multiple values will occur either because the header value is a comma-separated list, or because multiple header values have been provided under a repeated header key. + +Only the 'Content-Length' and 'Transfer-Encoding' headers are parsed within the framework - to obtain a static content length, or prepare for a chunked request body. Only the transfer-encoding of `chunked` is managed within the framework. + +There is no handling of information in other request headers within Silver Machine, all other headers are only handled within the callback functions. So all headers that are expected to have meaning must be parsed and have appropriate details added to the context for downstream consideration in the `process_request/2` callback function (e.g. handling conditional headers such as 'If-None-Match', matching 'Accept' header to content-types provided, or validating 'Referer' details).
+ +#### process_request + +The process_request callback function will be passed a `riak_api_web_body:req_body/0` object, or the atom `none` if and only if it had been stipulated in the size limits returned from the `match_route/3` callback that only a 0-length body is supported. Before providing a `none` request body, the buffer is checked by the acceptor to confirm no body has been provided (and a ['413 Content Too Large'](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/413) response is returned if there is a body present). + +At this stage the request body, if present, has not been read from the TCP buffer, and so sending of a large body will be suspended at the client (if the TCP window is full). The acceptor (and the service it supports) is protected from a memory perspective until the body is fetched by the process_request callback function. Fetching the body is managed through `riak_api_web_body:get_body/3` function, and the body may be fetched entirely, or partially up to a size limit. Selecting the body in slices may be used if the intention is to slice and store large inbound requests without reading the whole request into memory. There is no relationship between slices and chunks - slice sizes are defined on the server side, and chunk sizes are defined on the client side. + +The `process_request/2` callback function should not return a positive response unless the entirety of the body has been read. If the reading of the body is curtailed then a `halt_response()` must be returned as otherwise the handling of further requests in a keepalive connection may be corrupted. + +A positive response must contain a response tuple as well as `ok` and the updated context object. This tuple consists of: + +```erlang +{ + riak_api_web_acceptor:response_code(), + riak_api_web_headers:header_list(), + response_body(), + boolean(), + riak_api_web_body:req_body()|none +}. 
+``` + +- response_code; the HTTP response code to be returned, this may be an error code as well as a positive code. If supporting pipelined requests, it may be preferable to return `404` errors as a positive response rather than as a `halt_response()` that would cause the connection to be terminated. +- response header_list; a list of Key/Value tuples representing the headers to be added to the response. Keys should be atom() if it is a standard_key, and otherwise a binary in the case it is intended to be presented. The only headers added by Silver Machine will be a 'Date' header, a 'Server' header, a 'Connection' header and either a 'Content-Length' header or 'Transfer-Encoding' header as appropriate. Any user-provided headers that overlap with these default headers will override the defaults. +- The response body; either a binary() (in which case the response will be sent immediately with a fixed `Content-Length`), or a stream function in which case every binary returned from the stream function will be returned as a 'chunk' in a chunk-encoded response (until the function returns the atom `done`). +- A keepalive supported boolean; may be switched from true to false if there is a requirement to close this connection rather than allow further requests to be received. +- the `req_body` remainder, i.e. the final `req_body` object returned from the call to `riak_api_web_body:get_body/3`. In fetching the body, when supporting pipelined requests some of a subsequent request may be read into the buffer, and returning the final req_body object ensures that this buffer is available to the acceptor to process that request. The atom `none` should be returned if the atom `none` was received as the request body. + +For an example stream function to return the body, see the `riak_kv_ag_index` module.
Note that when calling `riak_api_web_body:get_body/3` the req_body object tracks the volume of data received versus the configured size limit - and the call may return `{error, content_too_large}` if the size is exceeded. + +#### record_request + +The record_request callback function is passed timing information from the handling of the request, as well as the request context object. This is intended to be used for any statistics or logging activity required by the module. + + +## Limitations + +Silver Machine is designed to support a subset of the HTTP protocol; the restrictions include: + +- Limited to only support HTTP 1.0 and HTTP 1.1 connections; + - HTTP 1.1 request pipelining is supported, but currently subject to limited testing. Adding multiplexed requests (i.e. HTTP 2.0) will require a significant change. +- Supported methods are limited to 'OPTIONS', 'GET', 'HEAD', 'POST', 'PUT', 'DELETE' and 'TRACE' - but all functionality must exist within the callback functions of the handler modules. The framework is unaware of what method is being used (and so may return a body to a HEAD request for example). +- Only 'Content-Length' and 'Transfer-Encoding' request headers are understood by the framework, and only chunked (rather than compressed) encoding is handled automatically. Only 'Server', 'Date' and 'Connection' response headers are added by the framework, if not present in the output from the callback function. +- There is no control over the ordering of response HTTP headers; headers in the response on the wire may be returned in a different order to headers in the response returned by a callback function. +- TLS support is limited to that offered by the OTP deployment. 
diff --git a/src/riak_api_web.erl b/src/riak_api_web.erl index b05660c..fed15f6 100644 --- a/src/riak_api_web.erl +++ b/src/riak_api_web.erl @@ -29,7 +29,8 @@ get_listeners/0, binding_config/2, add_routes/1, - get_route/3, + add_routes/2, + get_route/4, spec_name/3, rfc1123_date/1, rfc1123_date/2, @@ -38,8 +39,7 @@ ] ). --define(ROUTE_KEY, {?MODULE, web_routes}). - +-type binding() :: {inet:ip_address(), inet:port_number()}. -type route() :: {1..100, module()}. %%%============================================================================ @@ -48,11 +48,20 @@ -spec add_routes(list(route())) -> ok. add_routes(Routes) -> - CurrentRoutes = persistent_term:get(?ROUTE_KEY, []), + add_routes(default, Routes). + +-spec add_routes( + inet:port_number() | default, + list(route()) +) -> + ok. +add_routes(ServerName, Routes) -> + CurrentRoutes = persistent_term:get({?MODULE, ServerName}, []), NewRoutes = lists:keysort(1, CurrentRoutes ++ Routes), - persistent_term:put(?ROUTE_KEY, NewRoutes). + persistent_term:put({?MODULE, ServerName}, NewRoutes). -spec get_route( + inet:port_number(), riak_api_web_acceptor:method(), unicode:chardata(), list(unicode:chardata()) @@ -64,16 +73,20 @@ add_routes(Routes) -> any() } | riak_api_web_acceptor:halt_response(). -get_route(Method, Path, SplitPath) -> - CurrentRoutes = persistent_term:get(?ROUTE_KEY, []), - get_route(CurrentRoutes, Method, Path, SplitPath). +get_route(Port, Method, Path, SplitPath) -> + CurrentRoutes = + persistent_term:get( + {?MODULE, Port}, + persistent_term:get({?MODULE, default}, []) + ), + select_route(CurrentRoutes, Method, Path, SplitPath). 
-get_route([], _Method, _Path, _SP) -> +select_route([], _Method, _Path, _SP) -> {halt, 404, [], <<>>, []}; -get_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> +select_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> case CallbackMod:match_route(Method, Path, SplitPath) of nomatch -> - get_route(Rest, Method, Path, SplitPath); + select_route(Rest, Method, Path, SplitPath); {method_not_allowed, AllowedMethods} -> AllowHdrVal = iolist_to_binary( @@ -96,14 +109,9 @@ get_route([{_P, CallbackMod} | Rest], Method, Path, SplitPath) -> get_listeners() -> get_listeners(http) ++ get_listeners(https). +-spec get_listeners(http | https) -> list({https | https, binding()}). get_listeners(Scheme) -> - Listeners = - case app_helper:try_envs([{riak_api, Scheme}], []) of - {riak_api, Scheme, List} when is_list(List) -> - List; - _ -> - [] - end, + Listeners = application:get_env(riak_api, Scheme, []), lists:usort([{Scheme, Binding} || Binding <- Listeners]). binding_config(Scheme, Binding) -> diff --git a/src/riak_api_web_acceptor.erl b/src/riak_api_web_acceptor.erl index 3374200..4f4cd79 100644 --- a/src/riak_api_web_acceptor.erl +++ b/src/riak_api_web_acceptor.erl @@ -26,7 +26,7 @@ -feature(maybe_expr, enable). -endif. --export([start_link/1, init/2]). +-export([start_link/2, init/3]). -export([extend_buffer/4, compile_detectors/0]). @@ -93,22 +93,22 @@ %%% API %%%============================================================================ --spec start_link(riak_api_web_socket:socket()) -> pid(). -start_link(Socket) -> - spawn_link(?MODULE, init, [self(), Socket]). +-spec start_link(riak_api_web_socket:socket(), inet:port_number()) -> pid(). +start_link(Socket, Port) -> + spawn_link(?MODULE, init, [self(), Socket, Port]). --spec init(pid(), riak_api_web_socket:socket()) -> ok. -init(Server, Listener) -> +-spec init(pid(), riak_api_web_socket:socket(), inet:port_number()) -> ok. 
+init(Server, Listener, Port) -> case riak_api_web_socket:accept(Listener, ?ACCEPT_TIMEOUT) of {ok, Socket} -> ok = riak_api_web_socket:acceptor_accepted(Server), {ok, PeerIP, Cert} = riak_api_web_socket:get_peer(Socket), - loop(Socket, <<>>, PeerIP, Cert); + loop(Socket, <<>>, PeerIP, Cert, Port); {error, timeout} -> - init(Server, Listener); + init(Server, Listener, Port); {error, {tls_alert, Alert}} -> ?LOG_WARNING("TLS Alert received ~0p", [Alert]), - init(Server, Listener); + init(Server, Listener, Port); {error, closed} -> ok; {error, Other} -> @@ -123,15 +123,16 @@ init(Server, Listener) -> riak_api_web_socket:socket(), binary(), inet:ip_address(), - public_key:cert() | undefined + public_key:cert() | undefined, + inet:port_number() ) -> ok. -loop(Socket, InitBuffer, PeerIP, Cert) -> +loop(Socket, InitBuffer, PeerIP, Cert, Port) -> %% In the keepalive loop, the send buffer is assumed to be empty %% An so pipelining of requests (in parallel) is explicitly not supported - case handle_request(Socket, InitBuffer, PeerIP, Cert) of + case handle_request(Socket, InitBuffer, PeerIP, Cert, Port) of {KeepAlive, Buffer} when KeepAlive == true -> - loop(Socket, Buffer, PeerIP, Cert); + loop(Socket, Buffer, PeerIP, Cert, Port); _Close -> riak_api_web_socket:close(Socket), ok @@ -141,10 +142,11 @@ loop(Socket, InitBuffer, PeerIP, Cert) -> riak_api_web_socket:socket(), binary(), inet:ip_address(), - public_key:cert() | undefined + public_key:cert() | undefined, + inet:port_number() ) -> {boolean(), binary()} | close. 
-handle_request(Socket, InitBuffer, PeerIP, Cert) -> +handle_request(Socket, InitBuffer, PeerIP, Cert, Port) -> StartTime = os:system_time(microsecond), reset_version(), RequestResult = @@ -159,7 +161,7 @@ handle_request(Socket, InitBuffer, PeerIP, Cert) -> {MaxHdrCount, MaxHdrSize, MaxBodySize}, InitModCtx } ?= - riak_api_web:get_route(Method, Path, SplitPath), + riak_api_web:get_route(Port, Method, Path, SplitPath), {ok, ReqHeaders, BdyBuffer} ?= get_request_headers( HdrBuffer, diff --git a/src/riak_api_web_socket.erl b/src/riak_api_web_socket.erl index fe993e1..8390910 100644 --- a/src/riak_api_web_socket.erl +++ b/src/riak_api_web_socket.erl @@ -241,7 +241,8 @@ init(Options) -> end, SocketOpts = default_socket_options(IP), {ok, Listener} = listen(Protocol, Port, SocketOpts, BufferOpts, SSLOpts), - {AcceptorPool, StartSize, MaxSize} = get_acceptor_pool(Listener, Options), + {AcceptorPool, StartSize, MaxSize} = + get_acceptor_pool(Listener, Port, Options), ?LOG_INFO( "Acceptor pool for web started on IP ~0p port ~w of size ~w", [IP, Port, StartSize] @@ -284,7 +285,8 @@ handle_cast(accepted, State) -> PS when PS < State#socket_state.max_pool_size -> P = riak_api_web_acceptor:start_link( - State#socket_state.listener + State#socket_state.listener, + State#socket_state.port ), { noreply, @@ -330,28 +332,28 @@ default_socket_options(IPAddr) -> {backlog, 128} ]. --spec get_acceptor_pool(socket(), list(option())) -> +-spec get_acceptor_pool(socket(), inet:port_number(), list(option())) -> {list(pid()), pos_integer(), pos_integer()}. 
-get_acceptor_pool(Listener, Options) -> +get_acceptor_pool(Listener, Port, Options) -> StartSize = - case lists:keyfind(web_acceptor_pool_start_size, 1, Options) of - {web_acceptor_pool_start_size, SS} when is_integer(SS), SS > 0 -> + case lists:keyfind(acceptor_pool_start_size, 1, Options) of + {acceptor_pool_start_size, SS} when is_integer(SS), SS > 0 -> SS; false -> application:get_env( riak_api, - acceptor_pool_start_size, + web_acceptor_pool_start_size, ?POOL_SIZE_DEFAULT ) end, MaxSize = - case lists:keyfind(web_acceptor_pool_max_size, 1, Options) of - {web_acceptor_pool_start_size, MS} when is_integer(MS), MS > 0 -> + case lists:keyfind(acceptor_pool_max_size, 1, Options) of + {acceptor_pool_max_size, MS} when is_integer(MS), MS > 0 -> MS; false -> application:get_env( riak_api, - acceptor_pool_max_size, + web_acceptor_pool_max_size, ?POOL_SIZE_MAX_DEFAULT ) end, @@ -362,7 +364,7 @@ get_acceptor_pool(Listener, Options) -> MaxSize >= StartSize -> { - start_acceptor_pool(Listener, StartSize), + start_acceptor_pool(Listener, Port, StartSize), StartSize, MaxSize }; @@ -373,17 +375,22 @@ get_acceptor_pool(Listener, Options) -> [InvalidConfig] ), { - start_acceptor_pool(Listener, ?POOL_SIZE_DEFAULT), + start_acceptor_pool(Listener, Port, ?POOL_SIZE_DEFAULT), ?POOL_SIZE_DEFAULT, ?POOL_SIZE_MAX_DEFAULT } end. --spec start_acceptor_pool(socket(), pos_integer()) -> list(pid()). -start_acceptor_pool(Listener, Size) -> +-spec start_acceptor_pool( + socket(), + inet:port_number(), + pos_integer() +) -> + list(pid()). 
+start_acceptor_pool(Listener, Port, Size) -> lists:map( fun(_I) -> - P = riak_api_web_acceptor:start_link(Listener), + P = riak_api_web_acceptor:start_link(Listener, Port), true = is_pid(P), P end, diff --git a/test/riak_api_web_ets_store.erl b/test/riak_api_web_ets_store.erl index ee846c3..cad4df8 100644 --- a/test/riak_api_web_ets_store.erl +++ b/test/riak_api_web_ets_store.erl @@ -265,7 +265,7 @@ setup() -> {name, SpecName}, {ip, IPAddr}, {port, TestPort}, - {web_acceptor_pool_start_size, 4} + {acceptor_pool_start_size, 4} ], {ok, _Pid} = riak_api_web_socket:start_link(Options), riak_api_web:add_routes([{20, ?MODULE}]), diff --git a/test/riak_api_web_get_random.erl b/test/riak_api_web_get_random.erl index c59dabe..999550b 100644 --- a/test/riak_api_web_get_random.erl +++ b/test/riak_api_web_get_random.erl @@ -262,10 +262,11 @@ setup() -> {name, SpecName}, {ip, IPAddr}, {port, TestPort}, - {web_acceptor_pool_start_size, 4} + {acceptor_pool_start_size, 4}, + {acceptor_pool_max_size, 8} ], {ok, _Pid} = riak_api_web_socket:start_link(Options), - riak_api_web:add_routes([{10, ?MODULE}]), + riak_api_web:add_routes(TestPort, [{10, ?MODULE}]), {ok, _HTTPC} = inets:start(httpc, [{profile, test_client}]), ok = httpc:set_options([{verbose, false}], test_client), {SpecName, IPAddr, TestPort} diff --git a/test/riak_api_web_trigger.erl b/test/riak_api_web_trigger.erl index c8d25b9..949e526 100644 --- a/test/riak_api_web_trigger.erl +++ b/test/riak_api_web_trigger.erl @@ -177,7 +177,7 @@ setup() -> {name, SpecName}, {ip, IPAddr}, {port, TestPort}, - {web_acceptor_pool_start_size, 4} + {acceptor_pool_start_size, 4} ], {ok, _Pid} = riak_api_web_socket:start_link(Options), riak_api_web:add_routes([{10, ?MODULE}]),