Skip to content

Commit ecd64eb

Browse files
committed
rough initial version of merging
1 parent 5a6d6dc commit ecd64eb

5 files changed

Lines changed: 247 additions & 21 deletions

File tree

lib/b4.ex

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,15 @@ defmodule B4 do
5252
end
5353

5454
@doc """
Merges the read-only database files under `directory`, compacting away
dead entries.

Returns `:ok` on success, or `{:error, reason}` where `reason` is the
non-`:ok` value returned by the failing step.

The merge-in-progress flag is cleared in an `after` clause so it cannot be
left set if the merge raises or the underlying `GenServer` call times out
(a timeout exits rather than returning an error tuple).
"""
def merge(directory, timeout \\ 5_000) do
  case Writer.set_merge_in_progress(directory, true) do
    :ok ->
      result =
        try do
          KeydirOwner.merge(directory, timeout)
        after
          # Always clear the flag, even on raise/exit/timeout, so the
          # writer is never stuck in merge mode.
          Writer.set_merge_in_progress(directory, false)
        end

      case result do
        :ok -> :ok
        error -> {:error, error}
      end

    error ->
      {:error, error}
  end
end
5965

6066
def close(directory) do

lib/b4/keydir.ex

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ defmodule B4.Keydir do
2828
end
2929
end
3030

31+
@doc """
Returns `true` if `key` has an entry in the keydir table `tid`.
"""
def contains_key?(tid, key), do: :ets.member(tid, key)
34+
3135
def keys(tid) do
3236
tid
3337
|> :ets.match({:"$1", :_, :_, :_, :_})

lib/b4/keydir_owner.ex

Lines changed: 146 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ defmodule B4.KeydirOwner do
66
alias B4.{Files, Keydir, Writer}
77

88
defmodule State do
  @moduledoc false

  # Server state for the keydir owner:
  #   directory        - database directory this owner manages
  #   tid              - ETS table holding the keydir
  #   target_file_size - byte threshold at which merge output files rotate
  @enforce_keys [:directory, :tid, :target_file_size]
  defstruct [:directory, :tid, :target_file_size]

  # NOTE(review): directory looks like a path string from the callers in
  # view — confirm; tid comes from Keydir.new/0 and is used with :ets.
  @type t :: %__MODULE__{
          directory: String.t(),
          tid: :ets.tab(),
          target_file_size: pos_integer()
        }
end
1212

1313
def start_link(%{directory: directory} = args) do
@@ -23,7 +23,7 @@ defmodule B4.KeydirOwner do
2323
end
2424

2525
@impl GenServer
26-
def init(%{directory: directory} = _init_arg) do
26+
def init(%{directory: directory, options: [target_file_size: target_file_size]} = _init_arg) do
2727
tid = Keydir.new()
2828

2929
:ok = :persistent_term.put({:tid, directory}, tid)
@@ -34,12 +34,15 @@ defmodule B4.KeydirOwner do
3434
Files.apply_file_to_keydir(path, tid)
3535
end)
3636

37-
{:ok, %State{directory: directory, tid: tid}}
37+
{:ok, %State{directory: directory, tid: tid, target_file_size: target_file_size}}
3838
end
3939

4040
@impl GenServer
def handle_call(
      :merge,
      _from,
      %State{directory: directory, tid: tid, target_file_size: target_file_size} = state
    ) do
  # Merge compacts the read-only data files. At any given time there is at
  # most ONE live entry per key (the one the keydir points at); every other
  # on-disk entry for that key is stale and free for deletion. Live entries
  # are copied into fresh merge files, the keydir is repointed at the
  # copies, and the old read-only files are deleted.
  current_write_file_id = Writer.write_file_id(directory)
  read_only_database_files = Files.read_only_database_files(directory, current_write_file_id)

  {:ok, %{write_file: merge_write_file, file_id: merge_write_file_id}} =
    Writer.new_write_file(directory)

  # ONE accumulator threaded across ALL source files. The previous version
  # restarted the inner accumulator (position 0, the original file handle)
  # for every source file, so recorded keydir positions drifted from the
  # real append offset after the first file, and any rotated merge file was
  # dropped between files.
  initial_acc = %{
    merge_write_file: merge_write_file,
    merge_write_file_id: merge_write_file_id,
    merge_write_file_position: 0,
    merge_file_ids: MapSet.new([merge_write_file_id])
  }

  _final_acc =
    read_only_database_files
    |> Stream.flat_map(&Files.stream_entries/1)
    |> Enum.reduce(initial_acc, fn %{entry: entry, meta: %{}}, acc ->
      merge_entry(entry, tid, directory, target_file_size, acc)
    end)

  # Deleting these files happens AFTER the keydir has been updated with the
  # new on-disk location for each live entry, so this data is "dead" and has
  # no readers — barring a reader that initiated some incredibly slow read
  # before the merge started.
  #
  # TODO: that race is possible (if unlikely); guard against it in some way.
  Enum.each(read_only_database_files, &File.rm!/1)

  {:reply, :ok, state}
end

# Copies one on-disk entry into the current merge file iff the keydir still
# points at exactly this entry (same entry_id). Dead entries (key deleted)
# and stale entries (keydir points at a newer entry_id) leave the
# accumulator untouched.
defp merge_entry(
       %{
         crc32: crc32,
         entry_id: on_disk_entry_id,
         key_size: key_size,
         value_size: value_size,
         key_bytes: key_bytes,
         value_bytes: value_bytes
       },
       tid,
       directory,
       target_file_size,
       acc
     ) do
  # Keys were serialized by our own writer; this is not untrusted input.
  key = :erlang.binary_to_term(key_bytes)

  case Keydir.fetch(tid, key) do
    {:ok, {_key, _file_id, _entry_size, _file_position, ^on_disk_entry_id}} ->
      acc = maybe_rotate_merge_file(acc, directory, target_file_size)

      entry = [
        Writer.int_to_u32_bytes(crc32),
        Writer.int_to_u128_bytes(on_disk_entry_id),
        Writer.int_to_u32_bytes(key_size),
        Writer.int_to_u32_bytes(value_size),
        key_bytes,
        value_bytes
      ]

      :ok = :file.write(acc.merge_write_file, entry)
      entry_size = :erlang.iolist_size(entry)

      true =
        Keydir.insert(
          tid,
          key,
          acc.merge_write_file_id,
          entry_size,
          acc.merge_write_file_position,
          on_disk_entry_id
        )

      # Advance by the bytes just written, starting from the (possibly
      # rotated) position — the previous version added entry_size to the
      # stale pre-rotation position.
      %{
        acc
        | merge_write_file_position: acc.merge_write_file_position + entry_size,
          merge_file_ids: MapSet.put(acc.merge_file_ids, acc.merge_write_file_id)
      }

    # The key isn't in the keydir at all: it was deleted, the entry is dead.
    :error ->
      acc

    # The keydir points at a different entry_id: this is an old version of
    # the key, so we skip it.
    _ ->
      acc
  end
end

# Starts a fresh merge output file once the current one has reached
# target_file_size bytes; otherwise returns the accumulator unchanged.
defp maybe_rotate_merge_file(
       %{merge_write_file_position: position} = acc,
       directory,
       target_file_size
     )
     when position >= target_file_size do
  # NOTE(review): the previous merge file handle is not closed here —
  # confirm Writer owns/closes it, otherwise each rotation leaks a handle.
  {:ok, %{write_file: write_file, file_id: file_id}} = Writer.new_write_file(directory)

  %{
    acc
    | merge_write_file: write_file,
      merge_write_file_id: file_id,
      merge_write_file_position: 0,
      merge_file_ids: MapSet.put(acc.merge_file_ids, file_id)
  }
end

defp maybe_rotate_merge_file(acc, _directory, _target_file_size), do: acc

lib/b4/writer.ex

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,10 @@ defmodule B4.Writer do
199199
<<integer::unsigned-integer-32>>
200200
end
201201

202+
@doc """
Encodes `integer` as a 16-byte big-endian unsigned 128-bit binary.
"""
def int_to_u128_bytes(integer) when is_integer(integer) do
  # big/unsigned are the bitstring defaults; spelled out for clarity.
  <<integer::big-unsigned-integer-size(128)>>
end
205+
202206
@doc """
Registered process name for the writer serving `directory`.
"""
# NOTE(review): this mints an atom per directory; atoms are never garbage
# collected, so it is safe only while the set of directories is bounded.
def name(directory) do
  String.to_atom("#{__MODULE__}-#{directory}")
end

test/b4_test.exs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,88 @@ defmodule B4Test do
121121
assert :ok = B4.delete(dir, "c")
122122
assert [] = Enum.sort(B4.keys(dir))
123123
end
124+
125+
test "simple merge", %{dir: dir} do
  # Write "a" four times, closing the db each time so every value lands in
  # its own read-only data file.
  for i <- 1..4 do
    assert :ok = B4.new(dir)
    assert :ok = B4.insert(dir, "a", i)
    assert :ok = B4.close(dir)
  end

  assert Enum.count(File.ls!(dir)) == 4

  # Opening the db always creates one fresh writer file.
  assert :ok = B4.new(dir)
  assert Enum.count(File.ls!(dir)) == 5

  assert :ok = B4.merge(dir)

  # Merging should:
  # - create 1 new merge file (for this particular dataset)
  # - delete the 4 previous read files
  # - leave the current write file untouched
  # for a total of 2 files.
  assert Enum.count(File.ls!(dir)) == 2
  assert :ok = B4.close(dir)

  # Reopening leaves: the new current write file, the previous write file,
  # and the merge file.
  assert :ok = B4.new(dir)
  assert Enum.count(File.ls!(dir)) == 3
  assert ["a"] = B4.keys(dir)
  assert {:ok, 4} = B4.fetch(dir, "a")
end
164+
165+
test "simple merge with deletes", %{dir: dir} do
  # Write "a" and "b" four times each, closing the db each time so every
  # pair of values lands in its own read-only data file.
  for i <- 1..4 do
    assert :ok = B4.new(dir)
    assert :ok = B4.insert(dir, "a", i)
    assert :ok = B4.insert(dir, "b", i)
    assert :ok = B4.close(dir)
  end

  assert Enum.count(File.ls!(dir)) == 4

  # Opening the db always creates one fresh writer file; then delete "a"
  # so the merge must drop every entry for it.
  assert :ok = B4.new(dir)
  assert :ok = B4.delete(dir, "a")
  assert Enum.count(File.ls!(dir)) == 5

  assert :ok = B4.merge(dir)

  # Merging should:
  # - create 1 new merge file (for this particular dataset)
  # - delete the 4 previous read files
  # - leave the current write file untouched
  # for a total of 2 files.
  assert Enum.count(File.ls!(dir)) == 2
  assert :ok = B4.close(dir)

  # Reopening leaves: the new current write file, the previous write file,
  # and the merge file.
  assert :ok = B4.new(dir)
  assert Enum.count(File.ls!(dir)) == 3
  assert ["b"] = B4.keys(dir)
  assert :not_found = B4.fetch(dir, "a")
  assert {:ok, 4} = B4.fetch(dir, "b")
end
124208
end

0 commit comments

Comments
 (0)