Delete unreferenced state groups during purge

pull/4006/head
Erik Johnston 2018-10-04 15:18:52 +01:00
parent d86794325f
commit 17d585753f
2 changed files with 77 additions and 6 deletions

View File

@ -2025,6 +2025,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
logger.info("[purge] finding state groups which depend on redundant"
" state groups")
remaining_state_groups = []
unreferenced_state_groups = 0
for i in range(0, len(state_rows), 100):
chunk = [sg for sg, in state_rows[i:i + 100]]
# look for state groups whose prev_state_group is one we are about
@ -2037,13 +2038,33 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
retcols=["state_group"],
keyvalues={},
)
remaining_state_groups.extend(
row["state_group"] for row in rows
# exclude state groups we are about to delete: no point in
# updating them
if row["state_group"] not in state_groups_to_delete
)
for row in rows:
sg = row["state_group"]
if sg in state_groups_to_delete:
# exclude state groups we are about to delete: no point in
# updating them
continue
if not self._is_state_group_referenced(txn, sg):
# Let's also delete unreferenced state groups while we're
# here, since otherwise we'd need to de-delta them
state_groups_to_delete.add(sg)
unreferenced_state_groups += 1
continue
remaining_state_groups.append(sg)
logger.info(
"[purge] found %i extra unreferenced state groups to delete",
unreferenced_state_groups,
)
logger.info(
"[purge] de-delta-ing %i remaining state groups",
len(remaining_state_groups),
)
# Now we turn the state groups that reference to-be-deleted state
# groups to non delta versions.

View File

@ -1041,6 +1041,56 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
return count
def _is_state_group_referenced(self, txn, state_group):
    """Checks if a given state group is referenced, or is safe to delete.

    A state group is referenced if it or any of its descendants are
    pointed at by an event. (A descendant is a group which has the given
    state_group as a prev group)

    Args:
        txn: the database transaction, passed through unchanged to the
            `_simple_select_*_txn` helpers
        state_group: ID of the state group to check

    Returns:
        bool: True if the state group, or any group reachable from it via
        `state_group_edges`, has a row in `event_to_state_groups`;
        False otherwise.

    Raises:
        Exception: if a child of the group currently being examined has
            already been searched, which is treated as a cyclic dependency.
    """
    # We check this by doing a depth first search to look for any
    # descendant referenced by `event_to_state_groups`.

    # State groups we need to check, contains state groups that are
    # descendants of `state_group` (plus `state_group` itself)
    state_groups_to_search = [state_group]

    # Set of state groups we've already checked
    state_groups_searched = set()

    while state_groups_to_search:
        # Next state group to check. A separate name is used so that the
        # `state_group` argument is not clobbered while we walk the graph.
        current_group = state_groups_to_search.pop()

        is_referenced = self._simple_select_one_onecol_txn(
            txn,
            table="event_to_state_groups",
            keyvalues={"state_group": current_group},
            retcol="event_id",
            allow_none=True,
        )
        if is_referenced:
            # A descendant is referenced by event_to_state_groups, so
            # original state group is referenced.
            return True

        state_groups_searched.add(current_group)

        # Find all children of current state group
        references = self._simple_select_onecol_txn(
            txn,
            table="state_group_edges",
            keyvalues={"prev_state_group": current_group},
            retcol="state_group",
        )

        # Lets be paranoid and check for cycles.
        # NOTE(review): this also fires if a group is reachable via two
        # different parents; presumably each group has at most one prev
        # group so that cannot happen -- confirm against the schema.
        if state_groups_searched.intersection(references):
            # Interpolate the message ourselves: Exception does not apply
            # %-formatting to its arguments the way logger calls do.
            raise Exception(
                "State group %s has cyclic dependency" % (current_group,)
            )

        # Queue the children so they get searched too
        state_groups_to_search.extend(references)

    return False
class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
""" Keeps track of the state at a given event.