Fix and refactor room and user stats (#5971)

Previously the stats were not being correctly populated.
pull/5982/head
Erik Johnston 2019-09-04 13:04:27 +01:00 committed by GitHub
parent ea128a3e8e
commit 6e834e94fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1640 additions and 639 deletions

1
changelog.d/5971.bugfix Normal file
View File

@ -0,0 +1 @@
Fix room and user stats tracking.

View File

@ -0,0 +1,62 @@
Room and User Statistics
========================
Synapse maintains room and user statistics (as well as a cache of room state),
in various tables. These can be used for administrative purposes but are also
used when generating the public room directory.
# Synapse Developer Documentation
## High-Level Concepts
### Definitions
* **subject**: Something we are tracking stats about currently a room or user.
* **current row**: An entry for a subject in the appropriate current statistics
table. Each subject can have only one.
* **historical row**: An entry for a subject in the appropriate historical
statistics table. Each subject can have any number of these.
### Overview
Stats are maintained as time series. There are two kinds of column:
* absolute columns where the value is correct for the time given by `end_ts`
in the stats row. (Imagine a line graph for these values)
* They can also be thought of as 'gauges' in Prometheus, if you are familiar.
* per-slice columns where the value corresponds to how many of the occurrences
occurred within the time slice given by `(end_ts bucket_size)…end_ts`
or `start_ts…end_ts`. (Imagine a histogram for these values)
Stats are maintained in two tables (for each type): current and historical.
Current stats correspond to the present values. Each subject can only have one
entry.
Historical stats correspond to values in the past. Subjects may have multiple
entries.
## Concepts around the management of stats
### Current rows
Current rows contain the most up-to-date statistics for a room.
They only contain absolute columns
### Historical rows
Historical rows can always be considered to be valid for the time slice and
end time specified.
* historical rows will not exist for every time slice they will be omitted
if there were no changes. In this case, the following assumptions can be
made to interpolate/recreate missing rows:
- absolute fields have the same values as in the preceding row
- per-slice fields are zero (`0`)
* historical rows will not be retained forever rows older than a configurable
time will be purged.
#### Purge
The purging of historical rows is not yet implemented.

View File

@ -27,19 +27,16 @@ class StatsConfig(Config):
def read_config(self, config, **kwargs):
self.stats_enabled = True
self.stats_bucket_size = 86400
self.stats_bucket_size = 86400 * 1000
self.stats_retention = sys.maxsize
stats_config = config.get("stats", None)
if stats_config:
self.stats_enabled = stats_config.get("enabled", self.stats_enabled)
self.stats_bucket_size = (
self.parse_duration(stats_config.get("bucket_size", "1d")) / 1000
self.stats_bucket_size = self.parse_duration(
stats_config.get("bucket_size", "1d")
)
self.stats_retention = (
self.parse_duration(
stats_config.get("retention", "%ds" % (sys.maxsize,))
)
/ 1000
self.stats_retention = self.parse_duration(
stats_config.get("retention", "%ds" % (sys.maxsize,))
)
def generate_config_section(self, config_dir_path, server_name, **kwargs):

View File

@ -14,15 +14,14 @@
# limitations under the License.
import logging
from collections import Counter
from twisted.internet import defer
from synapse.api.constants import EventTypes, JoinRules, Membership
from synapse.api.constants import EventTypes, Membership
from synapse.handlers.state_deltas import StateDeltasHandler
from synapse.metrics import event_processing_positions
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import UserID
from synapse.util.metrics import Measure
logger = logging.getLogger(__name__)
@ -62,11 +61,10 @@ class StatsHandler(StateDeltasHandler):
def notify_new_event(self):
"""Called when there may be more deltas to process
"""
if not self.hs.config.stats_enabled:
if not self.hs.config.stats_enabled or self._is_processing:
return
if self._is_processing:
return
self._is_processing = True
@defer.inlineCallbacks
def process():
@ -75,39 +73,72 @@ class StatsHandler(StateDeltasHandler):
finally:
self._is_processing = False
self._is_processing = True
run_as_background_process("stats.notify_new_event", process)
@defer.inlineCallbacks
def _unsafe_process(self):
# If self.pos is None then means we haven't fetched it from DB
if self.pos is None:
self.pos = yield self.store.get_stats_stream_pos()
# If still None then the initial background update hasn't happened yet
if self.pos is None:
return None
self.pos = yield self.store.get_stats_positions()
# Loop round handling deltas until we're up to date
while True:
with Measure(self.clock, "stats_delta"):
deltas = yield self.store.get_current_state_deltas(self.pos)
if not deltas:
return
deltas = yield self.store.get_current_state_deltas(self.pos)
logger.info("Handling %d state deltas", len(deltas))
yield self._handle_deltas(deltas)
if deltas:
logger.debug("Handling %d state deltas", len(deltas))
room_deltas, user_deltas = yield self._handle_deltas(deltas)
self.pos = deltas[-1]["stream_id"]
yield self.store.update_stats_stream_pos(self.pos)
max_pos = deltas[-1]["stream_id"]
else:
room_deltas = {}
user_deltas = {}
max_pos = yield self.store.get_room_max_stream_ordering()
event_processing_positions.labels("stats").set(self.pos)
# Then count deltas for total_events and total_event_bytes.
room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes(
self.pos, max_pos
)
for room_id, fields in room_count.items():
room_deltas.setdefault(room_id, {}).update(fields)
for user_id, fields in user_count.items():
user_deltas.setdefault(user_id, {}).update(fields)
logger.debug("room_deltas: %s", room_deltas)
logger.debug("user_deltas: %s", user_deltas)
# Always call this so that we update the stats position.
yield self.store.bulk_update_stats_delta(
self.clock.time_msec(),
updates={"room": room_deltas, "user": user_deltas},
stream_id=max_pos,
)
event_processing_positions.labels("stats").set(max_pos)
if self.pos == max_pos:
break
self.pos = max_pos
@defer.inlineCallbacks
def _handle_deltas(self, deltas):
"""Called with the state deltas to process
Returns:
Deferred[tuple[dict[str, Counter], dict[str, counter]]]
Resovles to two dicts, the room deltas and the user deltas,
mapping from room/user ID to changes in the various fields.
"""
Called with the state deltas to process
"""
room_to_stats_deltas = {}
user_to_stats_deltas = {}
room_to_state_updates = {}
for delta in deltas:
typ = delta["type"]
state_key = delta["state_key"]
@ -115,11 +146,10 @@ class StatsHandler(StateDeltasHandler):
event_id = delta["event_id"]
stream_id = delta["stream_id"]
prev_event_id = delta["prev_event_id"]
stream_pos = delta["stream_id"]
logger.debug("Handling: %r %r, %s", typ, state_key, event_id)
logger.debug("Handling: %r, %r %r, %s", room_id, typ, state_key, event_id)
token = yield self.store.get_earliest_token_for_room_stats(room_id)
token = yield self.store.get_earliest_token_for_stats("room", room_id)
# If the earliest token to begin from is larger than our current
# stream ID, skip processing this delta.
@ -131,203 +161,130 @@ class StatsHandler(StateDeltasHandler):
continue
if event_id is None and prev_event_id is None:
# Errr...
logger.error(
"event ID is None and so is the previous event ID. stream_id: %s",
stream_id,
)
continue
event_content = {}
sender = None
if event_id is not None:
event = yield self.store.get_event(event_id, allow_none=True)
if event:
event_content = event.content or {}
sender = event.sender
# We use stream_pos here rather than fetch by event_id as event_id
# may be None
now = yield self.store.get_received_ts_by_stream_pos(stream_pos)
# All the values in this dict are deltas (RELATIVE changes)
room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter())
# quantise time to the nearest bucket
now = (now // 1000 // self.stats_bucket_size) * self.stats_bucket_size
room_state = room_to_state_updates.setdefault(room_id, {})
if prev_event_id is None:
# this state event doesn't overwrite another,
# so it is a new effective/current state event
room_stats_delta["current_state_events"] += 1
if typ == EventTypes.Member:
# we could use _get_key_change here but it's a bit inefficient
# given we're not testing for a specific result; might as well
# just grab the prev_membership and membership strings and
# compare them.
prev_event_content = {}
# We take None rather than leave as a previous membership
# in the absence of a previous event because we do not want to
# reduce the leave count when a new-to-the-room user joins.
prev_membership = None
if prev_event_id is not None:
prev_event = yield self.store.get_event(
prev_event_id, allow_none=True
)
if prev_event:
prev_event_content = prev_event.content
prev_membership = prev_event_content.get(
"membership", Membership.LEAVE
)
membership = event_content.get("membership", Membership.LEAVE)
prev_membership = prev_event_content.get("membership", Membership.LEAVE)
if prev_membership == membership:
continue
if prev_membership == Membership.JOIN:
yield self.store.update_stats_delta(
now, "room", room_id, "joined_members", -1
)
if prev_membership is None:
logger.debug("No previous membership for this user.")
elif membership == prev_membership:
pass # noop
elif prev_membership == Membership.JOIN:
room_stats_delta["joined_members"] -= 1
elif prev_membership == Membership.INVITE:
yield self.store.update_stats_delta(
now, "room", room_id, "invited_members", -1
)
room_stats_delta["invited_members"] -= 1
elif prev_membership == Membership.LEAVE:
yield self.store.update_stats_delta(
now, "room", room_id, "left_members", -1
)
room_stats_delta["left_members"] -= 1
elif prev_membership == Membership.BAN:
yield self.store.update_stats_delta(
now, "room", room_id, "banned_members", -1
)
room_stats_delta["banned_members"] -= 1
else:
err = "%s is not a valid prev_membership" % (repr(prev_membership),)
logger.error(err)
raise ValueError(err)
raise ValueError(
"%r is not a valid prev_membership" % (prev_membership,)
)
if membership == prev_membership:
pass # noop
if membership == Membership.JOIN:
yield self.store.update_stats_delta(
now, "room", room_id, "joined_members", +1
)
room_stats_delta["joined_members"] += 1
elif membership == Membership.INVITE:
yield self.store.update_stats_delta(
now, "room", room_id, "invited_members", +1
)
room_stats_delta["invited_members"] += 1
if sender and self.is_mine_id(sender):
user_to_stats_deltas.setdefault(sender, Counter())[
"invites_sent"
] += 1
elif membership == Membership.LEAVE:
yield self.store.update_stats_delta(
now, "room", room_id, "left_members", +1
)
room_stats_delta["left_members"] += 1
elif membership == Membership.BAN:
yield self.store.update_stats_delta(
now, "room", room_id, "banned_members", +1
)
room_stats_delta["banned_members"] += 1
else:
err = "%s is not a valid membership" % (repr(membership),)
logger.error(err)
raise ValueError(err)
raise ValueError("%r is not a valid membership" % (membership,))
user_id = state_key
if self.is_mine_id(user_id):
# update user_stats as it's one of our users
public = yield self._is_public_room(room_id)
# this accounts for transitions like leave → ban and so on.
has_changed_joinedness = (prev_membership == Membership.JOIN) != (
membership == Membership.JOIN
)
if membership == Membership.LEAVE:
yield self.store.update_stats_delta(
now,
"user",
user_id,
"public_rooms" if public else "private_rooms",
-1,
)
elif membership == Membership.JOIN:
yield self.store.update_stats_delta(
now,
"user",
user_id,
"public_rooms" if public else "private_rooms",
+1,
)
if has_changed_joinedness:
delta = +1 if membership == Membership.JOIN else -1
user_to_stats_deltas.setdefault(user_id, Counter())[
"joined_rooms"
] += delta
room_stats_delta["local_users_in_room"] += delta
elif typ == EventTypes.Create:
# Newly created room. Add it with all blank portions.
yield self.store.update_room_state(
room_id,
{
"join_rules": None,
"history_visibility": None,
"encryption": None,
"name": None,
"topic": None,
"avatar": None,
"canonical_alias": None,
},
)
room_state["is_federatable"] = event_content.get("m.federate", True)
if sender and self.is_mine_id(sender):
user_to_stats_deltas.setdefault(sender, Counter())[
"rooms_created"
] += 1
elif typ == EventTypes.JoinRules:
yield self.store.update_room_state(
room_id, {"join_rules": event_content.get("join_rule")}
)
is_public = yield self._get_key_change(
prev_event_id, event_id, "join_rule", JoinRules.PUBLIC
)
if is_public is not None:
yield self.update_public_room_stats(now, room_id, is_public)
room_state["join_rules"] = event_content.get("join_rule")
elif typ == EventTypes.RoomHistoryVisibility:
yield self.store.update_room_state(
room_id,
{"history_visibility": event_content.get("history_visibility")},
room_state["history_visibility"] = event_content.get(
"history_visibility"
)
is_public = yield self._get_key_change(
prev_event_id, event_id, "history_visibility", "world_readable"
)
if is_public is not None:
yield self.update_public_room_stats(now, room_id, is_public)
elif typ == EventTypes.Encryption:
yield self.store.update_room_state(
room_id, {"encryption": event_content.get("algorithm")}
)
room_state["encryption"] = event_content.get("algorithm")
elif typ == EventTypes.Name:
yield self.store.update_room_state(
room_id, {"name": event_content.get("name")}
)
room_state["name"] = event_content.get("name")
elif typ == EventTypes.Topic:
yield self.store.update_room_state(
room_id, {"topic": event_content.get("topic")}
)
room_state["topic"] = event_content.get("topic")
elif typ == EventTypes.RoomAvatar:
yield self.store.update_room_state(
room_id, {"avatar": event_content.get("url")}
)
room_state["avatar"] = event_content.get("url")
elif typ == EventTypes.CanonicalAlias:
yield self.store.update_room_state(
room_id, {"canonical_alias": event_content.get("alias")}
)
room_state["canonical_alias"] = event_content.get("alias")
elif typ == EventTypes.GuestAccess:
room_state["guest_access"] = event_content.get("guest_access")
@defer.inlineCallbacks
def update_public_room_stats(self, ts, room_id, is_public):
"""
Increment/decrement a user's number of public rooms when a room they are
in changes to/from public visibility.
for room_id, state in room_to_state_updates.items():
yield self.store.update_room_state(room_id, state)
Args:
ts (int): Timestamp in seconds
room_id (str)
is_public (bool)
"""
# For now, blindly iterate over all local users in the room so that
# we can handle the whole problem of copying buckets over as needed
user_ids = yield self.store.get_users_in_room(room_id)
for user_id in user_ids:
if self.hs.is_mine(UserID.from_string(user_id)):
yield self.store.update_stats_delta(
ts, "user", user_id, "public_rooms", +1 if is_public else -1
)
yield self.store.update_stats_delta(
ts, "user", user_id, "private_rooms", -1 if is_public else +1
)
@defer.inlineCallbacks
def _is_public_room(self, room_id):
join_rules = yield self.state.get_current_state(room_id, EventTypes.JoinRules)
history_visibility = yield self.state.get_current_state(
room_id, EventTypes.RoomHistoryVisibility
)
if (join_rules and join_rules.content.get("join_rule") == JoinRules.PUBLIC) or (
(
history_visibility
and history_visibility.content.get("history_visibility")
== "world_readable"
)
):
return True
else:
return False
return room_to_stats_deltas, user_to_stats_deltas

View File

@ -2270,8 +2270,9 @@ class EventsStore(
"room_aliases",
"room_depth",
"room_memberships",
"room_state",
"room_stats",
"room_stats_state",
"room_stats_current",
"room_stats_historical",
"room_stats_earliest_token",
"rooms",
"stream_ordering_to_exterm",

View File

@ -869,6 +869,17 @@ class RegistrationStore(
(user_id_obj.localpart, create_profile_with_displayname),
)
if self.hs.config.stats_enabled:
# we create a new completed user statistics row
# we don't strictly need current_token since this user really can't
# have any state deltas before now (as it is a new user), but still,
# we include it for completeness.
current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn)
self._update_stats_delta_txn(
txn, now, "user", user_id, {}, complete_with_stream_id=current_token
)
self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
txn.call_after(self.is_guest.invalidate, (user_id,))
@ -1140,6 +1151,7 @@ class RegistrationStore(
deferred str|None: A str representing a link to redirect the user
to if there is one.
"""
# Insert everything into a transaction in order to run atomically
def validate_threepid_session_txn(txn):
row = self._simple_select_one_txn(

View File

@ -112,29 +112,31 @@ class RoomMemberWorkerStore(EventsWorkerStore):
@cached(max_entries=100000, iterable=True)
def get_users_in_room(self, room_id):
def f(txn):
# If we can assume current_state_events.membership is up to date
# then we can avoid a join, which is a Very Good Thing given how
# frequently this function gets called.
if self._current_state_events_membership_up_to_date:
sql = """
SELECT state_key FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
"""
else:
sql = """
SELECT state_key FROM room_memberships as m
INNER JOIN current_state_events as c
ON m.event_id = c.event_id
AND m.room_id = c.room_id
AND m.user_id = c.state_key
WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
"""
return self.runInteraction(
"get_users_in_room", self.get_users_in_room_txn, room_id
)
txn.execute(sql, (room_id, Membership.JOIN))
return [to_ascii(r[0]) for r in txn]
def get_users_in_room_txn(self, txn, room_id):
# If we can assume current_state_events.membership is up to date
# then we can avoid a join, which is a Very Good Thing given how
# frequently this function gets called.
if self._current_state_events_membership_up_to_date:
sql = """
SELECT state_key FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
"""
else:
sql = """
SELECT state_key FROM room_memberships as m
INNER JOIN current_state_events as c
ON m.event_id = c.event_id
AND m.room_id = c.room_id
AND m.user_id = c.state_key
WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
"""
return self.runInteraction("get_users_in_room", f)
txn.execute(sql, (room_id, Membership.JOIN))
return [to_ascii(r[0]) for r in txn]
@cached(max_entries=100000)
def get_room_summary(self, room_id):

View File

@ -0,0 +1,152 @@
/* Copyright 2018 New Vector Ltd
* Copyright 2019 The Matrix.org Foundation C.I.C.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
----- First clean up from previous versions of room stats.
-- First remove old stats stuff
DROP TABLE IF EXISTS room_stats;
DROP TABLE IF EXISTS room_state;
DROP TABLE IF EXISTS room_stats_state;
DROP TABLE IF EXISTS user_stats;
DROP TABLE IF EXISTS room_stats_earliest_tokens;
DROP TABLE IF EXISTS _temp_populate_stats_position;
DROP TABLE IF EXISTS _temp_populate_stats_rooms;
DROP TABLE IF EXISTS stats_stream_pos;
-- Unschedule old background updates if they're still scheduled
DELETE FROM background_updates WHERE update_name IN (
'populate_stats_createtables',
'populate_stats_process_rooms',
'populate_stats_process_users',
'populate_stats_cleanup'
);
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('populate_stats_process_rooms', '{}', '');
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('populate_stats_process_users', '{}', 'populate_stats_process_rooms');
----- Create tables for our version of room stats.
-- single-row table to track position of incremental updates
DROP TABLE IF EXISTS stats_incremental_position;
CREATE TABLE stats_incremental_position (
Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
stream_id BIGINT NOT NULL,
CHECK (Lock='X')
);
-- insert a null row and make sure it is the only one.
INSERT INTO stats_incremental_position (
stream_id
) SELECT COALESCE(MAX(stream_ordering), 0) from events;
-- represents PRESENT room statistics for a room
-- only holds absolute fields
DROP TABLE IF EXISTS room_stats_current;
CREATE TABLE room_stats_current (
room_id TEXT NOT NULL PRIMARY KEY,
-- These are absolute counts
current_state_events INT NOT NULL,
joined_members INT NOT NULL,
invited_members INT NOT NULL,
left_members INT NOT NULL,
banned_members INT NOT NULL,
local_users_in_room INT NOT NULL,
-- The maximum delta stream position that this row takes into account.
completed_delta_stream_id BIGINT NOT NULL
);
-- represents HISTORICAL room statistics for a room
DROP TABLE IF EXISTS room_stats_historical;
CREATE TABLE room_stats_historical (
room_id TEXT NOT NULL,
-- These stats cover the time from (end_ts - bucket_size)...end_ts (in ms).
-- Note that end_ts is quantised.
end_ts BIGINT NOT NULL,
bucket_size BIGINT NOT NULL,
-- These stats are absolute counts
current_state_events BIGINT NOT NULL,
joined_members BIGINT NOT NULL,
invited_members BIGINT NOT NULL,
left_members BIGINT NOT NULL,
banned_members BIGINT NOT NULL,
local_users_in_room BIGINT NOT NULL,
-- These stats are per time slice
total_events BIGINT NOT NULL,
total_event_bytes BIGINT NOT NULL,
PRIMARY KEY (room_id, end_ts)
);
-- We use this index to speed up deletion of ancient room stats.
CREATE INDEX room_stats_historical_end_ts ON room_stats_historical (end_ts);
-- represents PRESENT statistics for a user
-- only holds absolute fields
DROP TABLE IF EXISTS user_stats_current;
CREATE TABLE user_stats_current (
user_id TEXT NOT NULL PRIMARY KEY,
joined_rooms BIGINT NOT NULL,
-- The maximum delta stream position that this row takes into account.
completed_delta_stream_id BIGINT NOT NULL
);
-- represents HISTORICAL statistics for a user
DROP TABLE IF EXISTS user_stats_historical;
CREATE TABLE user_stats_historical (
user_id TEXT NOT NULL,
end_ts BIGINT NOT NULL,
bucket_size BIGINT NOT NULL,
joined_rooms BIGINT NOT NULL,
invites_sent BIGINT NOT NULL,
rooms_created BIGINT NOT NULL,
total_events BIGINT NOT NULL,
total_event_bytes BIGINT NOT NULL,
PRIMARY KEY (user_id, end_ts)
);
-- We use this index to speed up deletion of ancient user stats.
CREATE INDEX user_stats_historical_end_ts ON user_stats_historical (end_ts);
CREATE TABLE room_stats_state (
room_id TEXT NOT NULL,
name TEXT,
canonical_alias TEXT,
join_rules TEXT,
history_visibility TEXT,
encryption TEXT,
avatar TEXT,
guest_access TEXT,
is_federatable BOOLEAN,
topic TEXT
);
CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state(room_id);

File diff suppressed because it is too large Load Diff

View File

@ -13,16 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from mock import Mock
from twisted.internet import defer
from synapse.api.constants import EventTypes, Membership
from synapse import storage
from synapse.rest import admin
from synapse.rest.client.v1 import login, room
from tests import unittest
# The expected number of state events in a fresh public room.
EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM = 5
# The expected number of state events in a fresh private room.
EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM = 6
class StatsRoomTests(unittest.HomeserverTestCase):
@ -33,7 +34,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
]
def prepare(self, reactor, clock, hs):
self.store = hs.get_datastore()
self.handler = self.hs.get_stats_handler()
@ -47,7 +47,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.get_success(
self.store._simple_insert(
"background_updates",
{"update_name": "populate_stats_createtables", "progress_json": "{}"},
{"update_name": "populate_stats_prepare", "progress_json": "{}"},
)
)
self.get_success(
@ -56,7 +56,17 @@ class StatsRoomTests(unittest.HomeserverTestCase):
{
"update_name": "populate_stats_process_rooms",
"progress_json": "{}",
"depends_on": "populate_stats_createtables",
"depends_on": "populate_stats_prepare",
},
)
)
self.get_success(
self.store._simple_insert(
"background_updates",
{
"update_name": "populate_stats_process_users",
"progress_json": "{}",
"depends_on": "populate_stats_process_rooms",
},
)
)
@ -66,16 +76,46 @@ class StatsRoomTests(unittest.HomeserverTestCase):
{
"update_name": "populate_stats_cleanup",
"progress_json": "{}",
"depends_on": "populate_stats_process_rooms",
"depends_on": "populate_stats_process_users",
},
)
)
def get_all_room_state(self):
return self.store._simple_select_list(
"room_stats_state", None, retcols=("name", "topic", "canonical_alias")
)
def _get_current_stats(self, stats_type, stat_id):
table, id_col = storage.stats.TYPE_TO_TABLE[stats_type]
cols = list(storage.stats.ABSOLUTE_STATS_FIELDS[stats_type]) + list(
storage.stats.PER_SLICE_FIELDS[stats_type]
)
end_ts = self.store.quantise_stats_time(self.reactor.seconds() * 1000)
return self.get_success(
self.store._simple_select_one(
table + "_historical",
{id_col: stat_id, end_ts: end_ts},
cols,
allow_none=True,
)
)
def _perform_background_initial_update(self):
# Do the initial population of the stats via the background update
self._add_background_updates()
while not self.get_success(self.store.has_completed_background_updates()):
self.get_success(self.store.do_next_background_update(100), by=0.1)
def test_initial_room(self):
"""
The background updates will build the table from scratch.
"""
r = self.get_success(self.store.get_all_room_state())
r = self.get_success(self.get_all_room_state())
self.assertEqual(len(r), 0)
# Disable stats
@ -91,7 +131,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
)
# Stats disabled, shouldn't have done anything
r = self.get_success(self.store.get_all_room_state())
r = self.get_success(self.get_all_room_state())
self.assertEqual(len(r), 0)
# Enable stats
@ -104,7 +144,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
while not self.get_success(self.store.has_completed_background_updates()):
self.get_success(self.store.do_next_background_update(100), by=0.1)
r = self.get_success(self.store.get_all_room_state())
r = self.get_success(self.get_all_room_state())
self.assertEqual(len(r), 1)
self.assertEqual(r[0]["topic"], "foo")
@ -114,6 +154,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
Ingestion via notify_new_event will ignore tokens that the background
update have already processed.
"""
self.reactor.advance(86401)
self.hs.config.stats_enabled = False
@ -138,12 +179,18 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.hs.config.stats_enabled = True
self.handler.stats_enabled = True
self.store._all_done = False
self.get_success(self.store.update_stats_stream_pos(None))
self.get_success(
self.store._simple_update_one(
table="stats_incremental_position",
keyvalues={},
updatevalues={"stream_id": 0},
)
)
self.get_success(
self.store._simple_insert(
"background_updates",
{"update_name": "populate_stats_createtables", "progress_json": "{}"},
{"update_name": "populate_stats_prepare", "progress_json": "{}"},
)
)
@ -154,6 +201,8 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.helper.invite(room=room_1, src=u1, targ=u2, tok=u1_token)
self.helper.join(room=room_1, user=u2, tok=u2_token)
# orig_delta_processor = self.store.
# Now do the initial ingestion.
self.get_success(
self.store._simple_insert(
@ -185,8 +234,15 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.helper.invite(room=room_1, src=u1, targ=u3, tok=u1_token)
self.helper.join(room=room_1, user=u3, tok=u3_token)
# Get the deltas! There should be two -- day 1, and day 2.
r = self.get_success(self.store.get_deltas_for_room(room_1, 0))
# self.handler.notify_new_event()
# We need to let the delta processor advance…
self.pump(10 * 60)
# Get the slices! There should be two -- day 1, and day 2.
r = self.get_success(self.store.get_statistics_for_subject("room", room_1, 0))
self.assertEqual(len(r), 2)
# The oldest has 2 joined members
self.assertEqual(r[-1]["joined_members"], 2)
@ -194,111 +250,476 @@ class StatsRoomTests(unittest.HomeserverTestCase):
# The newest has 3
self.assertEqual(r[0]["joined_members"], 3)
def test_incorrect_state_transition(self):
def test_create_user(self):
"""
If the state transition is not one of (JOIN, INVITE, LEAVE, BAN) to
(JOIN, INVITE, LEAVE, BAN), an error is raised.
When we create a user, it should have statistics already ready.
"""
events = {
"a1": {"membership": Membership.LEAVE},
"a2": {"membership": "not a real thing"},
}
def get_event(event_id, allow_none=True):
m = Mock()
m.content = events[event_id]
d = defer.Deferred()
self.reactor.callLater(0.0, d.callback, m)
return d
def get_received_ts(event_id):
return defer.succeed(1)
self.store.get_received_ts = get_received_ts
self.store.get_event = get_event
deltas = [
{
"type": EventTypes.Member,
"state_key": "some_user",
"room_id": "room",
"event_id": "a1",
"prev_event_id": "a2",
"stream_id": 60,
}
]
f = self.get_failure(self.handler._handle_deltas(deltas), ValueError)
self.assertEqual(
f.value.args[0], "'not a real thing' is not a valid prev_membership"
)
# And the other way...
deltas = [
{
"type": EventTypes.Member,
"state_key": "some_user",
"room_id": "room",
"event_id": "a2",
"prev_event_id": "a1",
"stream_id": 100,
}
]
f = self.get_failure(self.handler._handle_deltas(deltas), ValueError)
self.assertEqual(
f.value.args[0], "'not a real thing' is not a valid membership"
)
def test_redacted_prev_event(self):
"""
If the prev_event does not exist, then it is assumed to be a LEAVE.
"""
u1 = self.register_user("u1", "pass")
u1_token = self.login("u1", "pass")
room_1 = self.helper.create_room_as(u1, tok=u1_token)
u1stats = self._get_current_stats("user", u1)
# Do the initial population of the user directory via the background update
self._add_background_updates()
self.assertIsNotNone(u1stats)
# not in any rooms by default
self.assertEqual(u1stats["joined_rooms"], 0)
def test_create_room(self):
"""
When we create a room, it should have statistics already ready.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
r1stats = self._get_current_stats("room", r1)
r2 = self.helper.create_room_as(u1, tok=u1token, is_public=False)
r2stats = self._get_current_stats("room", r2)
self.assertIsNotNone(r1stats)
self.assertIsNotNone(r2stats)
# contains the default things you'd expect in a fresh room
self.assertEqual(
r1stats["total_events"],
EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM,
"Wrong number of total_events in new room's stats!"
" You may need to update this if more state events are added to"
" the room creation process.",
)
self.assertEqual(
r2stats["total_events"],
EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM,
"Wrong number of total_events in new room's stats!"
" You may need to update this if more state events are added to"
" the room creation process.",
)
self.assertEqual(
r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM
)
self.assertEqual(
r2stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM
)
self.assertEqual(r1stats["joined_members"], 1)
self.assertEqual(r1stats["invited_members"], 0)
self.assertEqual(r1stats["banned_members"], 0)
self.assertEqual(r2stats["joined_members"], 1)
self.assertEqual(r2stats["invited_members"], 0)
self.assertEqual(r2stats["banned_members"], 0)
def test_send_message_increments_total_events(self):
"""
When we send a message, it increments total_events.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
r1stats_ante = self._get_current_stats("room", r1)
self.helper.send(r1, "hiss", tok=u1token)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
def test_send_state_event_nonoverwriting(self):
"""
When we send a non-overwriting state event, it increments total_events AND current_state_events
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
self.helper.send_state(
r1, "cat.hissing", {"value": True}, tok=u1token, state_key="tabby"
)
r1stats_ante = self._get_current_stats("room", r1)
self.helper.send_state(
r1, "cat.hissing", {"value": False}, tok=u1token, state_key="moggy"
)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
1,
)
def test_send_state_event_overwriting(self):
"""
When we send an overwriting state event, it increments total_events ONLY
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
self.helper.send_state(
r1, "cat.hissing", {"value": True}, tok=u1token, state_key="tabby"
)
r1stats_ante = self._get_current_stats("room", r1)
self.helper.send_state(
r1, "cat.hissing", {"value": False}, tok=u1token, state_key="tabby"
)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
0,
)
def test_join_first_time(self):
"""
When a user joins a room for the first time, total_events, current_state_events and
joined_members should increase by exactly 1.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
u2 = self.register_user("u2", "pass")
u2token = self.login("u2", "pass")
r1stats_ante = self._get_current_stats("room", r1)
self.helper.join(r1, u2, tok=u2token)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
1,
)
self.assertEqual(
r1stats_post["joined_members"] - r1stats_ante["joined_members"], 1
)
def test_join_after_leave(self):
"""
When a user joins a room after being previously left, total_events and
joined_members should increase by exactly 1.
current_state_events should not increase.
left_members should decrease by exactly 1.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
u2 = self.register_user("u2", "pass")
u2token = self.login("u2", "pass")
self.helper.join(r1, u2, tok=u2token)
self.helper.leave(r1, u2, tok=u2token)
r1stats_ante = self._get_current_stats("room", r1)
self.helper.join(r1, u2, tok=u2token)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
0,
)
self.assertEqual(
r1stats_post["joined_members"] - r1stats_ante["joined_members"], +1
)
self.assertEqual(
r1stats_post["left_members"] - r1stats_ante["left_members"], -1
)
def test_invited(self):
"""
When a user invites another user, current_state_events, total_events and
invited_members should increase by exactly 1.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
u2 = self.register_user("u2", "pass")
r1stats_ante = self._get_current_stats("room", r1)
self.helper.invite(r1, u1, u2, tok=u1token)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
1,
)
self.assertEqual(
r1stats_post["invited_members"] - r1stats_ante["invited_members"], +1
)
def test_join_after_invite(self):
"""
When a user joins a room after being invited, total_events and
joined_members should increase by exactly 1.
current_state_events should not increase.
invited_members should decrease by exactly 1.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
u2 = self.register_user("u2", "pass")
u2token = self.login("u2", "pass")
self.helper.invite(r1, u1, u2, tok=u1token)
r1stats_ante = self._get_current_stats("room", r1)
self.helper.join(r1, u2, tok=u2token)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
0,
)
self.assertEqual(
r1stats_post["joined_members"] - r1stats_ante["joined_members"], +1
)
self.assertEqual(
r1stats_post["invited_members"] - r1stats_ante["invited_members"], -1
)
def test_left(self):
"""
When a user leaves a room after joining, total_events and
left_members should increase by exactly 1.
current_state_events should not increase.
joined_members should decrease by exactly 1.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
u2 = self.register_user("u2", "pass")
u2token = self.login("u2", "pass")
self.helper.join(r1, u2, tok=u2token)
r1stats_ante = self._get_current_stats("room", r1)
self.helper.leave(r1, u2, tok=u2token)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
0,
)
self.assertEqual(
r1stats_post["left_members"] - r1stats_ante["left_members"], +1
)
self.assertEqual(
r1stats_post["joined_members"] - r1stats_ante["joined_members"], -1
)
def test_banned(self):
"""
When a user is banned from a room after joining, total_events and
left_members should increase by exactly 1.
current_state_events should not increase.
banned_members should decrease by exactly 1.
"""
self._perform_background_initial_update()
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
u2 = self.register_user("u2", "pass")
u2token = self.login("u2", "pass")
self.helper.join(r1, u2, tok=u2token)
r1stats_ante = self._get_current_stats("room", r1)
self.helper.change_membership(r1, u1, u2, "ban", tok=u1token)
r1stats_post = self._get_current_stats("room", r1)
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
self.assertEqual(
r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
0,
)
self.assertEqual(
r1stats_post["banned_members"] - r1stats_ante["banned_members"], +1
)
self.assertEqual(
r1stats_post["joined_members"] - r1stats_ante["joined_members"], -1
)
def test_initial_background_update(self):
"""
Test that statistics can be generated by the initial background update
handler.
This test also tests that stats rows are not created for new subjects
when stats are disabled. However, it may be desirable to change this
behaviour eventually to still keep current rows.
"""
self.hs.config.stats_enabled = False
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
# test that these subjects, which were created during a time of disabled
# stats, do not have stats.
self.assertIsNone(self._get_current_stats("room", r1))
self.assertIsNone(self._get_current_stats("user", u1))
self.hs.config.stats_enabled = True
self._perform_background_initial_update()
r1stats = self._get_current_stats("room", r1)
u1stats = self._get_current_stats("user", u1)
self.assertEqual(r1stats["joined_members"], 1)
self.assertEqual(
r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM
)
self.assertEqual(u1stats["joined_rooms"], 1)
def test_incomplete_stats(self):
"""
This tests that we track incomplete statistics.
We first test that incomplete stats are incrementally generated,
following the preparation of a background regen.
We then test that these incomplete rows are completed by the background
regen.
"""
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
u2 = self.register_user("u2", "pass")
u2token = self.login("u2", "pass")
u3 = self.register_user("u3", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token, is_public=False)
# preparation stage of the initial background update
# Ugh, have to reset this flag
self.store._all_done = False
self.get_success(
self.store._simple_delete(
"room_stats_current", {"1": 1}, "test_delete_stats"
)
)
self.get_success(
self.store._simple_delete(
"user_stats_current", {"1": 1}, "test_delete_stats"
)
)
self.helper.invite(r1, u1, u2, tok=u1token)
self.helper.join(r1, u2, tok=u2token)
self.helper.invite(r1, u1, u3, tok=u1token)
self.helper.send(r1, "thou shalt yield", tok=u1token)
# now do the background updates
self.store._all_done = False
self.get_success(
self.store._simple_insert(
"background_updates",
{
"update_name": "populate_stats_process_rooms",
"progress_json": "{}",
"depends_on": "populate_stats_prepare",
},
)
)
self.get_success(
self.store._simple_insert(
"background_updates",
{
"update_name": "populate_stats_process_users",
"progress_json": "{}",
"depends_on": "populate_stats_process_rooms",
},
)
)
self.get_success(
self.store._simple_insert(
"background_updates",
{
"update_name": "populate_stats_cleanup",
"progress_json": "{}",
"depends_on": "populate_stats_process_users",
},
)
)
while not self.get_success(self.store.has_completed_background_updates()):
self.get_success(self.store.do_next_background_update(100), by=0.1)
events = {"a1": None, "a2": {"membership": Membership.JOIN}}
r1stats_complete = self._get_current_stats("room", r1)
u1stats_complete = self._get_current_stats("user", u1)
u2stats_complete = self._get_current_stats("user", u2)
def get_event(event_id, allow_none=True):
if events.get(event_id):
m = Mock()
m.content = events[event_id]
else:
m = None
d = defer.Deferred()
self.reactor.callLater(0.0, d.callback, m)
return d
# now we make our assertions
def get_received_ts(event_id):
return defer.succeed(1)
# check that _complete rows are complete and correct
self.assertEqual(r1stats_complete["joined_members"], 2)
self.assertEqual(r1stats_complete["invited_members"], 1)
self.store.get_received_ts = get_received_ts
self.store.get_event = get_event
self.assertEqual(
r1stats_complete["current_state_events"],
2 + EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM,
)
deltas = [
{
"type": EventTypes.Member,
"state_key": "some_user:test",
"room_id": room_1,
"event_id": "a2",
"prev_event_id": "a1",
"stream_id": 100,
}
]
# Handle our fake deltas, which has a user going from LEAVE -> JOIN.
self.get_success(self.handler._handle_deltas(deltas))
# One delta, with two joined members -- the room creator, and our fake
# user.
r = self.get_success(self.store.get_deltas_for_room(room_1, 0))
self.assertEqual(len(r), 1)
self.assertEqual(r[0]["joined_members"], 2)
self.assertEqual(u1stats_complete["joined_rooms"], 1)
self.assertEqual(u2stats_complete["joined_rooms"], 1)

View File

@ -128,8 +128,12 @@ class RestHelper(object):
return channel.json_body
def send_state(self, room_id, event_type, body, tok, expect_code=200):
path = "/_matrix/client/r0/rooms/%s/state/%s" % (room_id, event_type)
def send_state(self, room_id, event_type, body, tok, expect_code=200, state_key=""):
path = "/_matrix/client/r0/rooms/%s/state/%s/%s" % (
room_id,
event_type,
state_key,
)
if tok:
path = path + "?access_token=%s" % tok