2016-01-07 05:26:29 +01:00
|
|
|
# Copyright 2014-2016 OpenMarket Ltd
|
2019-07-25 17:08:24 +02:00
|
|
|
# Copyright 2019 The Matrix.org Foundation C.I.C.
|
2014-08-12 16:10:52 +02:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2020-08-12 15:03:08 +02:00
|
|
|
import abc
|
2018-12-07 13:10:07 +01:00
|
|
|
import re
|
2018-04-04 13:08:29 +02:00
|
|
|
import string
|
2018-07-09 08:09:20 +02:00
|
|
|
from collections import namedtuple
|
2020-09-30 21:29:19 +02:00
|
|
|
from typing import (
|
|
|
|
TYPE_CHECKING,
|
|
|
|
Any,
|
2021-11-12 16:50:54 +01:00
|
|
|
ClassVar,
|
2020-09-30 21:29:19 +02:00
|
|
|
Dict,
|
|
|
|
Mapping,
|
|
|
|
MutableMapping,
|
|
|
|
Optional,
|
|
|
|
Tuple,
|
|
|
|
Type,
|
|
|
|
TypeVar,
|
2020-10-29 16:58:44 +01:00
|
|
|
Union,
|
2020-09-30 21:29:19 +02:00
|
|
|
)
|
2014-08-13 04:14:34 +02:00
|
|
|
|
2019-03-12 17:50:58 +01:00
|
|
|
import attr
|
2021-09-22 15:43:26 +02:00
|
|
|
from frozendict import frozendict
|
2019-07-25 17:08:24 +02:00
|
|
|
from signedjson.key import decode_verify_key_bytes
|
|
|
|
from unpaddedbase64 import decode_base64
|
2021-03-08 14:25:43 +01:00
|
|
|
from zope.interface import Interface
|
|
|
|
|
|
|
|
from twisted.internet.interfaces import (
|
|
|
|
IReactorCore,
|
|
|
|
IReactorPluggableNameResolver,
|
2021-11-10 21:06:54 +01:00
|
|
|
IReactorSSL,
|
2021-03-08 14:25:43 +01:00
|
|
|
IReactorTCP,
|
2021-09-10 18:03:18 +02:00
|
|
|
IReactorThreads,
|
2021-03-08 14:25:43 +01:00
|
|
|
IReactorTime,
|
|
|
|
)
|
2019-03-12 17:50:58 +01:00
|
|
|
|
2020-03-03 13:12:45 +01:00
|
|
|
from synapse.api.errors import Codes, SynapseError
|
2021-01-20 14:15:14 +01:00
|
|
|
from synapse.util.stringutils import parse_and_validate_server_name
|
2014-08-12 16:10:52 +02:00
|
|
|
|
2020-09-30 21:29:19 +02:00
|
|
|
if TYPE_CHECKING:
|
2020-10-29 16:58:44 +01:00
|
|
|
from synapse.appservice.api import ApplicationService
|
2020-09-30 21:29:19 +02:00
|
|
|
from synapse.storage.databases.main import DataStore
|
|
|
|
|
2020-01-16 14:31:22 +01:00
|
|
|
# Generic placeholder for whatever a state map stores (usually an event ID or
# an event).
T = TypeVar("T")

# State maps are keyed on (event type, state_key) pairs.
StateKey = Tuple[str, str]
StateMap = Mapping[StateKey, T]
MutableStateMap = MutableMapping[StateKey, T]

# The type of a JSON-serialisable dict. This could be made stronger, but it
# will do for now.
JsonDict = Dict[str, Any]
|
|
|
|
|
|
|
|
|
2021-03-08 14:25:43 +01:00
|
|
|
# Note that this seems to require inheriting *directly* from Interface in order
|
|
|
|
# for mypy-zope to realize it is an interface.
|
|
|
|
class ISynapseReactor(
    IReactorTCP,
    IReactorSSL,
    IReactorPluggableNameResolver,
    IReactorTime,
    IReactorCore,
    IReactorThreads,
    Interface,
):
    """The set of reactor interfaces that Synapse needs in order to function.

    Combines the TCP, SSL, pluggable name resolver, time, core and threads
    reactor interfaces into a single interface.
    """
|
|
|
|
|
|
|
|
|
2021-03-10 19:15:56 +01:00
|
|
|
@attr.s(frozen=True, slots=True)
class Requester:
    """
    The identity on whose behalf a request is being made.

    Attributes:
        user: id of the user making the request
        access_token_id: *ID* of the access token used for this request, or
            None if it came via the appservice API or similar
        is_guest: True if the user making this request is a guest user
        shadow_banned: True if the user making this request has been
            shadow-banned.
        device_id: device_id which was set at authentication time
        app_service: the AS requesting on behalf of the user
        authenticated_entity: The entity that authenticated when making the
            request. This is different to the user_id when an admin user or
            the server is "puppeting" the user.
    """

    user = attr.ib(type="UserID")
    access_token_id = attr.ib(type=Optional[int])
    is_guest = attr.ib(type=bool)
    shadow_banned = attr.ib(type=bool)
    device_id = attr.ib(type=Optional[str])
    app_service = attr.ib(type=Optional["ApplicationService"])
    authenticated_entity = attr.ib(type=str)

    def serialize(self):
        """Flatten this requester into a JSON-serialisable dict.

        The result can be turned back into a `Requester` by `deserialize`.
        The application service, if any, is represented by its ID only.

        Returns:
            dict
        """
        return dict(
            user_id=self.user.to_string(),
            access_token_id=self.access_token_id,
            is_guest=self.is_guest,
            shadow_banned=self.shadow_banned,
            device_id=self.device_id,
            app_server_id=self.app_service.id if self.app_service else None,
            authenticated_entity=self.authenticated_entity,
        )

    @staticmethod
    def deserialize(store, input):
        """Rebuild a Requester from a dict produced by `serialize`.

        Args:
            store (DataStore): Used to convert AS ID to AS object
            input (dict): A dict produced by `serialize`

        Returns:
            Requester
        """
        app_server_id = input["app_server_id"]
        # Only look the application service up when an ID was serialised.
        app_service = (
            store.get_app_service_by_id(app_server_id) if app_server_id else None
        )

        return Requester(
            user=UserID.from_string(input["user_id"]),
            access_token_id=input["access_token_id"],
            is_guest=input["is_guest"],
            shadow_banned=input["shadow_banned"],
            device_id=input["device_id"],
            app_service=app_service,
            authenticated_entity=input["authenticated_entity"],
        )
|
2016-07-26 17:46:53 +02:00
|
|
|
|
|
|
|
|
2019-06-20 11:32:02 +02:00
|
|
|
def create_requester(
    user_id: Union[str, "UserID"],
    access_token_id: Optional[int] = None,
    is_guest: bool = False,
    shadow_banned: bool = False,
    device_id: Optional[str] = None,
    app_service: Optional["ApplicationService"] = None,
    authenticated_entity: Optional[str] = None,
) -> Requester:
    """
    Build a ``Requester``, filling in sensible defaults.

    Args:
        user_id: id of the user making the request; a string is parsed into
            a ``UserID`` first
        access_token_id: *ID* of the access token used for this request, or
            None if it came via the appservice API or similar
        is_guest: True if the user making this request is a guest user
        shadow_banned: True if the user making this request is shadow-banned.
        device_id: device_id which was set at authentication time
        app_service: the AS requesting on behalf of the user
        authenticated_entity: The entity that authenticated when making the
            request. This is different to the user_id when an admin user or
            the server is "puppeting" the user. Defaults to the string form
            of the user ID.

    Returns:
        Requester
    """
    user = user_id if isinstance(user_id, UserID) else UserID.from_string(user_id)

    # With no explicit authenticated entity, the user authenticated themselves.
    if authenticated_entity is None:
        authenticated_entity = user.to_string()

    return Requester(
        user=user,
        access_token_id=access_token_id,
        is_guest=is_guest,
        shadow_banned=shadow_banned,
        device_id=device_id,
        app_service=app_service,
        authenticated_entity=authenticated_entity,
    )
|
2016-01-11 16:29:57 +01:00
|
|
|
|
|
|
|
|
2021-07-13 18:08:47 +02:00
|
|
|
def get_domain_from_id(string: str) -> str:
    """Return everything after the first ":" of an identifier.

    Raises:
        SynapseError: (400) if the identifier contains no ":".
    """
    _, colon, domain = string.partition(":")
    if not colon:
        raise SynapseError(400, "Invalid ID: %r" % (string,))
    return domain
|
2016-05-09 11:36:03 +02:00
|
|
|
|
|
|
|
|
2021-07-13 18:08:47 +02:00
|
|
|
def get_localpart_from_id(string: str) -> str:
    """Return the text between the first character and the first ":".

    The first character (the sigil position) is dropped without being checked.

    Raises:
        SynapseError: (400) if the identifier contains no ":".
    """
    head, colon, _ = string.partition(":")
    if not colon:
        raise SynapseError(400, "Invalid ID: %r" % (string,))
    return head[1:]
|
|
|
|
|
|
|
|
|
2020-08-12 15:03:08 +02:00
|
|
|
DS = TypeVar("DS", bound="DomainSpecificString")


@attr.s(slots=True, frozen=True, repr=False)
class DomainSpecificString(metaclass=abc.ABCMeta):
    """Common base class among ID/name strings that have a local part and a
    domain name, prefixed with a sigil.

    Has the fields:

        'localpart' : The local part of the name (without the leading sigil)
        'domain' : The domain part of the name
    """

    # Subclasses supply the leading sigil character.
    SIGIL: ClassVar[str] = abc.abstractproperty()  # type: ignore

    localpart = attr.ib(type=str)
    domain = attr.ib(type=str)

    # Instances are frozen and their fields are declared as strings, so a
    # "copy" can simply be the same object.
    def __copy__(self):
        return self

    def __deepcopy__(self, memo):
        return self

    @classmethod
    def from_string(cls: Type[DS], s: str) -> DS:
        """Parse the string given by 's' into a structure object.

        Raises:
            SynapseError: (400, INVALID_PARAM) if 's' does not start with the
                class's sigil or has no ':' after the sigil.
        """
        if len(s) == 0 or s[:1] != cls.SIGIL:
            raise SynapseError(
                400,
                "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL),
                Codes.INVALID_PARAM,
            )

        # Split on the first ':' only, so the domain may itself contain ':'.
        localpart, colon, domain = s[1:].partition(":")
        if not colon:
            raise SynapseError(
                400,
                "Expected %s of the form '%slocalname:domain'"
                % (cls.__name__, cls.SIGIL),
                Codes.INVALID_PARAM,
            )

        # This code will need changing if we want to support multiple domain
        # names on one HS
        return cls(localpart=localpart, domain=domain)

    def to_string(self) -> str:
        """Return a string encoding the fields of the structure object."""
        return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)

    @classmethod
    def is_valid(cls: Type[DS], s: str) -> bool:
        """Parses the input string and attempts to ensure it is valid.

        Returns:
            True if 's' parses and its domain passes server-name validation;
            False if either step raises.
        """
        try:
            parsed = cls.from_string(s)
            # Apply additional validation to the domain. This is only done
            # during is_valid (and not part of from_string) since it is
            # possible for invalid data to exist in room-state, etc.
            parse_and_validate_server_name(parsed.domain)
        except Exception:
            return False
        return True

    __repr__ = to_string
|
2015-05-13 14:42:21 +02:00
|
|
|
|
2014-08-12 16:10:52 +02:00
|
|
|
|
2021-04-23 16:46:29 +02:00
|
|
|
@attr.s(slots=True, frozen=True, repr=False)
class UserID(DomainSpecificString):
    """A user ID: a domain-specific string with the "@" sigil."""

    SIGIL = "@"
|
|
|
|
|
|
|
|
|
2021-04-23 16:46:29 +02:00
|
|
|
@attr.s(slots=True, frozen=True, repr=False)
class RoomAlias(DomainSpecificString):
    """A room alias (room name): a domain-specific string with the "#" sigil."""

    SIGIL = "#"
|
|
|
|
|
|
|
|
|
2021-04-23 16:46:29 +02:00
|
|
|
@attr.s(slots=True, frozen=True, repr=False)
class RoomID(DomainSpecificString):
    """A room id: a domain-specific string with the "!" sigil."""

    SIGIL = "!"
|
2014-08-21 11:55:54 +02:00
|
|
|
|
|
|
|
|
2021-04-23 16:46:29 +02:00
|
|
|
@attr.s(slots=True, frozen=True, repr=False)
class EventID(DomainSpecificString):
    """An event id: a domain-specific string with the "$" sigil."""

    SIGIL = "$"
|
|
|
|
|
|
|
|
|
2021-04-23 16:46:29 +02:00
|
|
|
@attr.s(slots=True, frozen=True, repr=False)
class GroupID(DomainSpecificString):
    """A group ID: a domain-specific string with the "+" sigil."""

    SIGIL = "+"

    @classmethod
    def from_string(cls: Type[DS], s: str) -> DS:
        """Parse 's' as a group ID, additionally checking the localpart.

        Raises:
            SynapseError: (400, INVALID_PARAM) if the base parsing fails, the
                localpart is empty, or the localpart contains characters that
                `contains_invalid_mxid_characters` rejects.
        """
        parsed: DS = super().from_string(s)  # type: ignore
        localpart = parsed.localpart

        if not localpart:
            raise SynapseError(400, "Group ID cannot be empty", Codes.INVALID_PARAM)

        if contains_invalid_mxid_characters(localpart):
            raise SynapseError(
                400,
                "Group ID can only contain characters a-z, 0-9, or '=_-./'",
                Codes.INVALID_PARAM,
            )

        return parsed
|
|
|
|
|
2017-07-18 10:47:25 +02:00
|
|
|
|
2019-06-20 11:32:02 +02:00
|
|
|
# The characters permitted in an mxid localpart: the punctuation "_-./=",
# lower-case ASCII letters, and digits.
mxid_localpart_allowed_characters = set("_-./=").union(
    string.ascii_lowercase, string.digits
)
|
2017-10-21 00:37:22 +02:00
|
|
|
|
|
|
|
|
2020-11-19 20:25:17 +01:00
|
|
|
def contains_invalid_mxid_characters(localpart: str) -> bool:
    """Report whether a localpart uses any character outside the allowed set.

    Applies to mxid and groupid localparts alike.

    Args:
        localpart: the localpart to be checked

    Returns:
        True if at least one character is not in
        `mxid_localpart_allowed_characters`; False otherwise (including for
        an empty localpart).
    """
    return not mxid_localpart_allowed_characters.issuperset(localpart)
|
2017-10-21 00:37:22 +02:00
|
|
|
|
2017-07-18 10:47:25 +02:00
|
|
|
|
2018-12-07 13:10:07 +01:00
|
|
|
# Matches the bytes that get an underscore prefix in case-sensitive mapping:
# upper-case ASCII letters and the underscore itself.
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")

# A pattern which matches '=', and bytes which are not allowed in a mxid
# localpart.
#
# It is built by:
#  * joining the allowed characters (excluding '=') into one string
#  * escaping that string (to stop '-' being interpreted as a range operator)
#  * wrapping it in a '[^...]' regex
#  * encoding the result to 'bytes', so that the pattern matches bytes rather
#    than strings
#
_escaped_allowed_characters = re.escape(
    "".join(mxid_localpart_allowed_characters - {"="})
)
NON_MXID_CHARACTER_PATTERN = re.compile(
    ("[^%s]" % (_escaped_allowed_characters,)).encode("ascii")
)
|
|
|
|
|
|
|
|
|
2020-12-18 15:19:46 +01:00
|
|
|
def map_username_to_mxid_localpart(
    username: Union[str, bytes], case_sensitive: bool = False
) -> str:
    """Map a username onto a string suitable for a MXID

    This follows the algorithm laid out at
    https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets.

    Args:
        username: username to be mapped; a str is encoded as UTF-8 before
            processing
        case_sensitive: true if TEST and test should be mapped
            onto different mxids

    Returns:
        str: string suitable for a mxid localpart
    """
    if not isinstance(username, bytes):
        username = username.encode("utf-8")

    # first we sort out upper-case characters
    if case_sensitive:

        def escape_upper_case(m):
            # Prefix with '_' and lower-case, so that "T" becomes "_t" (and a
            # literal "_" becomes "__").
            return b"_" + m.group().lower()

        username = UPPER_CASE_PATTERN.sub(escape_upper_case, username)
    else:
        username = username.lower()

    # then we =-escape '=' itself and every byte which is not allowed in a
    # localpart (this includes each byte of a non-ascii character).
    def escape_byte(m):
        # Indexing a bytes object yields an int, which is exactly what the
        # %02x conversion needs.
        return b"=%02x" % (m.group()[0],)

    username = NON_MXID_CHARACTER_PATTERN.sub(escape_byte, username)

    # we also do the =-escaping to mxids starting with an underscore.
    username = re.sub(b"^_", b"=5f", username)

    # we should now only have ascii bytes left, so can decode back to a
    # str.
    return username.decode("ascii")
|
2018-12-07 13:10:07 +01:00
|
|
|
|
|
|
|
|
2021-06-17 17:23:11 +02:00
|
|
|
@attr.s(frozen=True, slots=True, order=False)
class RoomStreamToken:
    """Tokens are positions between events. The token "s1" comes after event 1.

            s0    s1
            |     |
        [0] V [1] V [2]

    Tokens can either be a point in the live event stream or a cursor going
    through historic events.

    When traversing the live event stream events are ordered by when they
    arrived at the homeserver.

    When traversing historic events the events are ordered by their depth in
    the event graph "topological_ordering" and then by when they arrived at the
    homeserver "stream_ordering".

    Live tokens start with an "s" followed by the "stream_ordering" id of the
    event it comes after. Historic tokens start with a "t" followed by the
    "topological_ordering" id of the event it comes after, followed by "-",
    followed by the "stream_ordering" id of the event it comes after.

    There is also a third mode for live tokens where the token starts with "m",
    which is sometimes used when using sharded event persisters. In this case
    the events stream is considered to be a set of streams (one for each writer)
    and the token encodes the vector clock of positions of each writer in their
    respective streams.

    The format of the token in such case is an initial integer min position,
    followed by the mapping of instance ID to position separated by '.' and '~':

        m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ...

    The `min_pos` corresponds to the minimum position all writers have persisted
    up to, and then only writers that are ahead of that position need to be
    encoded. An example token is:

        m56~2.58~3.59

    Which corresponds to a set of three (or more writers) where instances 2 and
    3 (these are instance IDs that can be looked up in the DB to fetch the more
    commonly used instance names) are at positions 58 and 59 respectively, and
    all other instances are at position 56.

    Note: The `RoomStreamToken` cannot have both a topological part and an
    instance map.

    For caching purposes, `RoomStreamToken`s and by extension, all their
    attributes, must be hashable.
    """

    topological = attr.ib(
        type=Optional[int],
        validator=attr.validators.optional(attr.validators.instance_of(int)),
    )
    stream = attr.ib(type=int, validator=attr.validators.instance_of(int))

    instance_map = attr.ib(
        type="frozendict[str, int]",
        factory=frozendict,
        validator=attr.validators.deep_mapping(
            key_validator=attr.validators.instance_of(str),
            value_validator=attr.validators.instance_of(int),
            mapping_validator=attr.validators.instance_of(frozendict),
        ),
    )

    def __attrs_post_init__(self):
        """Validates that both `topological` and `instance_map` aren't set."""

        if self.instance_map and self.topological:
            raise ValueError(
                "Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'."
            )

    @classmethod
    async def parse(cls, store: "DataStore", string: str) -> "RoomStreamToken":
        """Parse any of the "s", "t" or "m" token forms.

        Args:
            store: used to map the instance IDs in an "m" token to names
            string: the serialised token

        Raises:
            SynapseError: (400) if the string is not a well-formed token.
        """
        try:
            prefix, body = string[0], string[1:]

            if prefix == "s":
                return cls(topological=None, stream=int(body))

            if prefix == "t":
                topological_part, stream_part = body.split("-", 1)
                return cls(
                    topological=int(topological_part), stream=int(stream_part)
                )

            if prefix == "m":
                min_pos, *writer_entries = body.split("~")
                stream = int(min_pos)

                positions = {}
                for entry in writer_entries:
                    key, value = entry.split(".")
                    instance_id = int(key)
                    pos = int(value)

                    instance_name = await store.get_name_from_instance_id(instance_id)
                    positions[instance_name] = pos

                return cls(
                    topological=None,
                    stream=stream,
                    instance_map=frozendict(positions),
                )
        except Exception:
            # Any failure above means the token is malformed; fall through to
            # the common error below.
            pass
        raise SynapseError(400, "Invalid room stream token %r" % (string,))

    @classmethod
    def parse_stream_token(cls, string: str) -> "RoomStreamToken":
        """Parse a token that must be of the plain "s" form.

        Raises:
            SynapseError: (400) if the string is not a well-formed "s" token.
        """
        try:
            if string[0] == "s":
                position = int(string[1:])
                return cls(topological=None, stream=position)
        except Exception:
            pass
        raise SynapseError(400, "Invalid room stream token %r" % (string,))

    def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken":
        """Return a new token such that if an event is after both this token and
        the other token, then its after the returned token too.

        Raises:
            Exception: if either token has a (truthy) topological part.
        """
        if self.topological or other.topological:
            raise Exception("Can't advance topological tokens")

        max_stream = max(self.stream, other.stream)

        # For each writer mentioned by either token take the later position;
        # a writer missing from one token is at that token's `stream`.
        merged = {}
        for instance in set(self.instance_map).union(other.instance_map):
            merged[instance] = max(
                self.instance_map.get(instance, self.stream),
                other.instance_map.get(instance, other.stream),
            )

        return RoomStreamToken(
            topological=None, stream=max_stream, instance_map=frozendict(merged)
        )

    def as_historical_tuple(self) -> Tuple[int, int]:
        """Returns a tuple of `(topological, stream)` for historical tokens.

        Raises if not an historical token (i.e. doesn't have a topological part).
        """
        topological = self.topological
        if topological is None:
            raise Exception(
                "Cannot call `RoomStreamToken.as_historical_tuple` on live token"
            )

        return topological, self.stream

    def get_stream_pos_for_instance(self, instance_name: str) -> int:
        """Get the stream position that the given writer was at at this token.

        This only makes sense for "live" tokens that may have a vector clock
        component, and so asserts that this is a "live" token.
        """
        assert self.topological is None

        # If we don't have an entry for the instance we can assume that it was
        # at `self.stream`.
        return self.instance_map.get(instance_name, self.stream)

    def get_max_stream_pos(self) -> int:
        """Get the maximum stream position referenced in this token.

        The corresponding "min" position is, by definition just `self.stream`.

        This is used to handle tokens that have non-empty `instance_map`, and so
        reference stream positions after the `self.stream` position.
        """
        return max(self.instance_map.values(), default=self.stream)

    async def to_string(self, store: "DataStore") -> str:
        """Serialise the token in its "t", "m" or "s" form.

        Args:
            store: used to map instance names to instance IDs for "m" tokens
        """
        if self.topological is not None:
            return "t%d-%d" % (self.topological, self.stream)

        if not self.instance_map:
            return "s%d" % (self.stream,)

        entries = []
        for name, pos in self.instance_map.items():
            instance_id = await store.get_id_for_instance(name)
            entries.append(f"{instance_id}.{pos}")

        encoded_map = "~".join(entries)
        return f"m{self.stream}~{encoded_map}"
|
|
|
|
|
|
|
|
|
|
|
|
@attr.s(slots=True, frozen=True)
class StreamToken:
    """A collection of positions within multiple streams.

    For caching purposes, `StreamToken`s and by extension, all their attributes,
    must be hashable.
    """

    room_key = attr.ib(
        type=RoomStreamToken, validator=attr.validators.instance_of(RoomStreamToken)
    )
    presence_key = attr.ib(type=int)
    typing_key = attr.ib(type=int)
    receipt_key = attr.ib(type=int)
    account_data_key = attr.ib(type=int)
    push_rules_key = attr.ib(type=int)
    to_device_key = attr.ib(type=int)
    device_list_key = attr.ib(type=int)
    groups_key = attr.ib(type=int)

    _SEPARATOR = "_"
    START: "StreamToken"

    @classmethod
    async def from_string(cls, store: "DataStore", string: str) -> "StreamToken":
        """Parse a token produced by `to_string`.

        Raises:
            SynapseError: (400) if the string cannot be parsed.
        """
        try:
            keys = string.split(cls._SEPARATOR)

            # Older tokens (e.g. from before receipt_key) have fewer
            # components; pad the missing positions with "0".
            missing = len(attr.fields(cls)) - len(keys)
            keys.extend(["0"] * missing)

            room_key = await RoomStreamToken.parse(store, keys[0])
            return cls(room_key, *(int(k) for k in keys[1:]))
        except Exception:
            raise SynapseError(400, "Invalid stream token")

    async def to_string(self, store: "DataStore") -> str:
        """Serialise as the room key followed by the integer keys, joined by
        the separator.
        """
        parts = [await self.room_key.to_string(store)]
        parts.extend(
            str(getattr(self, name))
            for name in (
                "presence_key",
                "typing_key",
                "receipt_key",
                "account_data_key",
                "push_rules_key",
                "to_device_key",
                "device_list_key",
                "groups_key",
            )
        )
        return self._SEPARATOR.join(parts)

    @property
    def room_stream_id(self):
        """The `stream` position of the room key."""
        return self.room_key.stream

    def copy_and_advance(self, key, new_value) -> "StreamToken":
        """Advance the given key in the token to a new value if and only if the
        new value is after the old value.
        """
        if key == "room_key":
            # Room keys know how to merge themselves.
            advanced_room_key = self.room_key.copy_and_advance(new_value)
            return self.copy_and_replace("room_key", advanced_room_key)

        candidate = self.copy_and_replace(key, new_value)
        new_id = int(getattr(candidate, key))
        old_id = int(getattr(self, key))

        return candidate if old_id < new_id else self

    def copy_and_replace(self, key, new_value) -> "StreamToken":
        """Return a copy of this token with `key` set to `new_value`."""
        return attr.evolve(self, **{key: new_value})
|
2019-06-20 11:32:02 +02:00
|
|
|
|
2015-05-11 19:00:33 +02:00
|
|
|
|
2020-09-30 21:29:19 +02:00
|
|
|
# The token at position zero of every stream.
StreamToken.START = StreamToken(
    room_key=RoomStreamToken(topological=None, stream=0),
    presence_key=0,
    typing_key=0,
    receipt_key=0,
    account_data_key=0,
    push_rules_key=0,
    to_device_key=0,
    device_list_key=0,
    groups_key=0,
)
|
2016-12-06 11:43:48 +01:00
|
|
|
|
|
|
|
|
2020-09-24 14:24:17 +02:00
|
|
|
@attr.s(slots=True, frozen=True)
class PersistedEventPosition:
    """Position of a newly persisted event with instance that persisted it.

    This can be used to test whether the event is persisted before or after a
    RoomStreamToken.
    """

    instance_name = attr.ib(type=str)
    stream = attr.ib(type=int)

    def persisted_after(self, token: RoomStreamToken) -> bool:
        """Whether this position is strictly after the position that `token`
        records for this event's writer instance.
        """
        writer_position = token.get_stream_pos_for_instance(self.instance_name)
        return writer_position < self.stream

    def to_room_stream_token(self) -> RoomStreamToken:
        """Converts the position to a room stream token such that events
        persisted in the same room after this position will be after the
        returned `RoomStreamToken`.

        Note: no guarantees are made about ordering w.r.t. events in other
        rooms.
        """
        # Doing the naive thing satisfies the desired properties described in
        # the docstring.
        return RoomStreamToken(topological=None, stream=self.stream)
|
|
|
|
|
2020-09-24 14:24:17 +02:00
|
|
|
|
2016-12-06 11:43:48 +01:00
|
|
|
class ThirdPartyInstanceID(
    namedtuple("ThirdPartyInstanceID", ("appservice_id", "network_id"))
):
    """An (appservice_id, network_id) pair, serialised as
    "appservice_id|network_id".
    """

    # Deny iteration because it will bite you if you try to create a singleton
    # set by:
    #    users = set(user)
    def __iter__(self):
        raise ValueError("Attempted to iterate a %s" % (type(self).__name__,))

    # Because this class is a namedtuple of strings, it is deeply immutable,
    # so copies can share the same object.
    def __copy__(self):
        return self

    def __deepcopy__(self, memo):
        return self

    @classmethod
    def from_string(cls, s):
        """Parse "appservice_id|network_id".

        Raises:
            SynapseError: (400) unless the string contains exactly one "|".
        """
        if s.count("|") != 1:
            raise SynapseError(400, "Invalid ID %r" % (s,))

        appservice_id, network_id = s.split("|")
        return cls(appservice_id=appservice_id, network_id=network_id)

    def to_string(self):
        """Return the "appservice_id|network_id" form."""
        return "%s|%s" % (self.appservice_id, self.network_id)

    __str__ = to_string

    @classmethod
    def create(cls, appservice_id, network_id):
        """Build an instance from its two components."""
        return cls(appservice_id=appservice_id, network_id=network_id)
|
2019-03-12 17:50:58 +01:00
|
|
|
|
|
|
|
|
|
|
|
@attr.s(slots=True)
class ReadReceipt:
    """A record describing a single read-receipt."""

    room_id = attr.ib()
    receipt_type = attr.ib()
    user_id = attr.ib()
    event_ids = attr.ib()
    data = attr.ib()
|
2019-07-25 17:08:24 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_verify_key_from_cross_signing_key(key_info):
    """Extract the key ID and signedjson verify key from a cross-signing key
    dict.

    Args:
        key_info (dict): a cross-signing key dict, which must have a "keys"
            property that has exactly one item in it

    Returns:
        (str, VerifyKey): the key ID and verify key for the cross-signing key

    Raises:
        ValueError: if "keys" is missing or does not hold exactly one item.
    """
    # make sure that exactly one key is provided
    if "keys" not in key_info or len(key_info["keys"]) != 1:
        raise ValueError("Invalid key")

    # and return that one key
    ((key_id, key_data),) = key_info["keys"].items()
    return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data))
|
2021-08-04 12:40:25 +02:00
|
|
|
|
|
|
|
|
|
|
|
@attr.s(auto_attribs=True, frozen=True, slots=True)
class UserInfo:
    """Information about a single user, as returned by get_userinfo_by_id.

    Attributes:
        user_id: ID of the user.
        appservice_id: Application service ID that created this user.
        consent_server_notice_sent: Version of policy documents the user has
            been sent.
        consent_version: Version of policy documents the user has consented to.
        creation_ts: Creation timestamp of the user.
        is_admin: True if the user is an admin.
        is_deactivated: True if the user has been deactivated.
        is_guest: True if the user is a guest user.
        is_shadow_banned: True if the user has been shadow-banned.
        user_type: User type (None for normal user, 'support' and 'bot' other
            options).
    """

    user_id: UserID
    appservice_id: Optional[int]
    consent_server_notice_sent: Optional[str]
    consent_version: Optional[str]
    user_type: Optional[str]
    creation_ts: int
    is_admin: bool
    is_deactivated: bool
    is_guest: bool
    is_shadow_banned: bool
|