515 lines
16 KiB
Python
515 lines
16 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright 2014-2016 OpenMarket Ltd
|
|
# Copyright 2019 The Matrix.org Foundation C.I.C.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import re
|
|
import string
|
|
import sys
|
|
from collections import namedtuple
|
|
|
|
import attr
|
|
from signedjson.key import decode_verify_key_bytes
|
|
from unpaddedbase64 import decode_base64
|
|
|
|
from synapse.api.errors import SynapseError
|
|
|
|
# define a version of typing.Collection that works on python 3.5
|
|
if sys.version_info[:3] >= (3, 6, 0):
|
|
from typing import Collection
|
|
else:
|
|
from typing import Sized, Iterable, Container, TypeVar
|
|
|
|
T_co = TypeVar("T_co", covariant=True)
|
|
|
|
class Collection(Iterable[T_co], Container[T_co], Sized):
|
|
__slots__ = ()
|
|
|
|
|
|
class Requester(
|
|
namedtuple(
|
|
"Requester", ["user", "access_token_id", "is_guest", "device_id", "app_service"]
|
|
)
|
|
):
|
|
"""
|
|
Represents the user making a request
|
|
|
|
Attributes:
|
|
user (UserID): id of the user making the request
|
|
access_token_id (int|None): *ID* of the access token used for this
|
|
request, or None if it came via the appservice API or similar
|
|
is_guest (bool): True if the user making this request is a guest user
|
|
device_id (str|None): device_id which was set at authentication time
|
|
app_service (ApplicationService|None): the AS requesting on behalf of the user
|
|
"""
|
|
|
|
def serialize(self):
|
|
"""Converts self to a type that can be serialized as JSON, and then
|
|
deserialized by `deserialize`
|
|
|
|
Returns:
|
|
dict
|
|
"""
|
|
return {
|
|
"user_id": self.user.to_string(),
|
|
"access_token_id": self.access_token_id,
|
|
"is_guest": self.is_guest,
|
|
"device_id": self.device_id,
|
|
"app_server_id": self.app_service.id if self.app_service else None,
|
|
}
|
|
|
|
@staticmethod
|
|
def deserialize(store, input):
|
|
"""Converts a dict that was produced by `serialize` back into a
|
|
Requester.
|
|
|
|
Args:
|
|
store (DataStore): Used to convert AS ID to AS object
|
|
input (dict): A dict produced by `serialize`
|
|
|
|
Returns:
|
|
Requester
|
|
"""
|
|
appservice = None
|
|
if input["app_server_id"]:
|
|
appservice = store.get_app_service_by_id(input["app_server_id"])
|
|
|
|
return Requester(
|
|
user=UserID.from_string(input["user_id"]),
|
|
access_token_id=input["access_token_id"],
|
|
is_guest=input["is_guest"],
|
|
device_id=input["device_id"],
|
|
app_service=appservice,
|
|
)
|
|
|
|
|
|
def create_requester(
|
|
user_id, access_token_id=None, is_guest=False, device_id=None, app_service=None
|
|
):
|
|
"""
|
|
Create a new ``Requester`` object
|
|
|
|
Args:
|
|
user_id (str|UserID): id of the user making the request
|
|
access_token_id (int|None): *ID* of the access token used for this
|
|
request, or None if it came via the appservice API or similar
|
|
is_guest (bool): True if the user making this request is a guest user
|
|
device_id (str|None): device_id which was set at authentication time
|
|
app_service (ApplicationService|None): the AS requesting on behalf of the user
|
|
|
|
Returns:
|
|
Requester
|
|
"""
|
|
if not isinstance(user_id, UserID):
|
|
user_id = UserID.from_string(user_id)
|
|
return Requester(user_id, access_token_id, is_guest, device_id, app_service)
|
|
|
|
|
|
def get_domain_from_id(string):
|
|
idx = string.find(":")
|
|
if idx == -1:
|
|
raise SynapseError(400, "Invalid ID: %r" % (string,))
|
|
return string[idx + 1 :]
|
|
|
|
|
|
def get_localpart_from_id(string):
|
|
idx = string.find(":")
|
|
if idx == -1:
|
|
raise SynapseError(400, "Invalid ID: %r" % (string,))
|
|
return string[1:idx]
|
|
|
|
|
|
class DomainSpecificString(namedtuple("DomainSpecificString", ("localpart", "domain"))):
|
|
"""Common base class among ID/name strings that have a local part and a
|
|
domain name, prefixed with a sigil.
|
|
|
|
Has the fields:
|
|
|
|
'localpart' : The local part of the name (without the leading sigil)
|
|
'domain' : The domain part of the name
|
|
"""
|
|
|
|
# Deny iteration because it will bite you if you try to create a singleton
|
|
# set by:
|
|
# users = set(user)
|
|
def __iter__(self):
|
|
raise ValueError("Attempted to iterate a %s" % (type(self).__name__,))
|
|
|
|
# Because this class is a namedtuple of strings and booleans, it is deeply
|
|
# immutable.
|
|
def __copy__(self):
|
|
return self
|
|
|
|
def __deepcopy__(self, memo):
|
|
return self
|
|
|
|
@classmethod
|
|
def from_string(cls, s):
|
|
"""Parse the string given by 's' into a structure object."""
|
|
if len(s) < 1 or s[0:1] != cls.SIGIL:
|
|
raise SynapseError(
|
|
400, "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL)
|
|
)
|
|
|
|
parts = s[1:].split(":", 1)
|
|
if len(parts) != 2:
|
|
raise SynapseError(
|
|
400,
|
|
"Expected %s of the form '%slocalname:domain'"
|
|
% (cls.__name__, cls.SIGIL),
|
|
)
|
|
|
|
domain = parts[1]
|
|
|
|
# This code will need changing if we want to support multiple domain
|
|
# names on one HS
|
|
return cls(localpart=parts[0], domain=domain)
|
|
|
|
def to_string(self):
|
|
"""Return a string encoding the fields of the structure object."""
|
|
return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
|
|
|
|
@classmethod
|
|
def is_valid(cls, s):
|
|
try:
|
|
cls.from_string(s)
|
|
return True
|
|
except Exception:
|
|
return False
|
|
|
|
__repr__ = to_string
|
|
|
|
|
|
class UserID(DomainSpecificString):
|
|
"""Structure representing a user ID."""
|
|
|
|
SIGIL = "@"
|
|
|
|
|
|
class RoomAlias(DomainSpecificString):
|
|
"""Structure representing a room name."""
|
|
|
|
SIGIL = "#"
|
|
|
|
|
|
class RoomID(DomainSpecificString):
|
|
"""Structure representing a room id. """
|
|
|
|
SIGIL = "!"
|
|
|
|
|
|
class EventID(DomainSpecificString):
|
|
"""Structure representing an event id. """
|
|
|
|
SIGIL = "$"
|
|
|
|
|
|
class GroupID(DomainSpecificString):
|
|
"""Structure representing a group ID."""
|
|
|
|
SIGIL = "+"
|
|
|
|
@classmethod
|
|
def from_string(cls, s):
|
|
group_id = super(GroupID, cls).from_string(s)
|
|
if not group_id.localpart:
|
|
raise SynapseError(400, "Group ID cannot be empty")
|
|
|
|
if contains_invalid_mxid_characters(group_id.localpart):
|
|
raise SynapseError(
|
|
400, "Group ID can only contain characters a-z, 0-9, or '=_-./'"
|
|
)
|
|
|
|
return group_id
|
|
|
|
|
|
mxid_localpart_allowed_characters = set(
|
|
"_-./=" + string.ascii_lowercase + string.digits
|
|
)
|
|
|
|
|
|
def contains_invalid_mxid_characters(localpart):
|
|
"""Check for characters not allowed in an mxid or groupid localpart
|
|
|
|
Args:
|
|
localpart (basestring): the localpart to be checked
|
|
|
|
Returns:
|
|
bool: True if there are any naughty characters
|
|
"""
|
|
return any(c not in mxid_localpart_allowed_characters for c in localpart)
|
|
|
|
|
|
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
|
|
|
|
# the following is a pattern which matches '=', and bytes which are not allowed in a mxid
|
|
# localpart.
|
|
#
|
|
# It works by:
|
|
# * building a string containing the allowed characters (excluding '=')
|
|
# * escaping every special character with a backslash (to stop '-' being interpreted as a
|
|
# range operator)
|
|
# * wrapping it in a '[^...]' regex
|
|
# * converting the whole lot to a 'bytes' sequence, so that we can use it to match
|
|
# bytes rather than strings
|
|
#
|
|
NON_MXID_CHARACTER_PATTERN = re.compile(
|
|
("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode(
|
|
"ascii"
|
|
)
|
|
)
|
|
|
|
|
|
def map_username_to_mxid_localpart(username, case_sensitive=False):
|
|
"""Map a username onto a string suitable for a MXID
|
|
|
|
This follows the algorithm laid out at
|
|
https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets.
|
|
|
|
Args:
|
|
username (unicode|bytes): username to be mapped
|
|
case_sensitive (bool): true if TEST and test should be mapped
|
|
onto different mxids
|
|
|
|
Returns:
|
|
unicode: string suitable for a mxid localpart
|
|
"""
|
|
if not isinstance(username, bytes):
|
|
username = username.encode("utf-8")
|
|
|
|
# first we sort out upper-case characters
|
|
if case_sensitive:
|
|
|
|
def f1(m):
|
|
return b"_" + m.group().lower()
|
|
|
|
username = UPPER_CASE_PATTERN.sub(f1, username)
|
|
else:
|
|
username = username.lower()
|
|
|
|
# then we sort out non-ascii characters
|
|
def f2(m):
|
|
g = m.group()[0]
|
|
if isinstance(g, str):
|
|
# on python 2, we need to do a ord(). On python 3, the
|
|
# byte itself will do.
|
|
g = ord(g)
|
|
return b"=%02x" % (g,)
|
|
|
|
username = NON_MXID_CHARACTER_PATTERN.sub(f2, username)
|
|
|
|
# we also do the =-escaping to mxids starting with an underscore.
|
|
username = re.sub(b"^_", b"=5f", username)
|
|
|
|
# we should now only have ascii bytes left, so can decode back to a
|
|
# unicode.
|
|
return username.decode("ascii")
|
|
|
|
|
|
class StreamToken(
|
|
namedtuple(
|
|
"Token",
|
|
(
|
|
"room_key",
|
|
"presence_key",
|
|
"typing_key",
|
|
"receipt_key",
|
|
"account_data_key",
|
|
"push_rules_key",
|
|
"to_device_key",
|
|
"device_list_key",
|
|
"groups_key",
|
|
),
|
|
)
|
|
):
|
|
_SEPARATOR = "_"
|
|
START = None # type: StreamToken
|
|
|
|
@classmethod
|
|
def from_string(cls, string):
|
|
try:
|
|
keys = string.split(cls._SEPARATOR)
|
|
while len(keys) < len(cls._fields):
|
|
# i.e. old token from before receipt_key
|
|
keys.append("0")
|
|
return cls(*keys)
|
|
except Exception:
|
|
raise SynapseError(400, "Invalid Token")
|
|
|
|
def to_string(self):
|
|
return self._SEPARATOR.join([str(k) for k in self])
|
|
|
|
@property
|
|
def room_stream_id(self):
|
|
# TODO(markjh): Awful hack to work around hacks in the presence tests
|
|
# which assume that the keys are integers.
|
|
if type(self.room_key) is int:
|
|
return self.room_key
|
|
else:
|
|
return int(self.room_key[1:].split("-")[-1])
|
|
|
|
def is_after(self, other):
|
|
"""Does this token contain events that the other doesn't?"""
|
|
return (
|
|
(other.room_stream_id < self.room_stream_id)
|
|
or (int(other.presence_key) < int(self.presence_key))
|
|
or (int(other.typing_key) < int(self.typing_key))
|
|
or (int(other.receipt_key) < int(self.receipt_key))
|
|
or (int(other.account_data_key) < int(self.account_data_key))
|
|
or (int(other.push_rules_key) < int(self.push_rules_key))
|
|
or (int(other.to_device_key) < int(self.to_device_key))
|
|
or (int(other.device_list_key) < int(self.device_list_key))
|
|
or (int(other.groups_key) < int(self.groups_key))
|
|
)
|
|
|
|
def copy_and_advance(self, key, new_value):
|
|
"""Advance the given key in the token to a new value if and only if the
|
|
new value is after the old value.
|
|
"""
|
|
new_token = self.copy_and_replace(key, new_value)
|
|
if key == "room_key":
|
|
new_id = new_token.room_stream_id
|
|
old_id = self.room_stream_id
|
|
else:
|
|
new_id = int(getattr(new_token, key))
|
|
old_id = int(getattr(self, key))
|
|
if old_id < new_id:
|
|
return new_token
|
|
else:
|
|
return self
|
|
|
|
def copy_and_replace(self, key, new_value):
|
|
return self._replace(**{key: new_value})
|
|
|
|
|
|
StreamToken.START = StreamToken(*(["s0"] + ["0"] * (len(StreamToken._fields) - 1)))
|
|
|
|
|
|
class RoomStreamToken(namedtuple("_StreamToken", "topological stream")):
|
|
"""Tokens are positions between events. The token "s1" comes after event 1.
|
|
|
|
s0 s1
|
|
| |
|
|
[0] V [1] V [2]
|
|
|
|
Tokens can either be a point in the live event stream or a cursor going
|
|
through historic events.
|
|
|
|
When traversing the live event stream events are ordered by when they
|
|
arrived at the homeserver.
|
|
|
|
When traversing historic events the events are ordered by their depth in
|
|
the event graph "topological_ordering" and then by when they arrived at the
|
|
homeserver "stream_ordering".
|
|
|
|
Live tokens start with an "s" followed by the "stream_ordering" id of the
|
|
event it comes after. Historic tokens start with a "t" followed by the
|
|
"topological_ordering" id of the event it comes after, followed by "-",
|
|
followed by the "stream_ordering" id of the event it comes after.
|
|
"""
|
|
|
|
__slots__ = [] # type: list
|
|
|
|
@classmethod
|
|
def parse(cls, string):
|
|
try:
|
|
if string[0] == "s":
|
|
return cls(topological=None, stream=int(string[1:]))
|
|
if string[0] == "t":
|
|
parts = string[1:].split("-", 1)
|
|
return cls(topological=int(parts[0]), stream=int(parts[1]))
|
|
except Exception:
|
|
pass
|
|
raise SynapseError(400, "Invalid token %r" % (string,))
|
|
|
|
@classmethod
|
|
def parse_stream_token(cls, string):
|
|
try:
|
|
if string[0] == "s":
|
|
return cls(topological=None, stream=int(string[1:]))
|
|
except Exception:
|
|
pass
|
|
raise SynapseError(400, "Invalid token %r" % (string,))
|
|
|
|
def __str__(self):
|
|
if self.topological is not None:
|
|
return "t%d-%d" % (self.topological, self.stream)
|
|
else:
|
|
return "s%d" % (self.stream,)
|
|
|
|
|
|
class ThirdPartyInstanceID(
|
|
namedtuple("ThirdPartyInstanceID", ("appservice_id", "network_id"))
|
|
):
|
|
# Deny iteration because it will bite you if you try to create a singleton
|
|
# set by:
|
|
# users = set(user)
|
|
def __iter__(self):
|
|
raise ValueError("Attempted to iterate a %s" % (type(self).__name__,))
|
|
|
|
# Because this class is a namedtuple of strings, it is deeply immutable.
|
|
def __copy__(self):
|
|
return self
|
|
|
|
def __deepcopy__(self, memo):
|
|
return self
|
|
|
|
@classmethod
|
|
def from_string(cls, s):
|
|
bits = s.split("|", 2)
|
|
if len(bits) != 2:
|
|
raise SynapseError(400, "Invalid ID %r" % (s,))
|
|
|
|
return cls(appservice_id=bits[0], network_id=bits[1])
|
|
|
|
def to_string(self):
|
|
return "%s|%s" % (self.appservice_id, self.network_id)
|
|
|
|
__str__ = to_string
|
|
|
|
@classmethod
|
|
def create(cls, appservice_id, network_id):
|
|
return cls(appservice_id=appservice_id, network_id=network_id)
|
|
|
|
|
|
@attr.s(slots=True)
|
|
class ReadReceipt(object):
|
|
"""Information about a read-receipt"""
|
|
|
|
room_id = attr.ib()
|
|
receipt_type = attr.ib()
|
|
user_id = attr.ib()
|
|
event_ids = attr.ib()
|
|
data = attr.ib()
|
|
|
|
|
|
def get_verify_key_from_cross_signing_key(key_info):
|
|
"""Get the key ID and signedjson verify key from a cross-signing key dict
|
|
|
|
Args:
|
|
key_info (dict): a cross-signing key dict, which must have a "keys"
|
|
property that has exactly one item in it
|
|
|
|
Returns:
|
|
(str, VerifyKey): the key ID and verify key for the cross-signing key
|
|
"""
|
|
# make sure that exactly one key is provided
|
|
if "keys" not in key_info:
|
|
raise ValueError("Invalid key")
|
|
keys = key_info["keys"]
|
|
if len(keys) != 1:
|
|
raise ValueError("Invalid key")
|
|
# and return that one key
|
|
for key_id, key_data in keys.items():
|
|
return (key_id, decode_verify_key_bytes(key_id, decode_base64(key_data)))
|