Keep track of `user_ips` and `monthly_active_users` when delegating auth (#16672)
* Describe `insert_client_ip` * Pull out client_ips and MAU tracking to BaseAuth * Define HAS_AUTHLIB once in tests sick of copypasting * Track ips and token usage when delegating auth * Test that we track MAU and user_ips * Don't track `__oidc_admin`pull/16682/head
							parent
							
								
									1a5f9bb651
								
							
						
					
					
						commit
						32a59a6495
					
				|  | @ -0,0 +1 @@ | |||
| Restore tracking of requests and monthly active users when delegating authentication to an [MSC3861](https://github.com/matrix-org/synapse/pull/16672) OIDC provider. | ||||
|  | @ -27,6 +27,8 @@ from synapse.api.errors import ( | |||
|     UnstableSpecAuthError, | ||||
| ) | ||||
| from synapse.appservice import ApplicationService | ||||
| from synapse.http import get_request_user_agent | ||||
| from synapse.http.site import SynapseRequest | ||||
| from synapse.logging.opentracing import trace | ||||
| from synapse.types import Requester, create_requester | ||||
| from synapse.util.cancellation import cancellable | ||||
|  | @ -45,6 +47,9 @@ class BaseAuth: | |||
|         self.store = hs.get_datastores().main | ||||
|         self._storage_controllers = hs.get_storage_controllers() | ||||
| 
 | ||||
|         self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips | ||||
|         self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips | ||||
| 
 | ||||
|     async def check_user_in_room( | ||||
|         self, | ||||
|         room_id: str, | ||||
|  | @ -349,3 +354,46 @@ class BaseAuth: | |||
|         return create_requester( | ||||
|             effective_user_id, app_service=app_service, device_id=effective_device_id | ||||
|         ) | ||||
| 
 | ||||
|     async def _record_request( | ||||
|         self, request: SynapseRequest, requester: Requester | ||||
|     ) -> None: | ||||
|         """Record that this request was made. | ||||
| 
 | ||||
|         This updates the client_ips and monthly_active_user tables. | ||||
|         """ | ||||
|         ip_addr = request.get_client_ip_if_available() | ||||
| 
 | ||||
|         if ip_addr and (not requester.app_service or self._track_appservice_user_ips): | ||||
|             user_agent = get_request_user_agent(request) | ||||
|             access_token = self.get_access_token_from_request(request) | ||||
| 
 | ||||
|             # XXX(quenting): I'm 95% confident that we could skip setting the | ||||
|             # device_id to "dummy-device" for appservices, and that the only impact | ||||
|             # would be some rows which whould not deduplicate in the 'user_ips' | ||||
|             # table during the transition | ||||
|             recorded_device_id = ( | ||||
|                 "dummy-device" | ||||
|                 if requester.device_id is None and requester.app_service is not None | ||||
|                 else requester.device_id | ||||
|             ) | ||||
|             await self.store.insert_client_ip( | ||||
|                 user_id=requester.authenticated_entity, | ||||
|                 access_token=access_token, | ||||
|                 ip=ip_addr, | ||||
|                 user_agent=user_agent, | ||||
|                 device_id=recorded_device_id, | ||||
|             ) | ||||
| 
 | ||||
|             # Track also the puppeted user client IP if enabled and the user is puppeting | ||||
|             if ( | ||||
|                 requester.user.to_string() != requester.authenticated_entity | ||||
|                 and self._track_puppeted_user_ips | ||||
|             ): | ||||
|                 await self.store.insert_client_ip( | ||||
|                     user_id=requester.user.to_string(), | ||||
|                     access_token=access_token, | ||||
|                     ip=ip_addr, | ||||
|                     user_agent=user_agent, | ||||
|                     device_id=requester.device_id, | ||||
|                 ) | ||||
|  |  | |||
|  | @ -22,7 +22,6 @@ from synapse.api.errors import ( | |||
|     InvalidClientTokenError, | ||||
|     MissingClientTokenError, | ||||
| ) | ||||
| from synapse.http import get_request_user_agent | ||||
| from synapse.http.site import SynapseRequest | ||||
| from synapse.logging.opentracing import active_span, force_tracing, start_active_span | ||||
| from synapse.types import Requester, create_requester | ||||
|  | @ -48,8 +47,6 @@ class InternalAuth(BaseAuth): | |||
|         self._account_validity_handler = hs.get_account_validity_handler() | ||||
|         self._macaroon_generator = hs.get_macaroon_generator() | ||||
| 
 | ||||
|         self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips | ||||
|         self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips | ||||
|         self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users | ||||
| 
 | ||||
|     @cancellable | ||||
|  | @ -115,9 +112,6 @@ class InternalAuth(BaseAuth): | |||
|         Once get_user_by_req has set up the opentracing span, this does the actual work. | ||||
|         """ | ||||
|         try: | ||||
|             ip_addr = request.get_client_ip_if_available() | ||||
|             user_agent = get_request_user_agent(request) | ||||
| 
 | ||||
|             access_token = self.get_access_token_from_request(request) | ||||
| 
 | ||||
|             # First check if it could be a request from an appservice | ||||
|  | @ -154,38 +148,7 @@ class InternalAuth(BaseAuth): | |||
|                             errcode=Codes.EXPIRED_ACCOUNT, | ||||
|                         ) | ||||
| 
 | ||||
|             if ip_addr and ( | ||||
|                 not requester.app_service or self._track_appservice_user_ips | ||||
|             ): | ||||
|                 # XXX(quenting): I'm 95% confident that we could skip setting the | ||||
|                 # device_id to "dummy-device" for appservices, and that the only impact | ||||
|                 # would be some rows which whould not deduplicate in the 'user_ips' | ||||
|                 # table during the transition | ||||
|                 recorded_device_id = ( | ||||
|                     "dummy-device" | ||||
|                     if requester.device_id is None and requester.app_service is not None | ||||
|                     else requester.device_id | ||||
|                 ) | ||||
|                 await self.store.insert_client_ip( | ||||
|                     user_id=requester.authenticated_entity, | ||||
|                     access_token=access_token, | ||||
|                     ip=ip_addr, | ||||
|                     user_agent=user_agent, | ||||
|                     device_id=recorded_device_id, | ||||
|                 ) | ||||
| 
 | ||||
|                 # Track also the puppeted user client IP if enabled and the user is puppeting | ||||
|                 if ( | ||||
|                     requester.user.to_string() != requester.authenticated_entity | ||||
|                     and self._track_puppeted_user_ips | ||||
|                 ): | ||||
|                     await self.store.insert_client_ip( | ||||
|                         user_id=requester.user.to_string(), | ||||
|                         access_token=access_token, | ||||
|                         ip=ip_addr, | ||||
|                         user_agent=user_agent, | ||||
|                         device_id=requester.device_id, | ||||
|                     ) | ||||
|             await self._record_request(request, requester) | ||||
| 
 | ||||
|             if requester.is_guest and not allow_guest: | ||||
|                 raise AuthError( | ||||
|  |  | |||
|  | @ -227,6 +227,10 @@ class MSC3861DelegatedAuth(BaseAuth): | |||
|             # so that we don't provision the user if they don't have enough permission: | ||||
|             requester = await self.get_user_by_access_token(access_token, allow_expired) | ||||
| 
 | ||||
|         # Do not record requests from MAS using the virtual `__oidc_admin` user. | ||||
|         if access_token != self._admin_token: | ||||
|             await self._record_request(request, requester) | ||||
| 
 | ||||
|         if not allow_guest and requester.is_guest: | ||||
|             raise OAuthInsufficientScopeError([SCOPE_MATRIX_API]) | ||||
| 
 | ||||
|  |  | |||
|  | @ -589,6 +589,27 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke | |||
|         device_id: Optional[str], | ||||
|         now: Optional[int] = None, | ||||
|     ) -> None: | ||||
|         """Record that `user_id` used `access_token` from this `ip` address. | ||||
| 
 | ||||
|         This method does two things. | ||||
| 
 | ||||
|         1. It queues up a row to be upserted into the `client_ips` table. These happen | ||||
|            periodically; see _update_client_ips_batch. | ||||
|         2. It immediately records this user as having taken action for the purposes of | ||||
|            MAU tracking. | ||||
| 
 | ||||
|         Any DB writes take place on the background tasks worker, falling back to the | ||||
|         main process. If we're not that worker, this method emits a replication payload | ||||
|         to run this logic on that worker. | ||||
| 
 | ||||
|         Two caveats to note: | ||||
| 
 | ||||
|          - We only take action once per LAST_SEEN_GRANULARITY, to avoid spamming the | ||||
|            DB with writes. | ||||
|          - Requests using the sliding-sync proxy's user agent are excluded, as its | ||||
|            requests are not directly driven by end-users. This is a hack and we're not | ||||
|            very proud of it. | ||||
|         """ | ||||
|         # The sync proxy continuously triggers /sync even if the user is not | ||||
|         # present so should be excluded from user_ips entries. | ||||
|         if user_agent == "sync-v3-proxy-": | ||||
|  |  | |||
|  | @ -22,15 +22,7 @@ from synapse.types import JsonDict | |||
| 
 | ||||
| from tests.server import get_clock, setup_test_homeserver | ||||
| from tests.unittest import TestCase, skip_unless | ||||
| from tests.utils import default_config | ||||
| 
 | ||||
| try: | ||||
|     import authlib  # noqa: F401 | ||||
| 
 | ||||
|     HAS_AUTHLIB = True | ||||
| except ImportError: | ||||
|     HAS_AUTHLIB = False | ||||
| 
 | ||||
| from tests.utils import HAS_AUTHLIB, default_config | ||||
| 
 | ||||
| # These are a few constants that are used as config parameters in the tests. | ||||
| SERVER_NAME = "test" | ||||
|  |  | |||
|  | @ -13,7 +13,8 @@ | |||
| # limitations under the License. | ||||
| 
 | ||||
| from http import HTTPStatus | ||||
| from typing import Any, Dict, Union | ||||
| from io import BytesIO | ||||
| from typing import Any, Dict, Optional, Union | ||||
| from unittest.mock import ANY, AsyncMock, Mock | ||||
| from urllib.parse import parse_qs | ||||
| 
 | ||||
|  | @ -25,6 +26,8 @@ from signedjson.key import ( | |||
| from signedjson.sign import sign_json | ||||
| 
 | ||||
| from twisted.test.proto_helpers import MemoryReactor | ||||
| from twisted.web.http_headers import Headers | ||||
| from twisted.web.iweb import IResponse | ||||
| 
 | ||||
| from synapse.api.errors import ( | ||||
|     AuthError, | ||||
|  | @ -33,23 +36,17 @@ from synapse.api.errors import ( | |||
|     OAuthInsufficientScopeError, | ||||
|     SynapseError, | ||||
| ) | ||||
| from synapse.http.site import SynapseRequest | ||||
| from synapse.rest import admin | ||||
| from synapse.rest.client import account, devices, keys, login, logout, register | ||||
| from synapse.server import HomeServer | ||||
| from synapse.types import JsonDict | ||||
| from synapse.types import JsonDict, UserID | ||||
| from synapse.util import Clock | ||||
| 
 | ||||
| from tests.server import FakeChannel | ||||
| from tests.test_utils import FakeResponse, get_awaitable_result | ||||
| from tests.unittest import HomeserverTestCase, skip_unless | ||||
| from tests.utils import mock_getRawHeaders | ||||
| 
 | ||||
| try: | ||||
|     import authlib  # noqa: F401 | ||||
| 
 | ||||
|     HAS_AUTHLIB = True | ||||
| except ImportError: | ||||
|     HAS_AUTHLIB = False | ||||
| 
 | ||||
| from tests.unittest import HomeserverTestCase, override_config, skip_unless | ||||
| from tests.utils import HAS_AUTHLIB, checked_cast, mock_getRawHeaders | ||||
| 
 | ||||
| # These are a few constants that are used as config parameters in the tests. | ||||
| SERVER_NAME = "test" | ||||
|  | @ -75,6 +72,7 @@ MATRIX_DEVICE_SCOPE = MATRIX_DEVICE_SCOPE_PREFIX + DEVICE | |||
| SUBJECT = "abc-def-ghi" | ||||
| USERNAME = "test-user" | ||||
| USER_ID = "@" + USERNAME + ":" + SERVER_NAME | ||||
| OIDC_ADMIN_USERID = f"@__oidc_admin:{SERVER_NAME}" | ||||
| 
 | ||||
| 
 | ||||
| async def get_json(url: str) -> JsonDict: | ||||
|  | @ -134,7 +132,10 @@ class MSC3861OAuthDelegation(HomeserverTestCase): | |||
| 
 | ||||
|         hs = self.setup_test_homeserver(proxied_http_client=self.http_client) | ||||
| 
 | ||||
|         self.auth = hs.get_auth() | ||||
|         # Import this here so that we've checked that authlib is available. | ||||
|         from synapse.api.auth.msc3861_delegated import MSC3861DelegatedAuth | ||||
| 
 | ||||
|         self.auth = checked_cast(MSC3861DelegatedAuth, hs.get_auth()) | ||||
| 
 | ||||
|         return hs | ||||
| 
 | ||||
|  | @ -675,7 +676,8 @@ class MSC3861OAuthDelegation(HomeserverTestCase): | |||
|         request.requestHeaders.getRawHeaders = mock_getRawHeaders() | ||||
|         requester = self.get_success(self.auth.get_user_by_req(request)) | ||||
|         self.assertEqual( | ||||
|             requester.user.to_string(), "@%s:%s" % ("__oidc_admin", SERVER_NAME) | ||||
|             requester.user.to_string(), | ||||
|             OIDC_ADMIN_USERID, | ||||
|         ) | ||||
|         self.assertEqual(requester.is_guest, False) | ||||
|         self.assertEqual(requester.device_id, None) | ||||
|  | @ -685,3 +687,102 @@ class MSC3861OAuthDelegation(HomeserverTestCase): | |||
| 
 | ||||
|         # There should be no call to the introspection endpoint | ||||
|         self.http_client.request.assert_not_called() | ||||
| 
 | ||||
|     @override_config({"mau_stats_only": True}) | ||||
|     def test_request_tracking(self) -> None: | ||||
|         """Using an access token should update the client_ips and MAU tables.""" | ||||
|         # To start, there are no MAU users. | ||||
|         store = self.hs.get_datastores().main | ||||
|         mau = self.get_success(store.get_monthly_active_count()) | ||||
|         self.assertEqual(mau, 0) | ||||
| 
 | ||||
|         known_token = "token-token-GOOD-:)" | ||||
| 
 | ||||
|         async def mock_http_client_request( | ||||
|             method: str, | ||||
|             uri: str, | ||||
|             data: Optional[bytes] = None, | ||||
|             headers: Optional[Headers] = None, | ||||
|         ) -> IResponse: | ||||
|             """Mocked auth provider response.""" | ||||
|             assert method == "POST" | ||||
|             token = parse_qs(data)[b"token"][0].decode("utf-8") | ||||
|             if token == known_token: | ||||
|                 return FakeResponse.json( | ||||
|                     code=200, | ||||
|                     payload={ | ||||
|                         "active": True, | ||||
|                         "scope": MATRIX_USER_SCOPE, | ||||
|                         "sub": SUBJECT, | ||||
|                         "username": USERNAME, | ||||
|                     }, | ||||
|                 ) | ||||
| 
 | ||||
|             return FakeResponse.json(code=200, payload={"active": False}) | ||||
| 
 | ||||
|         self.http_client.request = mock_http_client_request | ||||
| 
 | ||||
|         EXAMPLE_IPV4_ADDR = "123.123.123.123" | ||||
|         EXAMPLE_USER_AGENT = "httprettygood" | ||||
| 
 | ||||
|         # First test a known access token | ||||
|         channel = FakeChannel(self.site, self.reactor) | ||||
|         # type-ignore: FakeChannel is a mock of an HTTPChannel, not a proper HTTPChannel | ||||
|         req = SynapseRequest(channel, self.site)  # type: ignore[arg-type] | ||||
|         req.client.host = EXAMPLE_IPV4_ADDR | ||||
|         req.requestHeaders.addRawHeader("Authorization", f"Bearer {known_token}") | ||||
|         req.requestHeaders.addRawHeader("User-Agent", EXAMPLE_USER_AGENT) | ||||
|         req.content = BytesIO(b"") | ||||
|         req.requestReceived( | ||||
|             b"GET", | ||||
|             b"/_matrix/client/v3/account/whoami", | ||||
|             b"1.1", | ||||
|         ) | ||||
|         channel.await_result() | ||||
|         self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) | ||||
|         self.assertEqual(channel.json_body["user_id"], USER_ID, channel.json_body) | ||||
| 
 | ||||
|         # Expect to see one MAU entry, from the first request | ||||
|         mau = self.get_success(store.get_monthly_active_count()) | ||||
|         self.assertEqual(mau, 1) | ||||
| 
 | ||||
|         conn_infos = self.get_success( | ||||
|             store.get_user_ip_and_agents(UserID.from_string(USER_ID)) | ||||
|         ) | ||||
|         self.assertEqual(len(conn_infos), 1, conn_infos) | ||||
|         conn_info = conn_infos[0] | ||||
|         self.assertEqual(conn_info["access_token"], known_token) | ||||
|         self.assertEqual(conn_info["ip"], EXAMPLE_IPV4_ADDR) | ||||
|         self.assertEqual(conn_info["user_agent"], EXAMPLE_USER_AGENT) | ||||
| 
 | ||||
|         # Now test MAS making a request using the special __oidc_admin token | ||||
|         MAS_IPV4_ADDR = "127.0.0.1" | ||||
|         MAS_USER_AGENT = "masmasmas" | ||||
| 
 | ||||
|         channel = FakeChannel(self.site, self.reactor) | ||||
|         req = SynapseRequest(channel, self.site)  # type: ignore[arg-type] | ||||
|         req.client.host = MAS_IPV4_ADDR | ||||
|         req.requestHeaders.addRawHeader( | ||||
|             "Authorization", f"Bearer {self.auth._admin_token}" | ||||
|         ) | ||||
|         req.requestHeaders.addRawHeader("User-Agent", MAS_USER_AGENT) | ||||
|         req.content = BytesIO(b"") | ||||
|         req.requestReceived( | ||||
|             b"GET", | ||||
|             b"/_matrix/client/v3/account/whoami", | ||||
|             b"1.1", | ||||
|         ) | ||||
|         channel.await_result() | ||||
|         self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) | ||||
|         self.assertEqual( | ||||
|             channel.json_body["user_id"], OIDC_ADMIN_USERID, channel.json_body | ||||
|         ) | ||||
| 
 | ||||
|         # Still expect to see one MAU entry, from the first request | ||||
|         mau = self.get_success(store.get_monthly_active_count()) | ||||
|         self.assertEqual(mau, 1) | ||||
| 
 | ||||
|         conn_infos = self.get_success( | ||||
|             store.get_user_ip_and_agents(UserID.from_string(OIDC_ADMIN_USERID)) | ||||
|         ) | ||||
|         self.assertEqual(conn_infos, []) | ||||
|  |  | |||
|  | @ -19,13 +19,7 @@ from twisted.web.resource import Resource | |||
| from synapse.rest.synapse.client import build_synapse_client_resource_tree | ||||
| 
 | ||||
| from tests.unittest import HomeserverTestCase, override_config, skip_unless | ||||
| 
 | ||||
| try: | ||||
|     import authlib  # noqa: F401 | ||||
| 
 | ||||
|     HAS_AUTHLIB = True | ||||
| except ImportError: | ||||
|     HAS_AUTHLIB = False | ||||
| from tests.utils import HAS_AUTHLIB | ||||
| 
 | ||||
| 
 | ||||
| @skip_unless(HAS_AUTHLIB, "requires authlib") | ||||
|  |  | |||
|  | @ -30,13 +30,7 @@ from synapse.types import JsonDict, Requester, create_requester | |||
| from tests import unittest | ||||
| from tests.http.server._base import make_request_with_cancellation_test | ||||
| from tests.unittest import override_config | ||||
| 
 | ||||
| try: | ||||
|     import authlib  # noqa: F401 | ||||
| 
 | ||||
|     HAS_AUTHLIB = True | ||||
| except ImportError: | ||||
|     HAS_AUTHLIB = False | ||||
| from tests.utils import HAS_AUTHLIB | ||||
| 
 | ||||
| 
 | ||||
| class KeyQueryTestCase(unittest.HomeserverTestCase): | ||||
|  |  | |||
|  | @ -16,13 +16,7 @@ from twisted.web.resource import Resource | |||
| from synapse.rest.well_known import well_known_resource | ||||
| 
 | ||||
| from tests import unittest | ||||
| 
 | ||||
| try: | ||||
|     import authlib  # noqa: F401 | ||||
| 
 | ||||
|     HAS_AUTHLIB = True | ||||
| except ImportError: | ||||
|     HAS_AUTHLIB = False | ||||
| from tests.utils import HAS_AUTHLIB | ||||
| 
 | ||||
| 
 | ||||
| class WellKnownTests(unittest.HomeserverTestCase): | ||||
|  |  | |||
|  | @ -30,6 +30,13 @@ from synapse.storage.database import LoggingDatabaseConnection | |||
| from synapse.storage.engines import create_engine | ||||
| from synapse.storage.prepare_database import prepare_database | ||||
| 
 | ||||
| try: | ||||
|     import authlib  # noqa: F401 | ||||
| 
 | ||||
|     HAS_AUTHLIB = True | ||||
| except ImportError: | ||||
|     HAS_AUTHLIB = False | ||||
| 
 | ||||
| # set this to True to run the tests against postgres instead of sqlite. | ||||
| # | ||||
| # When running under postgres, we first create a base database with the name | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 David Robertson
						David Robertson