From bc67e7d260631d3fa7bc78653376e15dc0771364 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:43:03 +0000 Subject: [PATCH 1/8] Add decent impl of a FileConsumer Twisted core doesn't have a general purpose one, so we need to write one ourselves. Features: - All writing happens in background thread - Supports both push and pull producers - Push producers get paused if the consumer falls behind --- synapse/util/file_consumer.py | 158 +++++++++++++++++++++++++++++++ tests/util/test_file_consumer.py | 138 +++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 synapse/util/file_consumer.py create mode 100644 tests/util/test_file_consumer.py diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py new file mode 100644 index 0000000000..de478fcb3e --- /dev/null +++ b/synapse/util/file_consumer.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vecotr Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads, reactor + +from synapse.util.logcontext import make_deferred_yieldable + +import Queue + + +class BackgroundFileConsumer(object): + """A consumer that writes to a file like object. Supports both push + and pull producers + + Args: + file_obj (file): The file like object to write to. Closed when + finished. 
+ """ + + # For PushProducers pause if we have this many unwritten slices + _PAUSE_ON_QUEUE_SIZE = 5 + # And resume once the size of the queue is less than this + _RESUME_ON_QUEUE_SIZE = 2 + + def __init__(self, file_obj): + self.file_obj = file_obj + + # Producer we're registered with + self.producer = None + + # True if PushProducer, false if PullProducer + self.streaming = False + + # Queue of slices of bytes to be written. When producer calls + # unregister a final None is sent. + self.bytes_queue = Queue.Queue() + + # Deferred that is resolved when finished writing + self.finished_deferred = None + + # If the _writer thread throws an exception it gets stored here. + self._write_exception = None + + # A deferred that gets resolved when the bytes_queue gets empty. + # Mainly used for tests. + self._notify_empty_deferred = None + + def registerProducer(self, producer, streaming): + """Part of IProducer interface + + Args: + producer (IProducer) + streaming (bool): True if push based producer, False if pull + based. + """ + self.producer = producer + self.streaming = streaming + self.finished_deferred = threads.deferToThread(self._writer) + if not streaming: + self.producer.resumeProducing() + + self.paused_producer = False + + def unregisterProducer(self): + """Part of IProducer interface + """ + self.producer = None + if not self.finished_deferred.called: + self.bytes_queue.put_nowait(None) + + def write(self, bytes): + """Part of IProducer interface + """ + if self._write_exception: + raise self._write_exception + + if self.finished_deferred.called: + raise Exception("consumer has closed") + + self.bytes_queue.put_nowait(bytes) + + # If this is a pushed based consumer and the queue is getting behind + # then we pause the producer. + if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: + self.paused_producer = True + self.producer.pauseProducing() + + def _writer(self): + """This is run in a background thread to write to the file. 
+ """ + try: + while self.producer or not self.bytes_queue.empty(): + # If we've paused the producer check if we should resume the + # producer. + if self.producer and self.paused_producer: + if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: + reactor.callFromThread(self._resume_paused_producer) + + if self._notify_empty and self.bytes_queue.empty(): + reactor.callFromThread(self._notify_empty) + + bytes = self.bytes_queue.get() + + # If we get a None (or empty list) then that's a signal used + # to indicate we should check if we should stop. + if bytes: + self.file_obj.write(bytes) + + # If its a pull producer then we need to explicitly ask for + # more stuff. + if not self.streaming and self.producer: + reactor.callFromThread(self.producer.resumeProducing) + except Exception as e: + self._write_exception = e + raise + finally: + self.file_obj.close() + + def wait(self): + """Returns a deferred that resolves when finished writing to file + """ + return make_deferred_yieldable(self.finished_deferred) + + def _resume_paused_producer(self): + """Gets called if we should resume producing after being paused + """ + if self.paused_producer and self.producer: + self.paused_producer = False + self.producer.resumeProducing() + + def _notify_empty(self): + """Called when the _writer thread thinks the queue may be empty and + we should notify anything waiting on `wait_for_writes` + """ + if self._notify_empty_deferred and self.bytes_queue.empty(): + d = self._notify_empty_deferred + self._notify_empty_deferred = None + d.callback(None) + + def wait_for_writes(self): + """Wait for the write queue to be empty and for writes to have + finished. This is mainly useful for tests. 
+ """ + if not self._notify_empty_deferred: + self._notify_empty_deferred = defer.Deferred() + return self._notify_empty_deferred diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py new file mode 100644 index 0000000000..8acb68f0c3 --- /dev/null +++ b/tests/util/test_file_consumer.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.internet import defer +from mock import NonCallableMock + +from synapse.util.file_consumer import BackgroundFileConsumer + +from tests import unittest +from StringIO import StringIO + +import threading + + +class FileConsumerTests(unittest.TestCase): + + @defer.inlineCallbacks + def test_pull_consumer(self): + string_file = StringIO() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = DummyPullProducer() + + yield producer.register_with_consumer(consumer) + + yield producer.write_and_wait("Foo") + + self.assertEqual(string_file.getvalue(), "Foo") + + yield producer.write_and_wait("Bar") + + self.assertEqual(string_file.getvalue(), "FooBar") + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + @defer.inlineCallbacks + def test_push_consumer(self): + string_file = StringIO() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = NonCallableMock(spec_set=[]) + + consumer.registerProducer(producer, True) + + consumer.write("Foo") + 
yield consumer.wait_for_writes() + + self.assertEqual(string_file.getvalue(), "Foo") + + consumer.write("Bar") + yield consumer.wait_for_writes() + + self.assertEqual(string_file.getvalue(), "FooBar") + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + @defer.inlineCallbacks + def test_push_producer_feedback(self): + string_file = BlockingStringWrite() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = NonCallableMock(spec_set=["pauseProducing", "resumeProducing"]) + + consumer.registerProducer(producer, True) + + with string_file.write_lock: + for _ in range(consumer._PAUSE_ON_QUEUE_SIZE): + consumer.write("Foo") + + producer.pauseProducing.assert_called_once() + + yield consumer.wait_for_writes() + producer.resumeProducing.assert_called_once() + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + +class DummyPullProducer(object): + def __init__(self): + self.consumer = None + self.deferred = defer.Deferred() + + def resumeProducing(self): + d = self.deferred + self.deferred = defer.Deferred() + d.callback(None) + + def write_and_wait(self, bytes): + d = self.deferred + self.consumer.write(bytes) + return d + + def register_with_consumer(self, consumer): + d = self.deferred + self.consumer = consumer + self.consumer.registerProducer(self, False) + return d + + +class BlockingStringWrite(object): + def __init__(self): + self.buffer = "" + self.closed = False + self.write_lock = threading.Lock() + + def write(self, bytes): + self.buffer += bytes + + def close(self): + self.closed = True From a177325b49be4793c8ed21147f8d301a0649a2b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:02:43 +0000 Subject: [PATCH 2/8] Fix comments --- synapse/util/file_consumer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 
de478fcb3e..5284c7967e 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2018 New Vecotr Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ class BackgroundFileConsumer(object): self._notify_empty_deferred = None def registerProducer(self, producer, streaming): - """Part of IProducer interface + """Part of IConsumer interface Args: producer (IProducer) @@ -91,7 +91,7 @@ class BackgroundFileConsumer(object): self.bytes_queue.put_nowait(bytes) - # If this is a pushed based consumer and the queue is getting behind + # If this is a PushProducer and the queue is getting behind # then we pause the producer. if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: self.paused_producer = True From 28b338ed9bafc2017a635848e14a2a25b78d0016 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:04:41 +0000 Subject: [PATCH 3/8] Move definition of paused_producer to __init__ --- synapse/util/file_consumer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 5284c7967e..54c9da9573 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -43,6 +43,10 @@ class BackgroundFileConsumer(object): # True if PushProducer, false if PullProducer self.streaming = False + # For PushProducers, indicates whether we've paused the producer and + # need to call resumeProducing before we get more data. + self.paused_producer = False + # Queue of slices of bytes to be written. When producer calls # unregister a final None is sent. 
 self.bytes_queue = Queue.Queue() @@ -71,8 +75,6 @@ class BackgroundFileConsumer(object): if not streaming: self.producer.resumeProducing() - self.paused_producer = False - def unregisterProducer(self): """Part of IProducer interface """ From 17b54389feb3855a33406149a8a59f0327bb3ad1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:05:34 +0000 Subject: [PATCH 4/8] Fix _notify_empty typo --- synapse/util/file_consumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 54c9da9573..479e480614 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -110,7 +110,7 @@ class BackgroundFileConsumer(object): if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty and self.bytes_queue.empty(): + if self._notify_empty_deferred and self.bytes_queue.empty(): reactor.callFromThread(self._notify_empty) bytes = self.bytes_queue.get() From dc519602ac0f35d39a70c91f0e6057e865a61dfc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:07:17 +0000 Subject: [PATCH 5/8] Ensure registerProducer isn't called twice --- synapse/util/file_consumer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 479e480614..d7bbb0aeb8 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -69,6 +69,9 @@ class BackgroundFileConsumer(object): streaming (bool): True if push based producer, False if pull based. 
""" + if self.producer: + raise Exception("registerProducer called twice") + self.producer = producer self.streaming = streaming self.finished_deferred = threads.deferToThread(self._writer) From 2f18a2647b6b9cc07c3cc5f2bec3e1bab67d0eea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:10:12 +0000 Subject: [PATCH 6/8] Make all fields private --- synapse/util/file_consumer.py | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index d7bbb0aeb8..d19d48665c 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -35,24 +35,24 @@ class BackgroundFileConsumer(object): _RESUME_ON_QUEUE_SIZE = 2 def __init__(self, file_obj): - self.file_obj = file_obj + self._file_obj = file_obj # Producer we're registered with - self.producer = None + self._producer = None # True if PushProducer, false if PullProducer self.streaming = False # For PushProducers, indicates whether we've paused the producer and # need to call resumeProducing before we get more data. - self.paused_producer = False + self._paused_producer = False # Queue of slices of bytes to be written. When producer calls # unregister a final None is sent. - self.bytes_queue = Queue.Queue() + self._bytes_queue = Queue.Queue() # Deferred that is resolved when finished writing - self.finished_deferred = None + self._finished_deferred = None # If the _writer thread throws an exception it gets stored here. self._write_exception = None @@ -69,21 +69,21 @@ class BackgroundFileConsumer(object): streaming (bool): True if push based producer, False if pull based. 
""" - if self.producer: + if self._producer: raise Exception("registerProducer called twice") - self.producer = producer + self._producer = producer self.streaming = streaming - self.finished_deferred = threads.deferToThread(self._writer) + self._finished_deferred = threads.deferToThread(self._writer) if not streaming: - self.producer.resumeProducing() + self._producer.resumeProducing() def unregisterProducer(self): """Part of IProducer interface """ - self.producer = None - if not self.finished_deferred.called: - self.bytes_queue.put_nowait(None) + self._producer = None + if not self._finished_deferred.called: + self._bytes_queue.put_nowait(None) def write(self, bytes): """Part of IProducer interface @@ -91,65 +91,65 @@ class BackgroundFileConsumer(object): if self._write_exception: raise self._write_exception - if self.finished_deferred.called: + if self._finished_deferred.called: raise Exception("consumer has closed") - self.bytes_queue.put_nowait(bytes) + self._bytes_queue.put_nowait(bytes) # If this is a PushProducer and the queue is getting behind # then we pause the producer. - if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: - self.paused_producer = True - self.producer.pauseProducing() + if self.streaming and self._bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: + self._paused_producer = True + self._producer.pauseProducing() def _writer(self): """This is run in a background thread to write to the file. """ try: - while self.producer or not self.bytes_queue.empty(): + while self._producer or not self._bytes_queue.empty(): # If we've paused the producer check if we should resume the # producer. 
- if self.producer and self.paused_producer: - if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: + if self._producer and self._paused_producer: + if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty_deferred and self.bytes_queue.empty(): + if self._notify_empty_deferred and self._bytes_queue.empty(): reactor.callFromThread(self._notify_empty) - bytes = self.bytes_queue.get() + bytes = self._bytes_queue.get() # If we get a None (or empty list) then that's a signal used # to indicate we should check if we should stop. if bytes: - self.file_obj.write(bytes) + self._file_obj.write(bytes) # If its a pull producer then we need to explicitly ask for # more stuff. - if not self.streaming and self.producer: - reactor.callFromThread(self.producer.resumeProducing) + if not self.streaming and self._producer: + reactor.callFromThread(self._producer.resumeProducing) except Exception as e: self._write_exception = e raise finally: - self.file_obj.close() + self._file_obj.close() def wait(self): """Returns a deferred that resolves when finished writing to file """ - return make_deferred_yieldable(self.finished_deferred) + return make_deferred_yieldable(self._finished_deferred) def _resume_paused_producer(self): """Gets called if we should resume producing after being paused """ - if self.paused_producer and self.producer: - self.paused_producer = False - self.producer.resumeProducing() + if self._paused_producer and self._producer: + self._paused_producer = False + self._producer.resumeProducing() def _notify_empty(self): """Called when the _writer thread thinks the queue may be empty and we should notify anything waiting on `wait_for_writes` """ - if self._notify_empty_deferred and self.bytes_queue.empty(): + if self._notify_empty_deferred and self._bytes_queue.empty(): d = self._notify_empty_deferred self._notify_empty_deferred = None d.callback(None) From 
1432f7ccd5a01e43d0c5417f3d2f4a6a0fbf5bfb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:53:21 +0000 Subject: [PATCH 7/8] Move test stuff to tests --- synapse/util/file_consumer.py | 26 +-------------- tests/util/test_file_consumer.py | 54 +++++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index d19d48665c..3241035247 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer, threads, reactor +from twisted.internet import threads, reactor from synapse.util.logcontext import make_deferred_yieldable @@ -57,10 +57,6 @@ class BackgroundFileConsumer(object): # If the _writer thread throws an exception it gets stored here. self._write_exception = None - # A deferred that gets resolved when the bytes_queue gets empty. - # Mainly used for tests. 
- self._notify_empty_deferred = None - def registerProducer(self, producer, streaming): """Part of IConsumer interface @@ -113,9 +109,6 @@ class BackgroundFileConsumer(object): if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty_deferred and self._bytes_queue.empty(): - reactor.callFromThread(self._notify_empty) - bytes = self._bytes_queue.get() # If we get a None (or empty list) then that's a signal used @@ -144,20 +137,3 @@ class BackgroundFileConsumer(object): if self._paused_producer and self._producer: self._paused_producer = False self._producer.resumeProducing() - - def _notify_empty(self): - """Called when the _writer thread thinks the queue may be empty and - we should notify anything waiting on `wait_for_writes` - """ - if self._notify_empty_deferred and self._bytes_queue.empty(): - d = self._notify_empty_deferred - self._notify_empty_deferred = None - d.callback(None) - - def wait_for_writes(self): - """Wait for the write queue to be empty and for writes to have - finished. This is mainly useful for tests. - """ - if not self._notify_empty_deferred: - self._notify_empty_deferred = defer.Deferred() - return self._notify_empty_deferred diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py index 8acb68f0c3..76e2234255 100644 --- a/tests/util/test_file_consumer.py +++ b/tests/util/test_file_consumer.py @@ -14,7 +14,7 @@ # limitations under the License. 
-from twisted.internet import defer +from twisted.internet import defer, reactor from mock import NonCallableMock from synapse.util.file_consumer import BackgroundFileConsumer @@ -53,7 +53,7 @@ class FileConsumerTests(unittest.TestCase): @defer.inlineCallbacks def test_push_consumer(self): - string_file = StringIO() + string_file = BlockingStringWrite() consumer = BackgroundFileConsumer(string_file) try: @@ -62,14 +62,14 @@ class FileConsumerTests(unittest.TestCase): consumer.registerProducer(producer, True) consumer.write("Foo") - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(1) - self.assertEqual(string_file.getvalue(), "Foo") + self.assertEqual(string_file.buffer, "Foo") consumer.write("Bar") - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(2) - self.assertEqual(string_file.getvalue(), "FooBar") + self.assertEqual(string_file.buffer, "FooBar") finally: consumer.unregisterProducer() @@ -85,15 +85,22 @@ class FileConsumerTests(unittest.TestCase): try: producer = NonCallableMock(spec_set=["pauseProducing", "resumeProducing"]) + resume_deferred = defer.Deferred() + producer.resumeProducing.side_effect = lambda: resume_deferred.callback(None) + consumer.registerProducer(producer, True) + number_writes = 0 with string_file.write_lock: for _ in range(consumer._PAUSE_ON_QUEUE_SIZE): consumer.write("Foo") + number_writes += 1 producer.pauseProducing.assert_called_once() - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(number_writes) + + yield resume_deferred producer.resumeProducing.assert_called_once() finally: consumer.unregisterProducer() @@ -131,8 +138,39 @@ class BlockingStringWrite(object): self.closed = False self.write_lock = threading.Lock() + self._notify_write_deferred = None + self._number_of_writes = 0 + def write(self, bytes): - self.buffer += bytes + with self.write_lock: + self.buffer += bytes + self._number_of_writes += 1 + + reactor.callFromThread(self._notify_write) def 
close(self): self.closed = True + + def _notify_write(self): + "Called by write to indicate a write happened" + with self.write_lock: + if not self._notify_write_deferred: + return + d = self._notify_write_deferred + self._notify_write_deferred = None + d.callback(None) + + @defer.inlineCallbacks + def wait_for_n_writes(self, n): + "Wait for n writes to have happened" + while True: + with self.write_lock: + if n <= self._number_of_writes: + return + + if not self._notify_write_deferred: + self._notify_write_deferred = defer.Deferred() + + d = self._notify_write_deferred + + yield d From be0dfcd4a29859f4c707c2b3cf1da38c5115d251 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:57:23 +0000 Subject: [PATCH 8/8] Do logcontexts correctly --- synapse/util/file_consumer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 3241035247..90a2608d6f 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -15,7 +15,7 @@ from twisted.internet import threads, reactor -from synapse.util.logcontext import make_deferred_yieldable +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn import Queue @@ -70,7 +70,7 @@ class BackgroundFileConsumer(object): self._producer = producer self.streaming = streaming - self._finished_deferred = threads.deferToThread(self._writer) + self._finished_deferred = preserve_fn(threads.deferToThread)(self._writer) if not streaming: self._producer.resumeProducing()