Added example answers for chapters 13 and 14 to fix #1

2022-08-31 20:49:02 +02:00
parent 1c106d776c
commit 82cf71ed1c
20 changed files with 690 additions and 0 deletions
@@ -0,0 +1,76 @@
 # Try to create a `asyncio` base class that automatically
 # registers all instances for easy closing/destructuring when you
 # are done
 import abc
 import asyncio
 class AsyncBase(abc.ABC):
    _instances = []
    def __init__(self):
        self._instances.append(self)
    async def close(self):
        raise NotImplementedError
 class AsyncManager(AsyncBase):
    # Use a separate class for the managing of the instances so
    # we don't pollute the namespace of the base class
    @classmethod
    async def close(cls):
        # Make sure to clear the list of instances while closing
        while cls._instances:
            await cls._instances.pop().close()
    # Support `async with` syntax as well
    async def __aenter__(self):
        return self
    async def __aexit__(self, exc_type, exc, tb):
        await self.close()
 class A(AsyncBase):
    def __init__(self):
        super().__init__()
        print('A.__init__')
    async def close(self):
        print('A.close')
 class B(AsyncBase):
    def __init__(self):
        super().__init__()
        print('B.__init__')
    async def close(self):
        print('B.close')
 async def main():
    print('Using close method directly')
    A()
    B()
    await AsyncManager.close()
    print()
 async def main_with():
    print('Using async with')
    async with AsyncManager():
        A()
        B()
    print()
 if __name__ == '__main__':
    asyncio.run(main())
    asyncio.run(main_with())
@@ -0,0 +1,92 @@
 # Create an `asyncio` wrapper class for a synchronous process
 # such as file or network operations using executors
 # This example shows an `AsyncioFile` class that makes your file
 # operations asynchronous by running them in a separate thread.
 # If your operation has a tendency to block the Python GIL you
 # could also opt for using a ProcessPoolExecutor instead.
 #
 # Note that for real-life usage I would recommend the aiofiles
 # module over this class.
 import asyncio
 import concurrent.futures
 import functools
 import pathlib
 from asyncio import AbstractEventLoop
 from concurrent.futures import ThreadPoolExecutor
 class AsyncExecutorBase:
    _executor: ThreadPoolExecutor
    _loop: AbstractEventLoop
    def __init__(self):
        self._executor = concurrent.futures.ThreadPoolExecutor()
        self._loop = asyncio.get_running_loop()
        super().__init__()
    def _run_in_executor(self, func, *args, **kwargs):
        # Note that this method is not async but can be awaited
        # because it returns a coroutine. Alternatively, we could
        # have made this method async and used `await` before
        # returning
        return self._loop.run_in_executor(
            self._executor,
            functools.partial(func, *args, **kwargs),
        )
 class AsyncioFile(AsyncExecutorBase):
    _path: pathlib.Path
    def __init__(self, path: pathlib.Path):
        super().__init__()
        self._path = path
    async def exists(self) -> bool:
        return await self._run_in_executor(self._path.exists)
    async def rename(self, target):
        return await self._run_in_executor(
            self._path.rename,
            target,
        )
    async def read_text(self, encoding=None, errors=None):
        return await self._run_in_executor(
            self._path.read_text,
            encoding=encoding,
            errors=errors,
        )
    async def read_bytes(self):
        return await self._run_in_executor(self._path.read_bytes)
    async def write_text(self, data, encoding=None, errors=None,
                         newline=None):
        return await self._run_in_executor(
            self._path.write_text,
            data,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )
    async def write_bytes(self, data):
        return await self._run_in_executor(
            self._path.write_bytes,
            data,
        )
 async def main():
    afile = AsyncioFile(pathlib.Path(__file__))
    print('#' * 79)
    print('Exists:', await afile.exists())
    print('#' * 79)
    print('Contents:')
    print(await afile.read_text())
 if __name__ == '__main__':
    asyncio.run(main())
@@ -0,0 +1,40 @@
 # See if you can make an echo server and client as separate
 # processes. Even though we did not cover
 # `multiprocessing.Pipe()`, I trust you can work with it
 # regardless. It can be created through
 # `a, b = multiprocessing.Pipe()` and you can use it with
 # `a.send()` or `b.send()` and `a.recv()` or `b.recv()`.
 import multiprocessing
 def echo_client(receive_pipe, send_pipe, message):
    print('client sending', message)
    send_pipe.send(message)
    print('client received', receive_pipe.recv())
 def echo_server(receive_pipe, send_pipe):
    while True:
        message = receive_pipe.recv()
        print('server received', message)
        send_pipe.send(message)
 if __name__ == '__main__':
    a, b = multiprocessing.Pipe()
    server = multiprocessing.Process(
        target=echo_server,
        args=(a, b),
    )
    server.start()
    for i in range(5):
        client = multiprocessing.Process(
            target=echo_client,
            args=(a, b, f'message {i}'),
        )
        client.start()
        client.join()
    server.terminate()
    server.join()
@@ -0,0 +1,74 @@
 # Read all files in a directory and sum the size of the files by
 # reading each file using `concurrent.futures`. If you want an
 # extra challenge, walk through the directories recursively by
 # letting the thread/process queue new items while running.
 import concurrent.futures
 import logging
 import pathlib
 import time
 # Our current directory
 PATH = pathlib.Path(__file__).parent
 def get_size(path: pathlib.Path) -> int:
    size = path.stat().st_size
    logging.info('%s is %d bytes', path, size)
    return size
 def get_total_size(path) -> int:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        return sum(executor.map(get_size, path.iterdir()))
 def get_size_or_queue(
        executor: concurrent.futures.Executor,
        futures: list[concurrent.futures.Future],
        path: pathlib.Path,
 ) -> int:
    # If the path is a directory, queue up the children
    if path.is_dir():
        for child in path.iterdir():
            futures.append(executor.submit(
                get_size_or_queue, executor, futures, child))
        # A directory has size 0 but we recurse into it
        return 0
    else:
        return get_size(path)
 def get_total_size_recursive(path) -> int:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        # Note that we are using a regular list as a queue. This is
        # thread-safe because `list.append()` is atomic.
        futures.append(executor.submit(
            get_size_or_queue, executor, futures, path))
        total_size = 0
        for future in futures:
            total_size += future.result()
        return total_size
 def main(path: pathlib.Path):
    total_size = get_total_size(path)
    print(f'Total size for {path} is: {total_size}')
    # Sleep so editors such as Pycharm don't mix the output
    time.sleep(0.5)
    total_size = get_total_size_recursive(path)
    print(f'Recursive total size for {path} is: {total_size}')
 if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # Use the parent directory to get a reasonable list of files
    main(PATH.parent)
@@ -0,0 +1,35 @@
 # Read all files in a directory and sum the size of the files by
 # reading each file using `multiprocessing`
 import logging
 import multiprocessing
 import pathlib
 # Directory to process
 PATH = pathlib.Path(__file__).parent.parent
 # We need to setup the logging outside of the
 # `if __name__ == '__main__'` block because the
 # `multiprocessing` module will not execute that section.
 logging.basicConfig(level=logging.INFO)
 def get_size(path: pathlib.Path):
    size = path.stat().st_size
    logging.info(
        '%s is %d bytes',
        path.relative_to(PATH),
        size,
    )
    return size
 def main(path: pathlib.Path):
    with multiprocessing.Pool() as pool:
        total_size = sum(pool.map(get_size, path.iterdir()))
        print(f'Total size for {path} is: {total_size}')
 if __name__ == '__main__':
    main(PATH)
@@ -0,0 +1,45 @@
 # Read all files in a directory and sum the size of the files by
 # reading each file using `threading`
 import logging
 import pathlib
 import threading
 # Directory to process
 PATH = pathlib.Path(__file__).parent.parent
 class FileSizeThread(threading.Thread):
    def __init__(self, path: pathlib.Path):
        super().__init__()
        self.path = path
        self.size = 0
    def run(self):
        self.size = self.path.stat().st_size
        logging.info(
            '%s is %d bytes',
            self.path.relative_to(PATH),
            self.size,
        )
 def main(path: pathlib.Path):
    threads = []
    for child in path.iterdir():
        thread = FileSizeThread(child)
        thread.start()
        threads.append(thread)
    total_size = 0
    for thread in threads:
        thread.join()
        total_size += thread.size
    print(f'Total size for {path} is: {total_size}')
 if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main(PATH)
@@ -0,0 +1,83 @@
 # Read all files in a directory and sum the size of the files by reading each file using `processing` or `multiprocessing`
 import logging
 import multiprocessing
 import pathlib
 # Directory to process
 PATH = pathlib.Path(__file__).parent.parent
 WORKERS = 8
 POLL_INTERVAL = 0.25
 # We need to setup the logging outside of the
 # `if __name__ == '__main__'` block because the
 # `multiprocessing` module will not execute that section.
 logging.basicConfig(level=logging.INFO)
 class FileSizeProcess(multiprocessing.Process):
    size: multiprocessing.Value
    queue: multiprocessing.Queue
    def __init__(self, size, queue):
        super().__init__()
        self.queue = queue
        self.size = size
    def run(self):
        while True:
            path = self.queue.get()
            total_size = 0
            # Walk through the directory and sum the filesizes
            # for files and queue up directories
            child: pathlib.Path
            for child in path.iterdir():
                if child.is_dir():
                    self.queue.put(child)
                else:
                    size = child.stat().st_size
                    total_size += size
                    logging.info(
                        '%s is %d bytes',
                        child.relative_to(PATH),
                        size,
                    )
            # Update the size in the shared memory. Since this is a
            # relatively slow operation we do it once per loop
            self.size.value += total_size
            # The JoinableQueue requires us to tell it that we are
            # done with the item
            self.queue.task_done()
 def main(path: pathlib.Path):
    processs = []
    q = multiprocessing.JoinableQueue()
    q.put(path)
    total_size = multiprocessing.Value('i', 0)
    # Create, start and store the worker processs
    for _ in range(WORKERS):
        process = FileSizeProcess(total_size, q)
        process.start()
        processs.append(process)
    # Wait until all the items in the queue have been processed
    q.join()
    q.close()
    # Terminate all the processs
    for process in processs:
        process.terminate()
        process.join()
    # Wait for all processs to finish and sum their sizes
    print(f'Total size for {path} is: {total_size.value}')
 if __name__ == '__main__':
    main(PATH)
@@ -0,0 +1,85 @@
 # Read all files in a directory and sum the size of the files by
 # reading each file using `threading` or `multiprocessing`
 #
 # As above, but walk through the directories recursively by
 # letting the thread/process queue new items while running.
 import logging
 import pathlib
 import queue
 import threading
 # Directory to process
 PATH = pathlib.Path(__file__).parent.parent
 WORKERS = 8
 POLL_INTERVAL = 0.25
 class FileSizeThread(threading.Thread):
    # Create a `stop` event so we can stop the thread externally
    stop: threading.Event
    size: int
    queue: queue.Queue
    def __init__(self, queue):
        super().__init__()
        self.queue = queue
        self.size = 0
        self.stop = threading.Event()
    def run(self):
        while not self.stop.is_set():
            # Get the next item from the queue if available. If the
            # queue is empty, wait for 0.25 second and try again
            # unless we are told to stop.
            try:
                path = self.queue.get(timeout=POLL_INTERVAL)
            except queue.Empty:
                continue
            # Walk through the directory and sum the filesizes
            # for files and queue up directories
            for child in path.iterdir():
                self.process_path(child)
    def process_path(self, child):
        if child.is_dir():
            self.queue.put(child)
        else:
            size = child.stat().st_size
            self.size += size
            logging.info(
                '%s is %d bytes',
                child.relative_to(PATH),
                size,
            )
 def main(path: pathlib.Path):
    threads = []
    q = queue.Queue()
    q.put(path)
    # Create, start and store the worker threads
    for _ in range(WORKERS):
        thread = FileSizeThread(q)
        thread.start()
        threads.append(thread)
    # Stop all threads
    for thread in threads:
        thread.stop.set()
    # Wait for all threads to finish and sum their sizes
    total_size = 0
    for thread in threads:
        thread.join()
        total_size += thread.size
    print(f'Total size for {path} is: {total_size}')
 if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main(PATH)
@@ -0,0 +1,5 @@
 # Create a pool of workers that keeps waiting for items to be
 # queued through `multiprocessing.Queue()`.
 # Please refer to exercise_04/multiprocessing_solution_00.py for
 # the solution to the exercise as it already uses this technique.
@@ -0,0 +1,50 @@
 # Convert the pool above to a safe RPC (remote procedure call)
 # type operation.
 import multiprocessing
 WORKERS = 4
 def say(msg):
    print(f'Saying: {msg}')
 # Explicitly define the RPC methods to make this safer
 RPC_METHODS = dict(say=say)
 class RpcProcess(multiprocessing.Process):
    def __init__(self, queue: multiprocessing.Queue):
        super().__init__()
        self.queue = queue
    def run(self):
        while True:
            func_name, args, kwargs = self.queue.get()
            func = RPC_METHODS[func_name]
            func(*args, **kwargs)
            self.queue.task_done()
 def main():
    q = multiprocessing.JoinableQueue()
    q.put(('say', ('hello',), {}))
    q.put(('say', ('world',), {}))
    # This should result in an error because this is not a valid
    # RPC method
    q.put(('non-existing-method', (), {}))
    for _ in range(WORKERS):
        p = RpcProcess(q)
        p.start()
    q.join()
    q.close()
    for p in multiprocessing.active_children():
        p.terminate()
        p.join()
 if __name__ == '__main__':
    main()
@@ -0,0 +1,105 @@
 # Apply your functional programming skills and calculate
 # something in a parallel way. Perhaps parallel sorting?
 import multiprocessing
 import random
 # Since sorting is CPU limited we should not go above the number of
 # CPU cores
 WORKERS = multiprocessing.cpu_count()
 def merge_sort(data):
    if len(data) <= 1:
        return data
    middle = len(data) // 2
    left = merge_sort(data[:middle])
    right = merge_sort(data[middle:])
    return merge(left, right)
 def merge(left, right):
    '''
    Merge two sorted lists into one sorted list
    >>> merge([1, 3, 5], [2, 4, 6])
    [1, 2, 3, 4, 5, 6]
    >>> merge([1, 3, 5], [2, 4, 6, 7])
    [1, 2, 3, 4, 5, 6, 7]
    >>> merge([1, 2, 3], [1, 2, 3])
    [1, 1, 2, 2, 3, 3]
    '''
    result = []
    left_index = right_index = 0
    # When using iterators, we can avoid the IndexError of
    # accessing a non-existing element by using the `next`
    # function. This will raise a `StopIteration` error if
    # there are no more elements to iterate over.
    left_next = left[left_index]
    right_next = right[right_index]
    while True:
        try:
            if left_next <= right_next:
                result.append(left_next)
                left_index += 1
                left_next = left[left_index]
            else:
                result.append(right_next)
                right_index += 1
                right_next = right[right_index]
        except IndexError:
            # If we get an IndexError, it means that we have
            # reached the end of one of the lists. We can
            # simply extend the result with the remaining
            # elements and break out of the loop.
            result.extend(left[left_index:] or right[right_index:])
            break
    return result
 def split(data, size=WORKERS):
    '''
    Split a list into `size` different chunks so that each chunk
    can be processed in parallel.
    '''
    chunk_size = len(data) // size
    return [data[i:i + chunk_size] for i in
            range(0, len(data), chunk_size)]
 def multiprocessing_merge_sort(data):
    # Split the data into chunks
    with multiprocessing.Pool(processes=WORKERS) as pool:
        chunks = split(data, WORKERS)
        sorted_chunks = pool.map(merge_sort, chunks)
        # Merge the chunks
        i = 0
        while len(sorted_chunks) > 1:
            # zip the chunks into pairs
            pairs = zip(sorted_chunks[::2], sorted_chunks[1::2])
            # merge the pairs in parallel
            merged_chunks = pool.starmap(merge, pairs)
            # If we have an odd number of chunks, we need to
            # add the last chunk to the merged chunks
            if len(sorted_chunks) % 2 == 1:
                merged_chunks.append(sorted_chunks[-1])
            sorted_chunks = merged_chunks
    return sorted_chunks[0]
 def main():
    data = random.sample(range(1000), 100)
    sorted_data = multiprocessing_merge_sort(data)
    # Verify that the data is sorted correctly
    assert sorted_data == sorted(data)
 if __name__ == '__main__':
    main()