Added example answers for chapters 13 and 14 to fix #1
This commit is contained in:
parent
1c106d776c
commit
82cf71ed1c
0
CH_13_async_io/exercise_01/__init__.py
Normal file
0
CH_13_async_io/exercise_01/__init__.py
Normal file
76
CH_13_async_io/exercise_01/solution_00.py
Normal file
76
CH_13_async_io/exercise_01/solution_00.py
Normal file
@ -0,0 +1,76 @@
|
||||
# Try to create a `asyncio` base class that automatically
|
||||
# registers all instances for easy closing/destructuring when you
|
||||
# are done
|
||||
import abc
|
||||
import asyncio
|
||||
|
||||
|
||||
class AsyncBase(abc.ABC):
    """Base class that records every created instance.

    Subclasses are registered automatically on construction so that
    all live instances can later be closed in a single sweep.
    """

    # Class-wide registry shared by every subclass.
    _instances = []

    def __init__(self):
        # Register this instance in the shared list.
        AsyncBase._instances.append(self)

    async def close(self):
        # Concrete subclasses are expected to override this.
        raise NotImplementedError
|
||||
|
||||
|
||||
class AsyncManager(AsyncBase):
    """Close every registered AsyncBase instance.

    Kept separate from AsyncBase so the management helpers do not
    pollute the namespace of the base class.
    """

    @classmethod
    async def close(cls):
        # Pop instances one at a time so the registry is emptied while
        # we are closing.
        while cls._instances:
            instance = cls._instances.pop()
            await instance.close()

    # Support `async with AsyncManager():` as well.
    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.close()
|
||||
|
||||
|
||||
class A(AsyncBase):
    # Demo subclass: registers itself via AsyncBase.__init__ and
    # reports construction/closing on stdout.

    def __init__(self):
        super().__init__()
        print('A.__init__')

    async def close(self):
        print('A.close')
|
||||
|
||||
|
||||
class B(AsyncBase):
    # Second demo subclass, identical to A apart from the labels.

    def __init__(self):
        super().__init__()
        print('B.__init__')

    async def close(self):
        print('B.close')
|
||||
|
||||
|
||||
async def main():
    # Create two registered instances, then close every registered
    # instance explicitly through the manager's classmethod.
    print('Using close method directly')

    A()
    B()
    await AsyncManager.close()

    print()
|
||||
|
||||
|
||||
async def main_with():
    # Same demo, but closing happens automatically when the
    # `async with` block exits (AsyncManager.__aexit__).
    print('Using async with')

    async with AsyncManager():
        A()
        B()

    print()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run both demos, each in a fresh event loop.
    asyncio.run(main())
    asyncio.run(main_with())
|
||||
0
CH_13_async_io/exercise_02/__init__.py
Normal file
0
CH_13_async_io/exercise_02/__init__.py
Normal file
92
CH_13_async_io/exercise_02/solution_00.py
Normal file
92
CH_13_async_io/exercise_02/solution_00.py
Normal file
@ -0,0 +1,92 @@
|
||||
# Create an `asyncio` wrapper class for a synchronous process
|
||||
# such as file or network operations using executors
|
||||
|
||||
# This example shows an `AsyncioFile` class that makes your file
|
||||
# operations asynchronous by running them in a separate thread.
|
||||
# If your operation has a tendency to block the Python GIL you
|
||||
# could also opt for using a ProcessPoolExecutor instead.
|
||||
#
|
||||
# Note that for real-life usage I would recommend the aiofiles
|
||||
# module over this class.
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import functools
|
||||
import pathlib
|
||||
from asyncio import AbstractEventLoop
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
|
||||
class AsyncExecutorBase:
    """Mixin that offloads blocking callables to a thread pool.

    Holds a ThreadPoolExecutor plus the running event loop so that
    subclasses can await synchronous functions via _run_in_executor().
    Must be constructed while an event loop is running.
    """

    _executor: ThreadPoolExecutor
    _loop: AbstractEventLoop

    def __init__(self):
        self._executor = concurrent.futures.ThreadPoolExecutor()
        # Raises RuntimeError when no loop is running — intentional.
        self._loop = asyncio.get_running_loop()
        super().__init__()

    def _run_in_executor(self, func, *args, **kwargs):
        """Schedule func(*args, **kwargs) on the pool.

        Not declared `async` on purpose: it simply returns the
        awaitable produced by run_in_executor, which the caller can
        await directly.
        """
        call = functools.partial(func, *args, **kwargs)
        return self._loop.run_in_executor(self._executor, call)
|
||||
|
||||
|
||||
class AsyncioFile(AsyncExecutorBase):
    """Async wrapper around a pathlib.Path using an executor.

    Each method forwards the matching pathlib.Path call to a worker
    thread so the event loop is never blocked by file I/O.
    """

    # The wrapped filesystem path.
    _path: pathlib.Path

    def __init__(self, path: pathlib.Path):
        super().__init__()
        self._path = path

    async def exists(self) -> bool:
        """Async version of pathlib.Path.exists()."""
        return await self._run_in_executor(self._path.exists)

    async def rename(self, target):
        """Async version of pathlib.Path.rename()."""
        return await self._run_in_executor(
            self._path.rename,
            target,
        )

    async def read_text(self, encoding=None, errors=None):
        """Async version of pathlib.Path.read_text()."""
        return await self._run_in_executor(
            self._path.read_text,
            encoding=encoding,
            errors=errors,
        )

    async def read_bytes(self):
        """Async version of pathlib.Path.read_bytes()."""
        return await self._run_in_executor(self._path.read_bytes)

    async def write_text(self, data, encoding=None, errors=None,
                         newline=None):
        """Async version of pathlib.Path.write_text()."""
        return await self._run_in_executor(
            self._path.write_text,
            data,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )

    async def write_bytes(self, data):
        """Async version of pathlib.Path.write_bytes()."""
        return await self._run_in_executor(
            self._path.write_bytes,
            data,
        )
|
||||
|
||||
async def main():
    # Wrap this very source file so the demo always has a file to read.
    afile = AsyncioFile(pathlib.Path(__file__))

    print('#' * 79)
    print('Exists:', await afile.exists())
    print('#' * 79)
    print('Contents:')
    print(await afile.read_text())
|
||||
|
||||
if __name__ == '__main__':
    # Run the demo in a fresh event loop.
    asyncio.run(main())
|
||||
@ -0,0 +1,40 @@
|
||||
# See if you can make an echo server and client as separate
|
||||
# processes. Even though we did not cover
|
||||
# `multiprocessing.Pipe()`, I trust you can work with it
|
||||
# regardless. It can be created through
|
||||
# `a, b = multiprocessing.Pipe()` and you can use it with
|
||||
# `a.send()` or `b.send()` and `a.recv()` or `b.recv()`.
|
||||
import multiprocessing
|
||||
|
||||
|
||||
def echo_client(receive_pipe, send_pipe, message):
    # Send a single message and block until the echo comes back.
    print('client sending', message)
    send_pipe.send(message)
    print('client received', receive_pipe.recv())
|
||||
|
||||
|
||||
def echo_server(receive_pipe, send_pipe):
    # Echo loop: forward every received message back out. Runs
    # forever; the parent process terminates this worker when done.
    while True:
        message = receive_pipe.recv()
        print('server received', message)
        send_pipe.send(message)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # One duplex pipe: data sent on `b` arrives at `a` and vice versa.
    a, b = multiprocessing.Pipe()
    server = multiprocessing.Process(
        target=echo_server,
        args=(a, b),
    )
    server.start()

    # NOTE(review): client and server share the same pipe ends, so
    # both end up calling recv() on `a` — the server could consume its
    # own echo before the client sees it. Looks racy; two separate
    # pipes would be safer. TODO confirm.
    for i in range(5):
        client = multiprocessing.Process(
            target=echo_client,
            args=(a, b, f'message {i}'),
        )
        client.start()
        client.join()

    server.terminate()
    server.join()
|
||||
@ -0,0 +1,74 @@
|
||||
# Read all files in a directory and sum the size of the files by
|
||||
# reading each file using `concurrent.futures`. If you want an
|
||||
# extra challenge, walk through the directories recursively by
|
||||
# letting the thread/process queue new items while running.
|
||||
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import pathlib
|
||||
import time
|
||||
|
||||
# Our current directory
|
||||
PATH = pathlib.Path(__file__).parent
|
||||
|
||||
|
||||
def get_size(path: pathlib.Path) -> int:
    """Return the size of *path* in bytes, logging it as a side effect."""
    file_size = path.stat().st_size
    logging.info('%s is %d bytes', path, file_size)
    return file_size
|
||||
|
||||
|
||||
def get_total_size(path) -> int:
    """Sum the sizes of the direct children of *path* via a thread pool."""
    with concurrent.futures.ThreadPoolExecutor() as executor:
        sizes = executor.map(get_size, path.iterdir())
        return sum(sizes)
|
||||
|
||||
|
||||
def get_size_or_queue(
    executor: concurrent.futures.Executor,
    futures: list[concurrent.futures.Future],
    path: pathlib.Path,
) -> int:
    """Return the size of *path*, queueing children when it is a dir.

    Directories contribute 0 bytes themselves; a future is submitted
    for every child so the walk continues in parallel, and the future
    is appended to *futures* for the caller to collect.
    """
    # Plain file: just report its size.
    if not path.is_dir():
        return get_size(path)

    # Directory: schedule each child on the executor and record the
    # future; the directory itself counts as 0 bytes.
    for child in path.iterdir():
        futures.append(executor.submit(
            get_size_or_queue, executor, futures, child))
    return 0
|
||||
|
||||
|
||||
def get_total_size_recursive(path) -> int:
    """Recursively sum file sizes below *path* using a thread pool."""
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # A plain list doubles as the work log; `list.append()` is
        # atomic, so worker threads can extend it safely.
        futures: list[concurrent.futures.Future] = []
        futures.append(executor.submit(
            get_size_or_queue, executor, futures, path))

        # The list keeps growing while we iterate: the list iterator
        # re-checks the length on every step, so futures appended by
        # workers mid-iteration are picked up as well.
        return sum(future.result() for future in futures)
|
||||
|
||||
|
||||
def main(path: pathlib.Path):
    # Non-recursive pass: direct children of `path` only.
    total_size = get_total_size(path)
    print(f'Total size for {path} is: {total_size}')

    # Sleep so editors such as Pycharm don't mix the output
    time.sleep(0.5)

    # Recursive pass: walks sub-directories as well, so this total can
    # be larger than the first one.
    total_size = get_total_size_recursive(path)
    print(f'Recursive total size for {path} is: {total_size}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # Use the parent directory to get a reasonable list of files
    main(PATH.parent)
|
||||
@ -0,0 +1,35 @@
|
||||
# Read all files in a directory and sum the size of the files by
|
||||
# reading each file using `multiprocessing`
|
||||
|
||||
import logging
|
||||
import multiprocessing
|
||||
import pathlib
|
||||
|
||||
# Directory to process
|
||||
PATH = pathlib.Path(__file__).parent.parent
|
||||
|
||||
# We need to setup the logging outside of the
|
||||
# `if __name__ == '__main__'` block because the
|
||||
# `multiprocessing` module will not execute that section.
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
def get_size(path: pathlib.Path):
    # Worker executed in the pool processes: returns the file size and
    # logs it relative to the scanned root directory.
    size = path.stat().st_size
    logging.info(
        '%s is %d bytes',
        path.relative_to(PATH),
        size,
    )
    return size
|
||||
|
||||
|
||||
def main(path: pathlib.Path):
    # Fan the stat() calls out over a process pool and sum the results.
    with multiprocessing.Pool() as pool:
        total_size = sum(pool.map(get_size, path.iterdir()))

    print(f'Total size for {path} is: {total_size}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Scan the configured directory (the chapter's parent folder).
    main(PATH)
|
||||
@ -0,0 +1,45 @@
|
||||
# Read all files in a directory and sum the size of the files by
|
||||
# reading each file using `threading`
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import threading
|
||||
|
||||
# Directory to process
|
||||
PATH = pathlib.Path(__file__).parent.parent
|
||||
|
||||
|
||||
class FileSizeThread(threading.Thread):
    # One thread per file: run() stats the file and stores the result
    # in `self.size` for the main thread to collect after join().

    def __init__(self, path: pathlib.Path):
        super().__init__()
        self.path = path
        self.size = 0

    def run(self):
        self.size = self.path.stat().st_size
        logging.info(
            '%s is %d bytes',
            self.path.relative_to(PATH),
            self.size,
        )
|
||||
|
||||
|
||||
def main(path: pathlib.Path):
    """Spawn one FileSizeThread per child of *path* and sum the sizes."""
    workers = []
    for child in path.iterdir():
        worker = FileSizeThread(child)
        worker.start()
        workers.append(worker)

    # Collect the results; join() guarantees `size` has been written.
    total_size = 0
    for worker in workers:
        worker.join()
        total_size += worker.size

    print(f'Total size for {path} is: {total_size}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Enable INFO logging so per-file sizes are visible.
    logging.basicConfig(level=logging.INFO)
    main(PATH)
|
||||
@ -0,0 +1,83 @@
|
||||
# Read all files in a directory and sum the size of the files by reading each file using `processing` or `multiprocessing`
|
||||
|
||||
import logging
|
||||
import multiprocessing
|
||||
import pathlib
|
||||
|
||||
# Directory to process
|
||||
PATH = pathlib.Path(__file__).parent.parent
|
||||
WORKERS = 8
|
||||
POLL_INTERVAL = 0.25
|
||||
|
||||
# We need to setup the logging outside of the
|
||||
# `if __name__ == '__main__'` block because the
|
||||
# `multiprocessing` module will not execute that section.
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
class FileSizeProcess(multiprocessing.Process):
    """Worker process that sums file sizes from a shared work queue.

    Directories pulled from the queue are expanded: plain files
    contribute their size to the shared counter, sub-directories are
    put back on the queue for any worker to pick up.
    """

    # Shared integer accumulating the total across all workers.
    size: multiprocessing.Value
    # JoinableQueue of directories still to be scanned.
    queue: multiprocessing.Queue

    def __init__(self, size, queue):
        super().__init__()
        self.queue = queue
        self.size = size

    def run(self):
        while True:
            path = self.queue.get()

            total_size = 0
            # Walk through the directory and sum the filesizes
            # for files and queue up directories
            child: pathlib.Path
            for child in path.iterdir():
                if child.is_dir():
                    self.queue.put(child)
                else:
                    size = child.stat().st_size
                    total_size += size
                    logging.info(
                        '%s is %d bytes',
                        child.relative_to(PATH),
                        size,
                    )

            # Update the shared counter under its lock: `+=` on a
            # multiprocessing.Value is a read-modify-write and is NOT
            # atomic, so concurrent workers could otherwise lose
            # updates. We still only do this once per queue item.
            with self.size.get_lock():
                self.size.value += total_size

            # The JoinableQueue requires us to tell it that we are
            # done with the item
            self.queue.task_done()
|
||||
|
||||
|
||||
def main(path: pathlib.Path):
    """Scan *path* recursively with a pool of worker processes."""
    workers = []
    q = multiprocessing.JoinableQueue()
    q.put(path)

    # Shared integer ('i') accumulating the total across processes.
    total_size = multiprocessing.Value('i', 0)

    # Create, start and keep track of the worker processes.
    for _ in range(WORKERS):
        worker = FileSizeProcess(total_size, q)
        worker.start()
        workers.append(worker)

    # Block until every queued directory has been marked done.
    q.join()
    q.close()

    # The workers loop forever, so terminate them explicitly.
    for worker in workers:
        worker.terminate()
        worker.join()

    print(f'Total size for {path} is: {total_size.value}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Scan the configured root directory.
    main(PATH)
|
||||
@ -0,0 +1,85 @@
|
||||
# Read all files in a directory and sum the size of the files by
|
||||
# reading each file using `threading` or `multiprocessing`
|
||||
#
|
||||
# As above, but walk through the directories recursively by
|
||||
# letting the thread/process queue new items while running.
|
||||
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import queue
|
||||
import threading
|
||||
|
||||
# Directory to process
|
||||
PATH = pathlib.Path(__file__).parent.parent
|
||||
WORKERS = 8
|
||||
POLL_INTERVAL = 0.25
|
||||
|
||||
|
||||
class FileSizeThread(threading.Thread):
    # Worker thread that repeatedly pulls directories from a shared
    # queue, adds the sizes of plain files to its own total and
    # re-queues sub-directories for any worker to pick up.

    # Create a `stop` event so we can stop the thread externally
    stop: threading.Event
    # Running total of bytes seen by this thread.
    size: int
    # Shared work queue of directories.
    queue: queue.Queue

    def __init__(self, queue):
        super().__init__()
        self.queue = queue
        self.size = 0
        self.stop = threading.Event()

    def run(self):
        while not self.stop.is_set():
            # Get the next item from the queue if available. If the
            # queue is empty, wait for 0.25 second and try again
            # unless we are told to stop.
            try:
                path = self.queue.get(timeout=POLL_INTERVAL)
            except queue.Empty:
                continue

            # Walk through the directory and sum the filesizes
            # for files and queue up directories
            for child in path.iterdir():
                self.process_path(child)

    def process_path(self, child):
        # Directories go back on the queue; files are added to this
        # thread's running total and logged.
        if child.is_dir():
            self.queue.put(child)
        else:
            size = child.stat().st_size
            self.size += size
            logging.info(
                '%s is %d bytes',
                child.relative_to(PATH),
                size,
            )
|
||||
|
||||
|
||||
def main(path: pathlib.Path):
    # Seed the shared queue with the root directory.
    threads = []
    q = queue.Queue()
    q.put(path)

    # Create, start and store the worker threads
    for _ in range(WORKERS):
        thread = FileSizeThread(q)
        thread.start()
        threads.append(thread)

    # Stop all threads
    # NOTE(review): `stop` is set immediately after start-up, before
    # the queue has necessarily been drained; each worker exits at its
    # next loop check, so directories may be left unprocessed and the
    # printed total may be incomplete. Presumably a
    # queue.join()/task_done handshake was intended — TODO confirm.
    for thread in threads:
        thread.stop.set()

    # Wait for all threads to finish and sum their sizes
    total_size = 0
    for thread in threads:
        thread.join()
        total_size += thread.size

    print(f'Total size for {path} is: {total_size}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Enable INFO logging so per-file sizes are visible.
    logging.basicConfig(level=logging.INFO)
    main(PATH)
|
||||
@ -0,0 +1,5 @@
|
||||
# Create a pool of workers that keeps waiting for items to be
|
||||
# queued through `multiprocessing.Queue()`.
|
||||
|
||||
# Please refer to exercise_04/multiprocessing_solution_00.py for
|
||||
# the solution to the exercise as it already uses this technique.
|
||||
@ -0,0 +1,50 @@
|
||||
# Convert the pool above to a safe RPC (remote procedure call)
|
||||
# type operation.
|
||||
import multiprocessing
|
||||
|
||||
WORKERS = 4
|
||||
|
||||
|
||||
def say(msg):
    # Trivial RPC target: print the given message.
    print(f'Saying: {msg}')
|
||||
|
||||
|
||||
# Explicitly define the RPC methods to make this safer
|
||||
RPC_METHODS = dict(say=say)
|
||||
|
||||
|
||||
class RpcProcess(multiprocessing.Process):
    """Worker that executes (name, args, kwargs) RPC tuples from a queue.

    Only names listed in RPC_METHODS are callable, which keeps the
    mechanism safe against arbitrary code execution.
    """

    def __init__(self, queue: multiprocessing.Queue):
        super().__init__()
        self.queue = queue

    def run(self):
        while True:
            func_name, args, kwargs = self.queue.get()
            try:
                # Unknown names raise KeyError here — reported below.
                func = RPC_METHODS[func_name]
                func(*args, **kwargs)
            except Exception as exc:
                # Report the failure but keep the worker alive; the
                # previous version let the exception kill the process.
                print(f'RPC call {func_name!r} failed: {exc!r}')
            finally:
                # Always mark the item done, even on failure —
                # otherwise queue.join() in the parent deadlocks
                # waiting for a task_done() that never comes.
                self.queue.task_done()
|
||||
|
||||
|
||||
def main():
    # Queue two valid calls plus one bogus method name.
    q = multiprocessing.JoinableQueue()
    q.put(('say', ('hello',), {}))
    q.put(('say', ('world',), {}))
    # This should result in an error because this is not a valid
    # RPC method
    q.put(('non-existing-method', (), {}))

    for _ in range(WORKERS):
        p = RpcProcess(q)
        p.start()

    # NOTE(review): if a worker dies on the bad method before calling
    # task_done(), this join() waits forever — verify RpcProcess.run()
    # marks items done even when the call fails.
    q.join()
    q.close()

    # Workers loop forever; terminate whatever is still alive.
    for p in multiprocessing.active_children():
        p.terminate()
        p.join()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Required guard: the workers re-import this module on start.
    main()
|
||||
@ -0,0 +1,105 @@
|
||||
# Apply your functional programming skills and calculate
|
||||
# something in a parallel way. Perhaps parallel sorting?
|
||||
import multiprocessing
|
||||
import random
|
||||
|
||||
# Since sorting is CPU limited we should not go above the number of
|
||||
# CPU cores
|
||||
WORKERS = multiprocessing.cpu_count()
|
||||
|
||||
|
||||
def merge_sort(data):
    """Recursively merge-sort *data* and return the sorted result."""
    # Zero or one element: already sorted.
    if len(data) <= 1:
        return data

    # Sort each half independently, then merge the two sorted halves.
    mid = len(data) // 2
    return merge(merge_sort(data[:mid]), merge_sort(data[mid:]))
|
||||
|
||||
|
||||
def merge(left, right):
    '''
    Merge two sorted lists into one sorted list

    >>> merge([1, 3, 5], [2, 4, 6])
    [1, 2, 3, 4, 5, 6]
    >>> merge([1, 3, 5], [2, 4, 6, 7])
    [1, 2, 3, 4, 5, 6, 7]
    >>> merge([1, 2, 3], [1, 2, 3])
    [1, 1, 2, 2, 3, 3]
    >>> merge([], [1])
    [1]
    >>> merge([1], [])
    [1]
    '''
    # Fix: the previous version indexed left[0]/right[0] before
    # entering its try block, raising IndexError whenever either
    # input list was empty.
    result = []
    left_index = right_index = 0

    # Standard two-pointer merge: repeatedly consume the smaller head
    # element until one list is exhausted. `<=` keeps the merge
    # stable (equal elements keep `left` first).
    while left_index < len(left) and right_index < len(right):
        if left[left_index] <= right[right_index]:
            result.append(left[left_index])
            left_index += 1
        else:
            result.append(right[right_index])
            right_index += 1

    # At most one of these slices is non-empty; append the leftovers.
    result.extend(left[left_index:])
    result.extend(right[right_index:])
    return result
|
||||
|
||||
|
||||
def split(data, size=WORKERS):
    '''
    Split a list into `size` different chunks so that each chunk
    can be processed in parallel.
    '''
    # Fix: guard against a zero chunk size. When len(data) < size the
    # integer division yields 0 and `range(..., 0)` raises
    # ValueError; clamping to 1 returns one chunk per element
    # instead. Empty input now yields an empty list of chunks.
    chunk_size = max(1, len(data) // size)
    return [data[i:i + chunk_size] for i in
            range(0, len(data), chunk_size)]
|
||||
|
||||
|
||||
def multiprocessing_merge_sort(data):
    # Sort `data` by sorting chunks in parallel, then repeatedly
    # merging pairs of sorted chunks until only one remains.
    with multiprocessing.Pool(processes=WORKERS) as pool:
        chunks = split(data, WORKERS)
        sorted_chunks = pool.map(merge_sort, chunks)

        # Merge the chunks
        # NOTE(review): `i` appears to be unused — TODO confirm and
        # remove.
        i = 0
        while len(sorted_chunks) > 1:
            # zip the chunks into pairs
            pairs = zip(sorted_chunks[::2], sorted_chunks[1::2])
            # merge the pairs in parallel
            merged_chunks = pool.starmap(merge, pairs)

            # If we have an odd number of chunks, we need to
            # add the last chunk to the merged chunks
            if len(sorted_chunks) % 2 == 1:
                merged_chunks.append(sorted_chunks[-1])

            sorted_chunks = merged_chunks

    return sorted_chunks[0]
|
||||
|
||||
|
||||
def main():
    # Sort 100 unique random values below 1000.
    data = random.sample(range(1000), 100)
    sorted_data = multiprocessing_merge_sort(data)
    # Verify that the data is sorted correctly
    assert sorted_data == sorted(data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Required guard: the pool workers re-import this module.
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user