From 45d71cb362c3bd0bd5442e6dc482d22749c7aabc Mon Sep 17 00:00:00 2001 From: Dan Puttick Date: Mon, 10 Apr 2017 13:39:28 +0200 Subject: [PATCH] Fix unicode filename issues using fsencode * Same problem we've had before - Linux filenames can contain bytes that are not valid Unicode * We need to write the filename as raw bytes to the log * os.fsencode lets us convert a filename string to bytes using the filesystem encoding; bytes that could not be decoded as Unicode (carried in the string as surrogate escapes) are written back out as the original raw bytes instead of raising an encoding error * Still not clear if the log generated this way will be human-readable --- bin/filecheck.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/filecheck.py b/bin/filecheck.py index 8200eeb..19cb93f 100644 --- a/bin/filecheck.py +++ b/bin/filecheck.py @@ -576,12 +576,12 @@ class GroomerLogger(object): return path_depth def _write_line_to_log(self, line, indentation_depth): - # TODO: should we use fsencode and fsdecode here instead of just bytestrings? padding = b' ' padding += b'| ' * indentation_depth + line_bytes = os.fsencode(line) with open(self.log_path, mode='ab') as lf: lf.write(padding) - lf.write(bytes(line, encoding='utf-8')) + lf.write(line_bytes) lf.write(b'\n')