iT邦幫忙

0

gcloud storage rsync 遇到 ERROR: 'utf-8' codec can't encode characters

  • 分享至 

  • xImage

各位好,朋友的 server 有兩個網站想要使用 storage rsync 將網站的檔案同步到 cloud storage 上面,執行 gcloud storage rsync 時會遇到

ERROR: 'utf-8' codec can't encode characters in position 150903-150907: surrogates not allowed

他的 server 以前被入侵過,所以會有一些類似

'670662_B022 - '$'\261\266\256''| (2).lnk'
'670662_B022 - '$'\261\266\256''|.lnk'

這種奇怪的檔案,我已經寫了一隻 python 去抓

import os

def is_utf8_encoded(file_path):
    """Report whether the contents of *file_path* decode cleanly as UTF-8.

    The file is opened in text mode with the UTF-8 codec and read in full.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True when the whole file decodes as UTF-8. False when decoding
        fails, or when any other error occurs while opening or reading
        the file (that error is printed before returning).
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            handle.read()
    except UnicodeDecodeError:
        # The contents are not valid UTF-8.
        return False
    except Exception as exc:
        # Missing file, directory, permission problem, ...: report and
        # treat as "not UTF-8" so the caller still gets a boolean.
        print(f"Error reading file {file_path}: {exc}")
        return False
    else:
        return True

def find_files_with_encoding_issue(directory):
    """Collect files under *directory* that have a UTF-8 encoding problem.

    A file is reported when either

    * its full path cannot be encoded as strict UTF-8, or
    * its contents are not valid UTF-8 according to ``is_utf8_encoded``.

    The path check matters because ``os.walk`` returns names containing
    undecodable bytes as ``str`` with lone surrogate characters. Such a
    name triggers "'utf-8' codec can't encode ... surrogates not allowed"
    in tools that encode paths strictly, even when the file's contents
    are plain ASCII.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        A list of file paths (``str``) in ``os.walk`` order. Entries
        reported because of their name contain surrogate-escaped
        characters, so encode them with an error handler such as
        ``backslashreplace`` before printing or writing them.
    """
    problematic_files = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)

            # Check the name first: a path with surrogate escapes is a
            # problem regardless of what the file contains.
            try:
                file_path.encode('utf-8')
            except UnicodeEncodeError:
                problematic_files.append(file_path)
                continue

            try:
                if not is_utf8_encoded(file_path):
                    problematic_files.append(file_path)
            except UnicodeEncodeError:
                # The path is known to be encodable as UTF-8 here, so
                # these messages cannot fail on surrogate characters.
                print(f"UnicodeEncodeError: Current directory: {os.path.abspath(root)}")
                print(f"UnicodeEncodeError: {file_path}")
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")
    return problematic_files

# Root of the site tree to scan.
directory_path = "/data/docker/www/mami/"
problematic_files = find_files_with_encoding_issue(directory_path)

if problematic_files:
    print("Files with potential encoding issues:")
    with open('/tmp/result.txt', 'w', encoding='utf-8') as result_file:
        for file_path in problematic_files:
            try:
                result_file.write(file_path + '\n')
            except UnicodeEncodeError:
                # The path holds surrogate-escaped characters (file name
                # bytes that are not valid UTF-8). Render them as \udcXX
                # escapes rather than dropping them, so the recorded name
                # still identifies the file and printing cannot fail.
                current_directory = os.path.abspath(os.path.dirname(file_path))
                safe_directory = current_directory.encode('utf-8', 'backslashreplace').decode('utf-8')
                safe_path = file_path.encode('utf-8', 'backslashreplace').decode('utf-8')
                print(f"UnicodeEncodeError: Current directory: {safe_directory}")
                result_file.write(safe_directory + '\n')
                result_file.write(safe_path + '\n')
else:
    print("No files with potential encoding issues found.")

在 A 網站掃出來清除後可以順利 rsync,B 網站已經掃不到東西了但還是遇到那個 encode 錯誤,cloud rsync 也沒說是哪個檔案有問題,請問有辦法解決嗎,目錄以及檔案眾多肉眼去找有點困難

圖片
  直播研討會
圖片
{{ item.channelVendor }} {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友回答

立即登入回答