File-Transfer

將文件名中的 MacCyrillic 編碼轉換為 NTFS 文件系統上的 IBM866

  • October 8, 2020

通過 samba 將文件從 MacOS 系統複製到 Windows 共享後,我得到的文件名如下:

Сђ•вл
К†в†ЂЃ¶≠л• Ђ®бвл.pdf
П†бѓЃав.doc

通常它們應該看起來像:

Сметы
Каталожные листы.pdf
Паспорт.doc

在某些情況下,名稱末尾還會多出一個 U+F028 字元(位於 Unicode 私用區,通常無法正常顯示):

Новые

有沒有辦法在 Windows 機器上自動識別並轉換這類文件名?

我最終編寫了自己的腳本……

程式碼很糟糕並且沒有經過太多測試,但適用於我的情況。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
(Not) Simple MacCyrillic -> IBM866 converting script.
"""

import sys

# subprocess.run(), which this script uses to call `find`, only exists on
# Python 3.5 and later, so anything older is rejected up front.
if sys.version_info < (3, 5):
    print("Please run it with Python 3.5 or better.")
    # Non-zero status so callers can tell the script did not run.
    sys.exit(1)

import os

# The script shells out to the `find` and `mv` commands.
if os.name != 'posix':
    print("This script works only on GNU/Linux.")
    sys.exit(1)

import subprocess

# Marker characters: a name containing any of these is treated as damaged
# and gets converted.  Going by the script's stated purpose (MacCyrillic ->
# IBM866) these are symbols that show up in place of Cyrillic lowercase
# letters in the mis-decoded names.
BadChars = list('©•≠£¢†ѓЃЂ§¶ђ®°Ґ')

# Damaged character -> intended letter, written as two aligned strings.
# The intended letters run in ЙЦУКЕН keyboard order, one keyboard row per
# string fragment.  Notes on the data:
#   * '¢' and 'Ґ' both map to 'в';
#   * '™' and 'І' are translated here but are not in BadChars, so on their
#     own they never cause a name to be converted;
#   * several keys are ordinary Cyrillic letters (e.g. 'л' -> 'ы'), which is
#     why the table must only be applied to names already flagged as damaged.
replaceTable = dict(zip(
    '©жг™•≠£ийІек'   # top row
    'дл¢†ѓаЃЂ§¶н'    # home row
    'пзбђ®вм°о'      # bottom row
    'Ґ',             # second spelling of 'в'
    'йцукенгшщзхъ'
    'фывапролджэ'
    'ячсмитьбю'
    'в',
))

def check_all_path_string(path, BadChars):
    """Tell whether *path* contains any of the markers in *BadChars*.

    Every marker is looked for as a substring of the complete path, so a
    hit in any component flags the whole path.

    Returns:
        True if at least one marker occurs in *path*, otherwise False.
    """
    return any(marker in path for marker in BadChars)

def check_part_of_name(part_of_name, BadChars):
    """Tell whether any single character of *part_of_name* is in *BadChars*.

    Unlike a substring search, the name is walked character by character
    and each one is tested for membership in *BadChars*.

    Returns:
        True if at least one character is listed, otherwise False.
    """
    return any(char in BadChars for char in part_of_name)

def replace_symbols(part_of_name, replaceTable):
    """Return *part_of_name* with every mapped character substituted.

    Args:
        part_of_name: The string to convert.
        replaceTable: Mapping from a single character to its replacement.

    Returns:
        A new string in which each character that is a key of
        *replaceTable* is replaced by the mapped value and every other
        character is copied unchanged.  Each character is translated once;
        the output of one substitution is never fed back into the table.
    """
    # Look each character up directly in the mapping instead of rebuilding
    # a list of its keys for every character, and join once instead of
    # growing the result string piece by piece.
    return "".join(replaceTable.get(sym, sym) for sym in part_of_name)
           
def check_part_of_bad_path(bad_file_name_list, BadChars, replaceTable):
    """Rebuild a path from its components, converting the damaged ones.

    Args:
        bad_file_name_list: Path components, as produced by
            ``path.split("/")``.
        BadChars: Characters whose presence marks a component as damaged.
        replaceTable: Character mapping applied to damaged components.

    Returns:
        The components joined with "/".  A component containing a character
        from *BadChars* is passed through ``replace_symbols``; the others
        are kept as they are.
    """
    fixed_parts = [
        replace_symbols(part, replaceTable)
        if check_part_of_name(part, BadChars)
        else part
        for part in bad_file_name_list
    ]
    # Joining is the exact inverse of split("/"): an absolute path keeps its
    # leading "/" (the first component is empty) and a relative path stays
    # relative, with no stray leading slash to clean up afterwards.
    return "/".join(fixed_parts)

def main_validation(files, BadChars, replaceTable):
    """Produce the converted counterpart of every path in *files*.

    Args:
        files: Iterable of path strings.
        BadChars: Characters whose presence marks a path as damaged.
        replaceTable: Character mapping used for the conversion.

    Returns:
        A list with one entry per input path, in the same order: the path
        itself when ``check_all_path_string`` finds nothing in it, otherwise
        the result of ``check_part_of_bad_path`` on its "/"-separated parts.
    """
    def convert(path):
        # Clean paths are passed through without being split at all.
        if not check_all_path_string(path, BadChars):
            return path
        return check_part_of_bad_path(path.split("/"), BadChars, replaceTable)

    return [convert(path) for path in files]

def grab_files(folder, find_type):
    """List the entries of one type below *folder* using the ``find`` command.

    Args:
        folder: Directory handed to ``find`` as its starting point.
        find_type: Argument for find's ``-type`` test, e.g. 'd' for
            directories or 'f' for regular files.

    Returns:
        A list of path strings in the order ``find`` printed them.  The
        exit status of ``find`` is not checked, so on errors the list holds
        whatever was printed before or despite them.
    """
    # -print0 ends every path with a NUL byte, which cannot occur inside a
    # path, so names containing newlines or other line-break characters
    # stay in one piece.
    output = subprocess.run(
        ["find", folder, "-type", find_type, "-print0"],
        stdout=subprocess.PIPE,
    ).stdout

    # os.fsdecode uses the filesystem encoding and keeps undecodable bytes
    # as surrogate escapes instead of raising, so one oddly encoded name
    # does not abort the whole listing.
    return [os.fsdecode(raw) for raw in output.split(b"\0") if raw]

def _plan_renames(files, validated):
    """Pair each path whose own name needs converting with its new path.

    Args:
        files: Original paths.
        validated: Converted paths, aligned index by index with *files*.

    Returns:
        A list of ``(source, destination)`` tuples, deepest paths first.
        Only the last component differs between source and destination;
        entries whose last component is already fine are left out.
    """
    renames = []
    for source, converted in zip(files, validated):
        parent, old_name = os.path.split(source)
        new_name = os.path.basename(converted)
        if new_name != old_name:
            # Keep the parent exactly as it is on disk right now; damaged
            # parent directories get their own entry in the plan.
            renames.append((source, os.path.join(parent, new_name)))

    # Renaming a directory changes the path of everything beneath it, so
    # children have to be renamed while their recorded path is still valid.
    renames.sort(key=lambda pair: pair[0].count("/"), reverse=True)
    return renames


if __name__ == "__main__":
    folder = os.getcwd()
    find_type = "d" # 'd' for directories or 'f' for files
    print("Grab files from", folder)
    files = grab_files(folder, find_type)
    print("Starting validation...")
    validated = main_validation(files, BadChars, replaceTable)
    print("Computing diff...")
    # Sources and targets are paired positionally, so they cannot drift
    # apart when a converted name happens to equal an existing path.
    renames = _plan_renames(files, validated)

    print("Overall source count:", len(files))
    print("Validated diff:", len(renames))

    for source, dest in renames:
        print(source, '->', dest)

    print("\nProceed?")
    choice = input("[Y]es | [N]o > ")
    if choice in ('y', 'Y'):
        for source, dest in renames:
            # Argument list instead of a shell string: quotes, "$" or
            # backticks in a name are passed through literally.  "--" stops
            # option parsing in case a name starts with "-".
            subprocess.run(["mv", "-i", "--", source, dest])
    elif choice in ('n', 'N'):
        print("Sure, it's okay. Thanks for playing!")
        sys.exit()
    else:
        print("Sorry, I don't understand you.")
        print("Assuming as negative, exiting...")
        sys.exit()

引用自:https://serverfault.com/questions/1036325