" ).strip() title = title.replace('<title>', '').replace('

# originally created and posted by user dgc on # https://discussion.evernote.com/topic/97201-how-to-transfer-all-the-notes-from-google-keep-to-evernote/ # Modified by user charlescanato https://gitlab.com/charlescanato/google-keep-to-evernote-converter # Modified by gokhan mete erturk to enable bulk operation of html files without any parameters and # solves the character set problems on Windows # Modified by Leonard777 to add importing of image data. # Modified by itsjfx to read a folder and import HTML files # until now, Google Takeout for Keep does NOT export: # - correct order of lists notes (non-checked first, checked last) # - list items indentation import argparse import sys import re import parsedatetime as pdt import time import glob import hashlib import base64 import os cal = pdt.Calendar() r1 = re.compile('

☑.*?(.*?).*?

') r2 = re.compile('

☐.*?(.*?).*?

') r3 = re.compile('([^<]*)[^<]*') # Use non-greedy expressions to support multiple image tags for each note r4 = re.compile('

') r5 = re.compile('

(.*)

') def readlineUntil(file, str): currLine = "" while not str in currLine: currLine = file.readline() return currLine def readTagsFromChips(line): # line might still have chips if line.startswith('

'): return line + '\n' def readImagesFromAttachment(line): # Attachments need a name, so we will use the note title with a numeric suffix to make them unique. # Suffix number for multiple attachments of the same name attachmentNumber = 0 result = () m = r4.search(line) while m: h = hashlib.md5(base64.b64decode(m.group(3).encode("utf-8"))) # Import all images at 1024px wide. Not sure if we can determine original size from binary data or not. newContent = '\n

' imageFormat = m.group(1).split('/')[1] newResource = '' + m.group(3) + '\n' + m.group(1) + '

IMAGE_FILE_NAME_' + str(attachmentNumber) + '.' + imageFormat + '

\n' result += (newContent, newResource) attachmentNumber += 1 line = line[m.end():] m = r4.search(line) return result def mungefile(fn): fp = open(fn, 'r', encoding="utf8") title = readlineUntil( fp, "" ).strip() title = title.replace('<title>', '').replace('', '') readlineUntil( fp, "" ) t = fp.readline() tags = '' resources = '' if '"archived"' in t: tags = 'archived' fp.readline() #

alone date = fp.readline().strip().replace('', '') dt, flat = cal.parse(date) iso = time.strftime('%Y%m%dT%H%M%SZ', time.gmtime(time.mktime(dt))) fp.readline() # extra title content = fp.readline() m = r5.search(content) if m: content = m.group(1) content = content.replace( '

'): content += readTagsFromChips(line) # Attachments contains the image data elif line.startswith('

'): result = readImagesFromAttachment(line) i = 0 while i < len(result): if i+1 < len(result): content += result[i] # Use the note title without spaces as the image file name currentResource = result[i+1].replace("IMAGE_FILE_NAME", title.replace(' ', '')) resources += currentResource i += 2 else: content += line + '\n' content = content.replace('
', '
') content = content.replace('\n', '\0') while True: m = r1.search(content) if not m: break content = content[:m.start()] + '' + m.group(1) + '
' + content[m.end():] while True: m = r2.search(content) if not m: break content = content[:m.start()] + '' + m.group(1) + '
' + content[m.end():] content = content.replace('\0', '\n') # remove list close (if it was a list) lastUl = content.rfind('

') if lastUl != -1: content = content[:lastUl] + content[lastUl+5:] m = r3.search(content) if m: content = content[:m.start()] + content[m.end():] tags = '' + m.group(1) + '' content = re.sub(r'class="[^"]*"', '', content) fp.close() print (''' {title} {content}]]> {iso} {iso} {tags} 0 0 google-keep 0 {resources} '''.format(**locals()), file=fxt) parser = argparse.ArgumentParser(description="Convert Google Keep notes from .html to .enex for Evernote") parser.add_argument('-o', '--output', help="The output file to write into. If not specified output goes to stdout.", default="sys.stdout") parser.add_argument('-f', '--folder', help="The path to the folder which contains the HTML files", default="./Keep") args = parser.parse_args() if args.output == "sys.stdout": fxt = sys.stdout else: fxt = open(args.output, "w", encoding="utf8") print (''' ''', file=fxt) for f in os.listdir(args.folder): if f.endswith(".html"): mungefile(os.path.join(args.folder, f)) print ('''''', file=fxt) fxt.close()