From 4cf1cf77037ae340da6a4e7f9e66182b7c00488b Mon Sep 17 00:00:00 2001 From: Petro Dudi Date: Sun, 27 Jan 2019 23:32:55 +0200 Subject: [PATCH 1/2] Python 3.x support and more... --- README.md | 7 ++++++ android-gm-extractor.py | 52 ++++++++++++++++++++++++++--------------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index f656197..bfd434c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ Extract, parse, and decompress data from an Android Gmail database. This is a standalone script. + +Changelog 20190127 [changes by [Petro Dudi](https://github.com/pdudis)]: + +- Adapted for Python 3.x +- Added bad_chars_sub() to replace subject fields containing "/" and "\" with "-", and return a string of 50 chars max +- Added unicode support when writing decompressed body field to file +- Prints account names and number of processed emails to standard output diff --git a/android-gm-extractor.py b/android-gm-extractor.py index 876a40a..7f6d83a 100755 --- a/android-gm-extractor.py +++ b/android-gm-extractor.py @@ -6,6 +6,18 @@ outputs to standard output with HTML formatting. +======================================== +Updated by Petro Dudi on 20190127 +- Adapted for Python 3.x +- Added bad_chars_sub() to replace + subject fields containing "/" and "\" + with "-", and return a string of 50 + chars max +- Added unicode support when writing + decompressed body field to file +- Prints account names and number of + processed emails to standard output + ======================================== Updated by CBRYCE on 20150107 @@ -23,6 +35,7 @@ ''' import sys +import os import time import sqlite3 import zlib @@ -54,6 +67,14 @@ def bad_chars(string): string = string.replace(char, '') return string +def bad_chars_sub(string): + '''Removes / and \ from string. 
+ Returns the string with each one replaced by "-", cut to at most 50 characters.''' + for char in ['/', '\\']: + if char in string: + string = string.replace(char, '-') + return string[0:50] + def main(path, outputPath): con = sqlite3.connect(path) @@ -79,30 +100,32 @@ def main(path, outputPath): em_subject = row[8] em_body = row[9] - outputFile = open(outputPath+"/"+str(em_id)+"__"+str(em_subject), 'w') + outputFile = open(outputPath+"/"+str(em_id)+"__"+bad_chars_sub(em_subject)+".html", 'w') outputFile.write('') write_css(outputFile) outputFile.write('
''ID:' + str(em_id) + - '

'+'From:' + bad_chars(em_faddress).encode('utf-8') + '
' + + '

'+'From:' + bad_chars(em_faddress) + '
' + 'Date Received (UTC +0):' + epoch_to_date(em_rdate) + '

' + - 'To:' + bad_chars(em_taddress).encode('utf-8') + '
' + + 'To:' + bad_chars(em_taddress) + '
' + 'Date Sent (UTC +0):' + epoch_to_date(em_sdate) + '

' - 'CC:' + bad_chars(em_caddress).encode('utf-8') + '
' + - 'BCC:' + bad_chars(em_baddress).encode('utf-8') + '
' + - 'Reply-To Address:' + bad_chars(em_raddress).encode('utf-8') + + 'CC:' + bad_chars(em_caddress) + '
' + + 'BCC:' + bad_chars(em_baddress) + '
' + + 'Reply-To Address:' + bad_chars(em_raddress) + '

' + '
' + - 'Subject:' + em_subject.encode('utf-8') + '

' + + 'Subject:' + str(em_subject) + '

' + 'Body:
') if em_body: dem_body = zlib.decompress(em_body) - outputFile.write(dem_body) + outputFile.write(dem_body.decode('utf-8')) outputFile.write('
') email_count += 1 - outputFile.write('') - #print email_count + outputFile.write('') + + accountNamePrint = os.path.basename(outputPath) + print(accountNamePrint + ': ' + str(email_count) + ' emails') cur.close() con.close() @@ -117,15 +140,6 @@ def scan_for_files(path): for root, subdirs, files in os.walk(path): for fileEntry in files: - # if fileEntry.startswith('internal.') and fileEntry.endswith('.db') and fileEntry.__contains__('@'): - # accountNameInternal = fileEntry.strip('internal.') - # accountNameInternal = accountNameInternal.strip('.db') - # - # account_info['account'] = accountNameInternal - # account_info['path'] = os.path.join(root, fileEntry) - # - # filesToProcess.append(account_info) - # account_info = dict() if fileEntry.startswith('mailstore.') and fileEntry.endswith('.db') and fileEntry.__contains__('@'): accountNameInternal = fileEntry.split('mailstore.', 1)[1] From 5765ef08ae8bac8a8e2587a2cae70aacb41b6be4 Mon Sep 17 00:00:00 2001 From: Petro Dudi Date: Sun, 27 Jan 2019 23:40:34 +0200 Subject: [PATCH 2/2] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bfd434c..c069b69 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,6 @@ This is a standalone script. Changelog 20190127 [changes by [Petro Dudi](https://github.com/pdudis)]: - Adapted for Python 3.x -- Added bad_chars_sub() to replace subject fields containing "/" and "\" with "-", and return a string of 50 chars max +- Added bad_chars_sub() to replace subject fields containing "/" and "\\" with "-", and return a string of 50 chars max - Added unicode support when writing decompressed body field to file - Prints account names and number of processed emails to standard output