Clean up your teeming reading list.
Here is the GitHub repo with the script.
If you are like me, your reading list is overflowing. Mine has over 1000 entries. I use it daily to quickly remind myself of useful websites I’ve found, but I never bother to clean it up after those sites have served their purpose.
I didn’t want to clean it up manually: clicking through 1000+ items with a mouse is tedious. There is a shortcut to remove all items, but before doing that I wanted to export the data.
Here’s how it’s done.
Your reading list is stored in ~/Library/Safari/Bookmarks.plist
(at least, it is on macOS 11.4
Big Sur). Additionally, icons for the reading list are stored in ~/Library/Safari/ReadingListArchives
.
To read the .plist
file format in Python, it is easiest to use the plistlib library:
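pip install plistlib
(Note: on Python 3, plistlib is part of the standard library, so this installation step is normally unnecessary.)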
Here is the complete script — the explanation is below:
python export_reading_list.py csv reading_list.csv
will write the reading list to reading_list.csv
.
python export_reading_list.py json reading_list.json
will write the reading list to reading_list.json
.
python export_reading_list.py csv reading_list.csv --dir-icons-out reading_list_icons
copies the icons to the folder reading_list_icons
. They match up to the entries through the WebBookmarkUUID
key.
python export_reading_list.py csv reading_list.csv --dir-icons ~/Library/Safari/ReadingListArchives
The default is ~/Library/Safari/ReadingListArchives
.
.plist
file: python export_reading_list.py csv reading_list.csv --fname-bookmarks ~/Library/Safari/Bookmarks.plist
The default is ~/Library/Safari/Bookmarks.plist
.
python export_reading_list.py csv reading_list.csv --include-data
The data is written to the Data
field. The default is the --exclude-data
option, which excludes the data.
First, copy the plist
file for safety:
# Copy the plist file for safety: the rest of the script reads this copy.
import os

fname_plist = "tmp.plist"
# The command is passed as an argument list (no shell), so paths containing
# spaces or shell metacharacters are copied correctly. Without a shell, "~"
# is no longer expanded automatically, so it is expanded explicitly here.
command = ["cp", os.path.expanduser(args.fname_bookmarks), fname_plist]
print("Making temporary copy of reading list: %s" % " ".join(command))
if Popen(command).wait() != 0:
    # Abort instead of continuing with a missing or stale temporary copy.
    raise SystemExit("Failed to copy %s to %s" % (args.fname_bookmarks, fname_plist))
Next, find reading list elements in this terribly formatted dictionary:
def find_dicts_with_rlist_keys_in_dict(base_dict):
    """Return every dictionary in the tree that has a "ReadingList" key.

    The tree is walked depth-first through the "Children" lists. Matches
    found among the children are listed before ``base_dict`` itself.

    Args:
        base_dict: A dictionary that may contain a "Children" list of further
            dictionaries and/or a "ReadingList" key.

    Returns:
        A list of the matching dictionaries (the original objects, not copies).
    """
    ret = []
    # Look the two keys up directly instead of scanning items(): the result
    # then no longer depends on the order in which the keys are stored.
    for child_dict in base_dict.get("Children", []):
        ret.extend(find_dicts_with_rlist_keys_in_dict(child_dict))
    if "ReadingList" in base_dict:
        ret.append(base_dict)
    return ret
# Load the plist file (opened in binary mode, as plistlib.load expects)
with open("tmp.plist", "rb") as f:
    res = plistlib.load(f)

# Find the reading list items
rlist = find_dicts_with_rlist_keys_in_dict(res)
print("You have: %d items in your reading list" % len(rlist))
Convert the reading list dictionaries to custom objects:
@dataclass
class ReadingListItem(json.JSONEncoder):
    """One reading-list entry built from a dictionary of the bookmarks plist.

    Field names mirror the plist keys they are read from (see ``fromRDict``).
    The first seven fields are always set by ``fromRDict``; the remaining
    ones stay ``None`` when the entry does not provide them.

    NOTE(review): the ``json.JSONEncoder`` base is kept only for backward
    compatibility. Nothing in this class uses encoder functionality;
    serialisation goes through ``to_json`` and ``to_json_full``.
    """

    title: str
    ServerID: str
    neverFetchMetadata: bool
    WebBookmarkType: str
    WebBookmarkUUID: str
    URLString: str
    DateAdded: datetime.datetime
    # Raw payload; it is passed to base64.b64encode, so it must be bytes.
    Data: Union[bytes, None] = None
    siteName: Union[str, None] = None
    PreviewText: Union[str, None] = None
    DateLastFetched: Union[datetime.datetime, None] = None
    imageURL: Union[str, None] = None
    didAttemptToFetchIconFromImageUrlKey: Union[bool, None] = None
    NumberOfFailedLoadsWithUnknownOrNonRecoverableError: Union[int, None] = None
    FetchResult: Union[int, None] = None
    AddedLocally: Union[bool, None] = None

    # Timestamp layout shared by both serialisers (not a dataclass field,
    # because it carries no annotation).
    _DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    @staticmethod
    def _format_date(value):
        """Return ``value`` formatted as a timestamp string, or None if unset."""
        if value is None:
            return None
        return value.strftime(ReadingListItem._DATE_FORMAT)

    def _encoded_data(self):
        """Return ``Data`` as base64 text, or None when no data is stored."""
        if self.Data is None:
            return None
        return base64.b64encode(self.Data).decode('utf-8')

    def to_json(self):
        """Return a JSON-serialisable dict that omits unset optional fields.

        The seven mandatory fields are always present. Each optional field is
        added only when its value is not None. Dates are rendered as
        "YYYY-MM-DD HH:MM:SS" strings and ``Data`` as base64 text.
        """
        res = {
            "title": self.title,
            "ServerID": self.ServerID,
            "neverFetchMetadata": self.neverFetchMetadata,
            "WebBookmarkType": self.WebBookmarkType,
            "WebBookmarkUUID": self.WebBookmarkUUID,
            "URLString": self.URLString,
            "DateAdded": self.DateAdded.strftime(self._DATE_FORMAT),
        }
        # The order below is the order in which optional keys are emitted.
        optional = (
            ("Data", self._encoded_data()),
            ("siteName", self.siteName),
            ("PreviewText", self.PreviewText),
            ("DateLastFetched", self._format_date(self.DateLastFetched)),
            ("imageURL", self.imageURL),
            ("didAttemptToFetchIconFromImageUrlKey",
             self.didAttemptToFetchIconFromImageUrlKey),
            ("FetchResult", self.FetchResult),
            ("AddedLocally", self.AddedLocally),
            ("NumberOfFailedLoadsWithUnknownOrNonRecoverableError",
             self.NumberOfFailedLoadsWithUnknownOrNonRecoverableError),
        )
        res.update((key, value) for key, value in optional if value is not None)
        return res

    def to_json_full(self):
        """Return a dict holding every field, using None for unset values.

        All items produce the same keys in the same order, which is what
        allows the result to be written as uniform CSV rows.
        """
        return {
            "title": self.title,
            "ServerID": self.ServerID,
            "neverFetchMetadata": self.neverFetchMetadata,
            "WebBookmarkType": self.WebBookmarkType,
            "WebBookmarkUUID": self.WebBookmarkUUID,
            "URLString": self.URLString,
            "siteName": self.siteName,
            "PreviewText": self.PreviewText,
            "imageURL": self.imageURL,
            "didAttemptToFetchIconFromImageUrlKey":
                self.didAttemptToFetchIconFromImageUrlKey,
            "FetchResult": self.FetchResult,
            "AddedLocally": self.AddedLocally,
            "NumberOfFailedLoadsWithUnknownOrNonRecoverableError":
                self.NumberOfFailedLoadsWithUnknownOrNonRecoverableError,
            "Data": self._encoded_data(),
            "DateAdded": self._format_date(self.DateAdded),
            "DateLastFetched": self._format_date(self.DateLastFetched),
        }

    @classmethod
    def fromRDict(cls, r, include_data):
        """Build an item from one reading-list dictionary of the plist.

        Args:
            r: The plist dictionary of a single reading-list entry.
            include_data: When true, copy ``r['Sync']['Data']`` into ``Data``
                if it exists; otherwise ``Data`` stays None.

        Returns:
            The populated ``ReadingListItem``.

        Raises:
            KeyError: If one of the mandatory keys is missing from ``r``.
        """
        title = r['URIDictionary']['title']
        sync = r['Sync']
        non_sync = r['ReadingListNonSync']
        reading_list = r['ReadingList']
        return cls(
            title=title,
            ServerID=sync['ServerID'],
            neverFetchMetadata=non_sync['neverFetchMetadata'],
            WebBookmarkType=r['WebBookmarkType'],
            WebBookmarkUUID=r['WebBookmarkUUID'],
            URLString=r['URLString'],
            DateAdded=reading_list['DateAdded'],
            # .get() keeps entries without a stored payload from raising.
            Data=sync.get('Data') if include_data else None,
            siteName=non_sync.get('siteName'),
            PreviewText=reading_list.get('PreviewText'),
            DateLastFetched=non_sync.get('DateLastFetched'),
            imageURL=r.get('imageURL'),
            didAttemptToFetchIconFromImageUrlKey=non_sync.get(
                'didAttemptToFetchIconFromImageUrlKey'),
            NumberOfFailedLoadsWithUnknownOrNonRecoverableError=non_sync.get(
                'NumberOfFailedLoadsWithUnknownOrNonRecoverableError'),
            FetchResult=non_sync.get('FetchResult'),
            AddedLocally=non_sync.get('AddedLocally'),
        )
# Iterate over reading list items, converting each plist dictionary
# into a ReadingListItem
ritems = [ReadingListItem.fromRDict(r, args.include_data) for r in rlist]
Finally, dump the entries to JSON or CSV:
# Dump
if args.output_mode == 'json':
    # Build the rows before opening the file, so that a serialisation error
    # does not leave a truncated output file behind.
    rjson = [r.to_json() for r in ritems]
    # Write JSON
    with open(args.fname_out, 'w') as f:
        json.dump(rjson, f, indent=3)
    print("Wrote reading list to JSON file: %s" % args.fname_out)

elif args.output_mode == 'csv':
    rjson = [r.to_json_full() for r in ritems]
    # Dump to CSV; newline='' lets the csv module control the line endings.
    with open(args.fname_out, 'w', newline='') as f:
        csv_writer = csv.writer(f)
        # An empty reading list has no first row to take the header from,
        # so nothing is written in that case.
        if rjson:
            # Write header
            csv_writer.writerow(rjson[0].keys())
            # Write contents
            csv_writer.writerows(row.values() for row in rjson)
Now we have a backup of the reading list. A future project may write an edited CSV or JSON file back to the reading list plist
format recognized by Safari.
Oliver K. Ernst
December 16, 2021