/*
 "Flatten" a string of HTML, removing all HTML tags
 Original Source: (See copyright notice at )
 */

/*"
 Remove HTML tags to turn a fragment of HTML into a piece of plain text.
 All characters between < and > are removed. Escape sequences such as &amp;
 are converted into their character equivalents.

 Returns the receiver unchanged when it is empty, when it cannot be converted
 to the chosen encoding, or when the HTML import produces no string.
 The result is autoreleased (this code uses manual retain/release).
"*/
// Note: This is a slow approach that I'm now considering obsolete.
// See this URL for a better approach:
// http://sugarmaplesoftware.com/25/strip-html-tags/
- (NSString *) flattenHTML
{
	NSString *result = self;

	if ([self length] > 0)	// if empty string, don't do this!  You get junk.
	{
		// HACK -- IF SHORT LENGTH, USE MACROMAN -- FOR SOME REASON UNICODE FAILS FOR "" AND "-" AND "CNN" ...
		NSStringEncoding encoding = ([self length] > 3) ? NSUnicodeStringEncoding : NSMacOSRomanStringEncoding;

		// -dataUsingEncoding: returns nil when the string cannot be converted
		// to the requested encoding without loss, so handle that case.
		NSData *theData = [self dataUsingEncoding:encoding];
		if (nil != theData)
		{
			// The encoding hint has to be passed as an import *option*.
			// documentAttributes: is an out-parameter, so a dictionary passed
			// there is never read and is simply overwritten.
			NSDictionary *options = [NSDictionary dictionaryWithObject:[NSNumber numberWithUnsignedInteger:encoding]
																forKey:@"CharacterEncoding"];
			NSAttributedString *attrString = [[NSAttributedString alloc] initWithHTML:theData
																			  options:options
																   documentAttributes:NULL];
			NSString *flattened = [attrString string];
			if (nil != flattened)	// import failed -> keep the original text rather than returning nil
			{
				result = [[flattened retain] autorelease];	// keep only this
			}
			[attrString release];	// don't do autorelease since this is so deep down.
		}
	}
	return result;
}