/*
"Flatten" a string of HTML, removing all HTML tags
Original Source:
(See copyright notice at )
*/
/*" Remove HTML tags to turn a fragment of HTML into a piece of plain text. All characters between < and > are removed.
Escape sequences such as &amp; are converted into their character equivalents.
"*/
// Note: This is a slow approach that I now consider obsolete.
// See this URL for a better approach:
// http://sugarmaplesoftware.com/25/strip-html-tags/
- (NSString *) flattenHTML
{
    // Converts the receiver, treated as a fragment of HTML, into plain text by
    // parsing it into an NSAttributedString and returning that object's
    // character contents.
    //
    // Returns the receiver itself (never nil) when it is empty, when it cannot
    // be converted to the chosen encoding, or when the HTML parse produces
    // no string.

    if ([self length] == 0) // if empty string, don't do this! You get junk.
    {
        return self;
    }

    // HACK -- short strings are encoded as MacRoman. The original author saw the
    // Unicode encoding fail for very short inputs such as "-" and "CNN".
    NSStringEncoding encoding = ([self length] > 3) ? NSUnicodeStringEncoding : NSMacOSRomanStringEncoding;

    NSData *htmlData = [self dataUsingEncoding:encoding];
    if (nil == htmlData) // conversion to the chosen encoding failed; leave the text untouched.
    {
        return self;
    }

    // The character encoding is an INPUT to the parser, so it is supplied through
    // the options dictionary. The documentAttributes: argument is an out-parameter
    // only; a dictionary handed in through it is never read by the parser.
    // @"CharacterEncoding" is the key AppKit exposes as NSCharacterEncodingDocumentOption.
    NSDictionary *parseOptions = [NSDictionary dictionaryWithObject:[NSNumber numberWithUnsignedInteger:encoding]
                                                             forKey:@"CharacterEncoding"];
    NSAttributedString *attrString = [[NSAttributedString alloc] initWithHTML:htmlData
                                                                      options:parseOptions
                                                           documentAttributes:NULL];

    // Keep only the plain characters; retain/autorelease so the string outlives
    // the attributed string that owns it.
    NSString *plainText = [[[attrString string] retain] autorelease];
    [attrString release]; // don't do autorelease since this is so deep down.

    // A failed parse leaves plainText nil; fall back to the receiver so callers
    // always get a string, matching the nil-data case above.
    return (nil != plainText) ? plainText : self;
}