This source file includes the following definitions.
- haveAddedContentsBeforeEnd
- preActionBeforeSerializeOpenTag
- postActionAfterSerializeOpenTag
- preActionBeforeSerializeEndTag
- postActionAfterSerializeEndTag
- saveHTMLContentToBuffer
- encodeAndFlushBuffer
- openTagToString
- endTagToString
- buildContentForNode
- m_xmlEntities
- collectTargetFrames
- serialize
#include "config.h"
#include "WebPageSerializerImpl.h"
#include "HTMLNames.h"
#include "WebFrameImpl.h"
#include "core/dom/Document.h"
#include "core/dom/DocumentType.h"
#include "core/dom/Element.h"
#include "core/editing/markup.h"
#include "core/html/HTMLAllCollection.h"
#include "core/html/HTMLElement.h"
#include "core/html/HTMLFormElement.h"
#include "core/html/HTMLHtmlElement.h"
#include "core/html/HTMLMetaElement.h"
#include "core/loader/DocumentLoader.h"
#include "core/loader/FrameLoader.h"
#include "public/platform/WebVector.h"
#include "wtf/text/TextEncoding.h"
using namespace WebCore;
namespace blink {
// Flush threshold for m_dataBuffer, compared against m_dataBuffer.length():
// encodeAndFlushBuffer() only encodes and hands the buffered markup to the
// client once the buffer has grown past this value (or a flush is forced).
static const unsigned dataBufferCapacity = 65536;
// Per-frame serialization state that is threaded through every helper below.
//
// |url|           - URL of the frame being serialized.
// |textEncoding|  - encoding used when the buffered markup is flushed.
// |document|      - document being walked; dereferenced here, so it must not
//                   be null.
// |directoryName| - directory prefix written in front of local link paths.
//
// All progress flags start out cleared and no <meta> element is marked as
// skipped.
WebPageSerializerImpl::SerializeDomParam::SerializeDomParam(
    const KURL& url,
    const WTF::TextEncoding& textEncoding,
    Document* document,
    const String& directoryName)
    : url(url)
    , textEncoding(textEncoding)
    , document(document)
    , directoryName(directoryName)
    , isHTMLDocument(document->isHTMLDocument())
    , haveSeenDocType(false)
    , haveAddedCharsetDeclaration(false)
    , skipMetaElement(0)
    , isInScriptOrStyleTag(false)
    , haveAddedXMLProcessingDirective(false)
    , haveAddedContentsBeforeEnd(false)
{
}
// Produces any markup that has to precede |element|'s open tag and decides
// whether the element should be dropped from the output.
//
// On return |*needSkip| is true only for an HTML <meta http-equiv> element
// that carries a charset in its content attribute; that element is also
// remembered in |param->skipMetaElement| so its end tag can be skipped too.
// The returned string is the markup to write before the open tag.
String WebPageSerializerImpl::preActionBeforeSerializeOpenTag(
    const Element* element, SerializeDomParam* param, bool* needSkip)
{
    StringBuilder prefix;
    *needSkip = false;

    if (!param->isHTMLDocument) {
        // Non-HTML documents: write the XML declaration once, then the
        // doctype once.
        if (!param->haveAddedXMLProcessingDirective) {
            param->haveAddedXMLProcessingDirective = true;

            // Pick the first non-empty encoding name: the declared XML
            // encoding, then the document encoding, then UTF-8.
            String encodingName = param->document->xmlEncoding();
            if (encodingName.isEmpty())
                encodingName = param->document->encodingName();
            if (encodingName.isEmpty())
                encodingName = UTF8Encoding().name();

            prefix.append("<?xml version=\"");
            prefix.append(param->document->xmlVersion());
            prefix.append("\" encoding=\"");
            prefix.append(encodingName);
            if (param->document->xmlStandalone())
                prefix.append("\" standalone=\"yes");
            prefix.append("\"?>\n");
        }
        if (!param->haveSeenDocType) {
            param->haveSeenDocType = true;
            prefix.append(createMarkup(param->document->doctype()));
        }
        return prefix.toString();
    }

    ASSERT(element);
    if (isHTMLMetaElement(*element)) {
        const HTMLMetaElement& meta = toHTMLMetaElement(*element);
        String equiv = meta.httpEquiv();
        if (equalIgnoringCase(equiv, "content-type")) {
            String content = meta.content();
            const bool declaresCharset = content.length() && content.contains("charset", false);
            if (declaresCharset) {
                // Remember the element so the matching end tag is skipped.
                param->skipMetaElement = element;
                *needSkip = true;
            }
        }
    } else if (isHTMLHtmlElement(*element)) {
        // The doctype is written at most once, right before <html>.
        if (!param->haveSeenDocType) {
            param->haveSeenDocType = true;
            prefix.append(createMarkup(param->document->doctype()));
        }
        prefix.append(WebPageSerializer::generateMarkOfTheWebDeclaration(param->url));
    } else if (isHTMLBaseElement(*element)) {
        // Open a comment around the <base> tag; it is closed in
        // postActionAfterSerializeEndTag().
        prefix.append("<!--");
    }
    return prefix.toString();
}
// Produces any markup that has to follow |element|'s open tag.
//
// Resets |param->haveAddedContentsBeforeEnd| and sets it again only when
// content is actually injected. For HTML documents the charset <meta>
// declaration is injected once, right after the first <head> open tag.
// Entering a <script> or <style> element sets |param->isInScriptOrStyleTag|.
// Returns the markup to write after the open tag (empty for non-HTML
// documents).
String WebPageSerializerImpl::postActionAfterSerializeOpenTag(
    const Element* element, SerializeDomParam* param)
{
    StringBuilder result;

    param->haveAddedContentsBeforeEnd = false;
    if (!param->isHTMLDocument)
        return result.toString();

    if (!param->haveAddedCharsetDeclaration
        && isHTMLHeadElement(*element)) {
        param->haveAddedCharsetDeclaration = true;
        result.append(WebPageSerializer::generateMetaCharsetDeclaration(
            String(param->textEncoding.name())));
        // The element now has content even if it had no children.
        param->haveAddedContentsBeforeEnd = true;
    } else if (isHTMLScriptElement(*element) || isHTMLStyleElement(*element)) {
        // The second operand used to repeat the <script> test, so <style>
        // elements never set the flag.
        param->isInScriptOrStyleTag = true;
    }

    return result.toString();
}

// Runs before |element|'s end tag is written.
//
// Sets |*needSkip| to true when |element| is the <meta> element whose open
// tag was skipped. Leaving a <script> or <style> element clears
// |param->isInScriptOrStyleTag|. Always returns an empty string.
String WebPageSerializerImpl::preActionBeforeSerializeEndTag(
    const Element* element, SerializeDomParam* param, bool* needSkip)
{
    String result;

    *needSkip = false;
    if (!param->isHTMLDocument)
        return result;

    if (param->skipMetaElement == element) {
        *needSkip = true;
    } else if (isHTMLScriptElement(*element) || isHTMLStyleElement(*element)) {
        // Mirrors the condition in postActionAfterSerializeOpenTag(); the two
        // must agree or this assertion fires.
        ASSERT(param->isInScriptOrStyleTag);
        param->isInScriptOrStyleTag = false;
    }

    return result;
}
// Produces any markup that has to follow |element|'s end tag.
//
// For an HTML <base> element this closes the comment opened before its open
// tag and appends a replacement base declaration built from the document's
// base target. Every other element (and every non-HTML document) yields an
// empty string.
String WebPageSerializerImpl::postActionAfterSerializeEndTag(
    const Element* element, SerializeDomParam* param)
{
    StringBuilder suffix;

    if (param->isHTMLDocument && isHTMLBaseElement(*element)) {
        suffix.append("-->");
        const String baseTarget = param->document->baseTarget();
        suffix.append(WebPageSerializer::generateBaseTagDeclaration(baseTarget));
    }

    return suffix.toString();
}
void WebPageSerializerImpl::saveHTMLContentToBuffer(
const String& result, SerializeDomParam* param)
{
m_dataBuffer.append(result);
encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsNotFinished,
param,
DoNotForceFlush);
}
// Encodes the buffered markup with |param->textEncoding| and delivers it to
// the client together with |status|.
//
// Nothing happens unless |flushOption| is ForceFlush or the buffer length
// exceeds dataBufferCapacity. The buffer is cleared before the client is
// notified.
void WebPageSerializerImpl::encodeAndFlushBuffer(
    WebPageSerializerClient::PageSerializationStatus status,
    SerializeDomParam* param,
    FlushOption flushOption)
{
    const bool shouldFlush = flushOption == ForceFlush
        || m_dataBuffer.length() > dataBufferCapacity;
    if (!shouldFlush)
        return;

    // Take the pending text out of the buffer before encoding it.
    String pending = m_dataBuffer.toString();
    m_dataBuffer.clear();

    CString encoded = param->textEncoding.normalizeAndEncode(pending, WTF::EntitiesForUnencodables);
    WebCString payload(encoded.data(), encoded.length());
    m_client->didSerializeDataForFrame(param->url, payload, status);
}
// Serializes |element|'s open tag, including all of its attributes, into the
// data buffer.
//
// Link attributes are rewritten: "javascript:" values are kept as they are,
// everything else is resolved to a complete URL (or, for frame owner elements,
// the sub frame's document URL) and replaced by the registered local path when
// one exists in m_localLinks. Every attribute value, link or not, is passed
// through the HTML or XML entity converter before it is placed between the
// double quotes, so a quote, ampersand or angle bracket in a URL or local path
// cannot terminate the attribute or make XML output ill-formed.
//
// The closing '>' of the open tag is written here only when the element has
// children or content was injected after the open tag; otherwise
// endTagToString() finishes the tag.
void WebPageSerializerImpl::openTagToString(Element* element,
                                            SerializeDomParam* param)
{
    bool needSkip;
    StringBuilder result;
    // The pre-action may emit leading markup or veto the element entirely.
    result.append(preActionBeforeSerializeOpenTag(element, param, &needSkip));
    if (needSkip)
        return;

    result.append('<');
    result.append(element->nodeName().lower());

    if (element->hasAttributes()) {
        unsigned numAttrs = element->attributeCount();
        for (unsigned i = 0; i < numAttrs; i++) {
            result.append(' ');
            const Attribute& attribute = element->attributeItem(i);
            result.append(attribute.name().toString());
            result.appendLiteral("=\"");
            if (!attribute.value().isEmpty()) {
                const String& attrValue = attribute.value();
                const QualifiedName& attrName = attribute.name();

                // Work out the unescaped value first, then escape it once.
                String valueToWrite;
                if (!element->hasLegalLinkAttribute(attrName)
                    || attrValue.startsWith("javascript:", false)) {
                    valueToWrite = attrValue;
                } else {
                    // For a frame owner element use the sub frame's document
                    // URL; otherwise resolve the value against the document.
                    WebFrameImpl* subFrame = WebFrameImpl::fromFrameOwnerElement(element);
                    String completeURL = subFrame ? subFrame->frame()->document()->url() :
                                                    param->document->completeURL(attrValue);
                    if (m_localLinks.contains(completeURL)) {
                        StringBuilder localLink;
                        if (!param->directoryName.isEmpty()) {
                            localLink.appendLiteral("./");
                            localLink.append(param->directoryName);
                            localLink.append('/');
                        }
                        localLink.append(m_localLinks.get(completeURL));
                        valueToWrite = localLink.toString();
                    } else {
                        valueToWrite = completeURL;
                    }
                }

                if (param->isHTMLDocument)
                    result.append(m_htmlEntities.convertEntitiesInString(valueToWrite));
                else
                    result.append(m_xmlEntities.convertEntitiesInString(valueToWrite));
            }
            result.append('\"');
        }
    }

    // The post-action updates haveAddedContentsBeforeEnd, so it has to run
    // before deciding whether to close the open tag here.
    String addedContents = postActionAfterSerializeOpenTag(element, param);
    if (element->hasChildren() || param->haveAddedContentsBeforeEnd)
        result.append('>');
    result.append(addedContents);

    saveHTMLContentToBuffer(result.toString(), param);
}
// Serializes the end of |element| into the data buffer.
//
// When the open tag was already closed (the element has children or content
// was injected after the open tag) a plain "</name>" is written. Otherwise the
// still-open start tag is finished here: HTML documents get '>' followed by an
// end tag unless the element is an HTML element for which
// ieForbidsInsertHTML() is true, and other documents get the self-closing
// " />" form.
void WebPageSerializerImpl::endTagToString(Element* element,
                                           SerializeDomParam* param)
{
    bool needSkip;
    StringBuilder markup;
    markup.append(preActionBeforeSerializeEndTag(element, param, &needSkip));
    if (needSkip)
        return;

    bool writeEndTag;
    if (element->hasChildren() || param->haveAddedContentsBeforeEnd) {
        writeEndTag = true;
    } else if (param->isHTMLDocument) {
        markup.append('>');
        writeEndTag = !element->isHTMLElement() || !toHTMLElement(element)->ieForbidsInsertHTML();
    } else {
        markup.appendLiteral(" />");
        writeEndTag = false;
    }

    if (writeEndTag) {
        markup.appendLiteral("</");
        markup.append(element->nodeName().lower());
        markup.append('>');
    }

    markup.append(postActionAfterSerializeEndTag(element, param));
    saveHTMLContentToBuffer(markup.toString(), param);
}
// Recursively serializes |node| and its subtree into the data buffer.
//
// Elements are written as open tag, children, end tag. Attribute, document and
// document-fragment nodes are not expected here. Every other node type is
// written via createMarkup(); a doctype node additionally marks the doctype as
// seen.
void WebPageSerializerImpl::buildContentForNode(Node* node,
                                                SerializeDomParam* param)
{
    switch (node->nodeType()) {
    case Node::ATTRIBUTE_NODE:
    case Node::DOCUMENT_NODE:
    case Node::DOCUMENT_FRAGMENT_NODE:
        ASSERT_NOT_REACHED();
        return;
    case Node::ELEMENT_NODE: {
        Element* element = toElement(node);
        openTagToString(element, param);
        Node* child = node->firstChild();
        while (child) {
            buildContentForNode(child, param);
            child = child->nextSibling();
        }
        endTagToString(element, param);
        return;
    }
    case Node::DOCUMENT_TYPE_NODE:
        // Record the doctype, then write it like any other plain node below.
        param->haveSeenDocType = true;
        break;
    default:
        break;
    }

    // Text, doctype and all remaining node types are written as-is.
    saveHTMLContentToBuffer(createMarkup(node), param);
}
// Sets up a serializer for |frame|.
//
// |recursiveSerialization| - whether sub frames are collected as well.
// |client|                 - receiver of the serialized data.
// |links| / |localPaths|   - parallel arrays; links[i] is mapped to
//                            localPaths[i] in m_localLinks. Both must have the
//                            same size and |links| is expected to be free of
//                            duplicates.
// |localDirectoryName|     - stored in m_localDirectoryName.
WebPageSerializerImpl::WebPageSerializerImpl(
    WebFrame* frame,
    bool recursiveSerialization,
    WebPageSerializerClient* client,
    const WebVector<WebURL>& links,
    const WebVector<WebString>& localPaths,
    const WebString& localDirectoryName)
    : m_client(client)
    , m_recursiveSerialization(recursiveSerialization)
    , m_framesCollected(false)
    , m_localDirectoryName(localDirectoryName)
    , m_htmlEntities(false)
    , m_xmlEntities(true)
{
    // The frame to serialize is required.
    ASSERT(frame);
    m_specifiedWebFrameImpl = toWebFrameImpl(frame);
    // So is the client that receives the data.
    ASSERT(client);

    // Build the URL -> local path lookup table.
    ASSERT(links.size() == localPaths.size());
    for (size_t index = 0; index < links.size(); ++index) {
        KURL linkURL = links[index];
        const String& key = linkURL.string();
        ASSERT(!m_localLinks.contains(key));
        m_localLinks.set(key, localPaths[index]);
    }

    ASSERT(m_dataBuffer.isEmpty());
}
void WebPageSerializerImpl::collectTargetFrames()
{
ASSERT(!m_framesCollected);
m_framesCollected = true;
m_frames.append(m_specifiedWebFrameImpl);
if (!m_recursiveSerialization)
return;
for (int i = 0; i < static_cast<int>(m_frames.size()); ++i) {
WebFrameImpl* currentFrame = m_frames[i];
Document* currentDoc = currentFrame->frame()->document();
RefPtr<HTMLCollection> all = currentDoc->all();
for (unsigned i = 0; Element* element = all->item(i); i++) {
if (!element->isHTMLElement())
continue;
WebFrameImpl* webFrame =
WebFrameImpl::fromFrameOwnerElement(element);
if (webFrame)
m_frames.append(webFrame);
}
}
}
bool WebPageSerializerImpl::serialize()
{
if (!m_framesCollected)
collectTargetFrames();
bool didSerialization = false;
KURL mainURL = m_specifiedWebFrameImpl->frame()->document()->url();
for (unsigned i = 0; i < m_frames.size(); ++i) {
WebFrameImpl* webFrame = m_frames[i];
Document* document = webFrame->frame()->document();
const KURL& url = document->url();
if (!url.isValid() || !m_localLinks.contains(url.string()))
continue;
didSerialization = true;
const WTF::TextEncoding& textEncoding = document->encoding().isValid() ? document->encoding() : UTF8Encoding();
String directoryName = url == mainURL ? m_localDirectoryName : "";
SerializeDomParam param(url, textEncoding, document, directoryName);
Element* documentElement = document->documentElement();
if (documentElement)
buildContentForNode(documentElement, ¶m);
encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsFinished, ¶m, ForceFlush);
}
ASSERT(m_dataBuffer.isEmpty());
m_client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished);
return didSerialization;
}
}