This source file includes the following definitions.
- getSubResourceURLFromElement
- retrieveResourcesForElement
- retrieveResourcesForFrame
- serialize
- serializePageToMHTML
- serializeToMHTML
- serializeToMHTMLUsingBinaryEncoding
- serialize
- retrieveAllResources
- generateMetaCharsetDeclaration
- generateMarkOfTheWebDeclaration
- generateBaseTagDeclaration
#include "config.h"
#include "WebPageSerializer.h"
#include "HTMLNames.h"
#include "WebFrame.h"
#include "WebFrameImpl.h"
#include "WebPageSerializerClient.h"
#include "WebPageSerializerImpl.h"
#include "WebView.h"
#include "WebViewImpl.h"
#include "core/dom/Document.h"
#include "core/dom/Element.h"
#include "core/frame/LocalFrame.h"
#include "core/html/HTMLAllCollection.h"
#include "core/html/HTMLFrameElementBase.h"
#include "core/html/HTMLFrameOwnerElement.h"
#include "core/html/HTMLInputElement.h"
#include "core/html/HTMLTableElement.h"
#include "core/loader/DocumentLoader.h"
#include "core/page/PageSerializer.h"
#include "platform/SerializedResource.h"
#include "platform/mhtml/MHTMLArchive.h"
#include "platform/weborigin/KURL.h"
#include "public/platform/WebCString.h"
#include "public/platform/WebString.h"
#include "public/platform/WebURL.h"
#include "public/platform/WebVector.h"
#include "wtf/Vector.h"
#include "wtf/text/StringConcatenate.h"
using namespace WebCore;
namespace {
// Returns the URL of the sub-resource referenced by |element|, completed by
// the element's document. Returns a default-constructed KURL when the element
// has no sub-resource attribute, when that attribute is empty, or when its
// whitespace-stripped value starts with "javascript:".
KURL getSubResourceURLFromElement(Element* element)
{
    ASSERT(element);

    const QualifiedName& subResourceAttribute = element->subResourceAttributeName();
    if (subResourceAttribute == nullQName())
        return KURL();

    String attributeValue = element->getAttribute(subResourceAttribute);
    if (attributeValue.isEmpty())
        return KURL();

    // Script URLs are not resources that can be fetched and saved.
    // NOTE(review): the |false| argument presumably selects a case-insensitive
    // prefix match -- confirm against WTF::String::startsWith.
    if (attributeValue.stripWhiteSpace().startsWith("javascript:", false))
        return KURL();

    return element->document().completeURL(attributeValue);
}
// Inspects a single |element| and records what it refers to:
//  - If it is a frame, object or embed element whose content frame is a local
//    frame, that frame is queued in |framesToVisit| (unless it is already in
//    |visitedFrames|) and nothing else is recorded for the element.
//  - Otherwise its sub-resource URL, if non-empty, valid, and either in the
//    HTTP family or a local file, is added to |resourceURLs| once.
// |frameURLs| is accepted for signature symmetry but is not used here.
void retrieveResourcesForElement(Element* element,
                                 Vector<LocalFrame*>* visitedFrames,
                                 Vector<LocalFrame*>* framesToVisit,
                                 Vector<KURL>* frameURLs,
                                 Vector<KURL>* resourceURLs)
{
    ASSERT(element);

    const bool mayOwnFrame = isHTMLFrameElementBase(*element)
        || isHTMLObjectElement(*element)
        || isHTMLEmbedElement(*element);
    if (mayOwnFrame) {
        Frame* contentFrame = toHTMLFrameOwnerElement(element)->contentFrame();
        if (contentFrame && contentFrame->isLocalFrame()) {
            LocalFrame* localFrame = toLocalFrame(contentFrame);
            if (!visitedFrames->contains(localFrame))
                framesToVisit->append(localFrame);
            // The element hosts a frame; it is handled when that frame is visited.
            return;
        }
    }

    const KURL resourceURL = getSubResourceURLFromElement(element);
    if (resourceURL.isEmpty() || !resourceURL.isValid())
        return;

    // Only HTTP-family and local-file URLs are collected.
    const bool isCollectable = resourceURL.protocolIsInHTTPFamily() || resourceURL.isLocalFile();
    if (!isCollectable)
        return;

    if (resourceURLs->contains(resourceURL))
        return;
    resourceURLs->append(resourceURL);
}
// Visits |frame| once: records its URL in |frameURLs| and runs
// retrieveResourcesForElement() on every item of the collection returned by
// the frame document's all().
//
// The frame is skipped entirely when the URL of its document loader's request
// is invalid, when that URL's protocol matches none of |supportedSchemes|, or
// when the frame is already listed in |visitedFrames|.
void retrieveResourcesForFrame(LocalFrame* frame,
                               const blink::WebVector<blink::WebCString>& supportedSchemes,
                               Vector<LocalFrame*>* visitedFrames,
                               Vector<LocalFrame*>* framesToVisit,
                               Vector<KURL>* frameURLs,
                               Vector<KURL>* resourceURLs)
{
    KURL frameURL = frame->loader().documentLoader()->request().url();
    if (!frameURL.isValid())
        return;

    // Stop scanning at the first scheme that matches the frame URL.
    bool schemeIsSupported = false;
    for (size_t schemeIndex = 0; !schemeIsSupported && schemeIndex < supportedSchemes.size(); ++schemeIndex) {
        const CString scheme = static_cast<CString>(supportedSchemes[schemeIndex]);
        schemeIsSupported = frameURL.protocolIs(scheme.data());
    }
    if (!schemeIsSupported)
        return;

    // Each frame is processed at most once.
    if (visitedFrames->contains(frame))
        return;
    visitedFrames->append(frame);

    if (!frameURLs->contains(frameURL))
        frameURLs->append(frameURL);

    RefPtr<HTMLCollection> elements = frame->document()->all();
    for (unsigned elementIndex = 0; elementIndex < elements->length(); ++elementIndex) {
        retrieveResourcesForElement(elements->item(elementIndex),
                                    visitedFrames, framesToVisit,
                                    frameURLs, resourceURLs);
    }
}
}
namespace blink {
// Serializes the page behind |view| with PageSerializer and copies each
// resulting SerializedResource (url, MIME type, data bytes) into
// |resourcesParam| as a WebPageSerializer::Resource.
void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam)
{
    Vector<SerializedResource> serializedResources;
    PageSerializer pageSerializer(&serializedResources);
    pageSerializer.serialize(toWebViewImpl(view)->page());

    // Convert the internal resource records into their public API counterparts.
    Vector<Resource> converted;
    for (size_t i = 0; i < serializedResources.size(); ++i) {
        const SerializedResource& source = serializedResources[i];
        Resource target;
        target.url = source.url;
        target.mimeType = source.mimeType.ascii();
        target.data = WebCString(source.data->data(), source.data->size());
        converted.append(target);
    }

    *resourcesParam = converted;
}
// Serializes |page| with PageSerializer and hands the resources to
// MHTMLArchive::generateMHTMLData() together with |encodingPolicy| and the
// main frame document's title and suggested MIME type.
static PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::EncodingPolicy encodingPolicy)
{
    Vector<SerializedResource> serializedResources;
    PageSerializer pageSerializer(&serializedResources);
    pageSerializer.serialize(page);

    Document* mainDocument = page->mainFrame()->document();
    const String title = mainDocument->title();
    const String mimeType = mainDocument->suggestedMIMEType();
    return MHTMLArchive::generateMHTMLData(serializedResources, encodingPolicy, title, mimeType);
}
// Returns the MHTML serialization of the page behind |view|, generated with
// MHTMLArchive::UseDefaultEncoding, as a WebCString.
WebCString WebPageSerializer::serializeToMHTML(WebView* view)
{
    Page* page = toWebViewImpl(view)->page();
    RefPtr<SharedBuffer> archiveData = serializePageToMHTML(page, MHTMLArchive::UseDefaultEncoding);
    return WebCString(archiveData->data(), archiveData->size());
}
// Returns the MHTML serialization of the page behind |view|, generated with
// MHTMLArchive::UseBinaryEncoding, as a WebCString.
WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
{
    Page* page = toWebViewImpl(view)->page();
    RefPtr<SharedBuffer> archiveData = serializePageToMHTML(page, MHTMLArchive::UseBinaryEncoding);
    return WebCString(archiveData->data(), archiveData->size());
}
// Thin entry point: builds a WebPageSerializerImpl from the given arguments
// and returns the result of its serialize() call.
bool WebPageSerializer::serialize(WebLocalFrame* frame,
                                  bool recursive,
                                  WebPageSerializerClient* client,
                                  const WebVector<WebURL>& links,
                                  const WebVector<WebString>& localPaths,
                                  const WebString& localDirectoryName)
{
    WebPageSerializerImpl impl(frame, recursive, client, links, localPaths, localDirectoryName);
    const bool succeeded = impl.serialize();
    return succeeded;
}
// Collects the frame URLs and sub-resource URLs reachable from the main frame
// of |view|, visiting frames in the order they were queued.
//
// Returns false, leaving both output vectors untouched, when the view has no
// main frame implementation; otherwise fills |resourceURLs| and |frameURLs|
// and returns true. A URL found both as a frame and as a resource is reported
// only in |resourceURLs|.
bool WebPageSerializer::retrieveAllResources(WebView* view,
                                             const WebVector<WebCString>& supportedSchemes,
                                             WebVector<WebURL>* resourceURLs,
                                             WebVector<WebURL>* frameURLs)
{
    WebFrameImpl* mainFrame = toWebFrameImpl(view->mainFrame());
    if (!mainFrame)
        return false;

    Vector<LocalFrame*> pendingFrames;
    Vector<LocalFrame*> visitedFrames;
    Vector<KURL> collectedFrameURLs;
    Vector<KURL> collectedResourceURLs;

    // Work through the queue front-to-back; visiting a frame may enqueue its
    // child frames.
    pendingFrames.append(mainFrame->frame());
    while (!pendingFrames.isEmpty()) {
        LocalFrame* current = pendingFrames.first();
        pendingFrames.remove(0);
        retrieveResourcesForFrame(current, supportedSchemes,
                                  &visitedFrames, &pendingFrames,
                                  &collectedFrameURLs, &collectedResourceURLs);
    }

    // Drop from the frame list any URL that was also collected as a resource.
    for (size_t i = 0; i < collectedResourceURLs.size(); ++i) {
        size_t duplicateIndex = collectedFrameURLs.find(collectedResourceURLs[i]);
        if (duplicateIndex != kNotFound)
            collectedFrameURLs.remove(duplicateIndex);
    }

    // Publish the resource URLs.
    WebVector<WebURL> resourceResult(collectedResourceURLs.size());
    for (size_t i = 0; i < collectedResourceURLs.size(); ++i)
        resourceResult[i] = collectedResourceURLs[i];
    *resourceURLs = resourceResult;

    // Publish the remaining frame URLs.
    WebVector<WebURL> frameResult(collectedFrameURLs.size());
    for (size_t i = 0; i < collectedFrameURLs.size(); ++i)
        frameResult[i] = collectedFrameURLs[i];
    *frameURLs = frameResult;

    return true;
}
// Builds a <meta http-equiv="Content-Type"> tag declaring |charset|.
// |charset| is inserted verbatim, without any escaping.
WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& charset)
{
    const String charsetName = static_cast<const String&>(charset);
    String declaration = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=" + charsetName + "\">";
    return declaration;
}
// Builds the "saved from url" HTML comment for |url|: the length of the URL
// spec, zero-padded to at least four digits, in parentheses, followed by the
// spec itself.
WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url)
{
    const int specLength = static_cast<int>(url.spec().length());
    return String::format("\n<!-- saved from url=(%04d)%s -->\n", specLength, url.spec().data());
}
// Builds a <base href="."> tag. When |baseTarget| is non-empty it is added as
// the tag's target attribute, inserted verbatim without any escaping.
WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
{
    if (!baseTarget.isEmpty()) {
        String declaration = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">";
        return declaration;
    }
    return String("<base href=\".\">");
}
}