Changeset ad35672


Ignore:
Timestamp:
15/04/2012 00:12:29 (3 years ago)
Author:
Eric van der Vlist <vdv@dyomedea.com>
Branches:
master
Children:
c4c4108
Parents:
ba51ddf
git-author:
Eric van der Vlist <vdv@dyomedea.com> (15/04/2012 00:12:29)
git-committer:
Eric van der Vlist <vdv@dyomedea.com> (15/04/2012 00:12:29)
Message:

Still a work in progress, but the WARC archive now validates with warc-tools' warcvalid.py...

Location:
archiver/pipelines/actions
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • archiver/pipelines/actions/package-archive.xpl

    rba51ddf rad35672  
    212212            <document xsl:version="2.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xs:string" content-type="text/plain"> 
    213213              <xsl:apply-templates select="$request" mode="warc"/> 
    214               <xsl:apply-templates select="$response" mode="warc"/> 
     214              <xsl:apply-templates select="$response" mode="warc"> 
     215                <xsl:with-param name="document-length" as="xs:integer" select="string-length(translate(/archive/response/document, ' &#xa;&#xd;', '')) * 3 div 4" tunnel="yes"/> 
     216              </xsl:apply-templates> 
    215217            </document> 
    216218          </xsl:template> 
  • archiver/pipelines/actions/warc-lib.xsl

    rba51ddf rad35672  
    3131 
    3232    <xsl:template match="record" mode="warc"> 
     33        <xsl:param name="document-length" as="xs:integer" select="0" tunnel="yes"/> 
     34        <xsl:call-template name="CRLF"/> 
     35        <xsl:call-template name="CRLF"/> 
    3336        <xsl:apply-templates select="header" mode="warc"/> 
    3437        <xsl:variable name="block"> 
     
    3942                <name>Content-Length</name> 
    4043                <value> 
    41                     <xsl:value-of select="string-length($block)"/> 
     44                    <xsl:value-of select="string-length($block) + $document-length "/> 
    4245                </value> 
    4346            </field> 
     
    4649        <xsl:call-template name="CRLF"/> 
    4750        <xsl:value-of select="$block"/> 
    48         <xsl:call-template name="CRLF"/> 
    49         <xsl:call-template name="CRLF"/> 
    5051    </xsl:template> 
    5152 
     
    7273 
    7374    <xsl:template match="response" mode="warc-http"> 
     75        <!--<xsl:message> 
     76            <xsl:value-of select="string-length(document)"/> 
     77            <xsl:text> - </xsl:text> 
     78            <xsl:value-of select="string-length(translate(document, ' &#xa;&#xd;', ''))"/> 
     79        </xsl:message>--> 
    7480        <line> 
    7581            <!-- TODO: get the HTTP version and status--> 
Note: See TracChangeset for help on using the changeset viewer.