dans.blog: Bug fixes to CFCParser.cfc used by various open source projects (like CFCDocs)

I recently installed a copy of CFCDocs from RIAForge to give our developers better documentation for all of our various components. However, I quickly ran into a few problems related to the CFCParser component that it uses to analyze components.

The CFCParser doesn't create an instance of the object, but instead analyzes the component using a custom parser. The problem is the parser uses some overly simple logic for detecting attribute name/value pairs and for finding starting/ending tag values.

The problem really presents itself when you have a piece of code as follows:

<cfargument name="hightlightStart" type="string" required="false" default="<span style=""background-color:##ffff66;"">" />

In XML, it would be invalid to have an attribute that looked like: default="<span style=""background-color:##ffff66;"">". However, in ColdFusion this is perfectly legal.

After spending way too much time debugging the issue, I found there were two distinct issues:

The getTagAttributes() function used a regex that could not properly handle attributes that had escaped quotes.
The findTags() function did not check whether the ending tag token was inside a quoted string. So, if you had a > character inside an attribute in a tag, the parser would use the first match it found as the ending tag token—instead of the first match outside of the attribute value.

I was able to fix both of these issues by re-writing portions of both the findTags() and getTagAttributes() functions.

Since I don't know who wrote this code or who manages it, I thought I'd at least post the code here in case anyone else has issues w/this component.

<cfcomponent hint="The CFCParser component provides methods to parse a ColdFusion Component.">

  <!--- Constructor: performs no setup and hands back this component instance so calls can be chained. --->
  <cffunction name="init" access="public" returntype="CFCParser" output="false" hint="The init method currently does nothing.">
    <cfreturn this />
  </cffunction>

  <!---
    findTags
    Scans the document for blocks that begin with startTag and end with endTag and
    returns them in the order they are found.

    Arguments:
      document - text to scan
      startTag - opening token; matched case-insensitively
      endTag   - closing token; matched case-insensitively. When it is exactly the
                 greater-than character, an occurrence inside a single- or
                 double-quoted string is skipped.

    Returns an array of structs, each holding:
      start    - position of the first character of startTag
      end      - position of the first character after endTag
      tagBlock - the text from start up to (not including) end, prefixed with a
                 padding string taken from the block's final line (see below)

    Nesting: a candidate block is accepted only when the number of startTag
    occurrences inside it equals the number of endTag tokens consumed so far for
    that block; otherwise the search moves on to the next endTag.
  --->
  <cffunction name="findTags" access="private" returntype="array" output="false" hint="The findTags method searches the document for the given startTag and endTag. It returns an array of structures containing the locations in the document of the start and end position of each tag, and the full contents of the tag itself.">
    <cfargument name="document" type="string" required="yes">
    <cfargument name="startTag" type="string" required="yes">
    <cfargument name="endTag" type="string" required="yes">

    <!--- Local state for the scan --->
    <cfset var tagLocations = arrayNew(1)>
    <cfset var nestingLevel = 1>
    <cfset var searchMode = "start">
    <cfset var position = 1>
    <cfset var i = 0>
    <cfset var j = 0>
    <cfset var tagBegin = 0>
    <cfset var tagEnd = 0>
    <cfset var tagBlock = "">
    <cfset var tmpPosition = 0>
    <cfset var nestCount = 0>
    <cfset var padding = "">
    <cfset var lastReturn = "">
    <cfset var lastSpace = "">
    <cfset var stTag = "">
    <!--- Matches either a complete quoted string (doubled quotes count as escaped) or the end token.
          The end token is interpolated unescaped; this pattern is only used when the token is the
          greater-than character, which has no special meaning in a regex. --->
    <cfset var regexFindEndTag = '(((("")|".*?[^"]"(?!"))|(('''')|''.*?[^'']''(?!''))))|(#arguments.endTag#)' />
    <cfset var findEndTag = 0 />
    <cfset var findEndTagMatch = "" />

    <!--- The index is unused; the loop only bounds the number of passes by the document length. --->
    <cfloop from="1" to="#len(arguments.document)#" index="i">

      <cfif searchMode is "start">

        <!--- Look for the next opening token at or after the current position --->
        <cfset tagBegin = findNoCase(arguments.startTag, arguments.document, position)>

        <cfif tagBegin>
          <cfset position = tagBegin + len(arguments.startTag)>
          <cfset searchMode = "end">
        <cfelse>
          <!--- No further opening tokens: scanning is finished --->
          <cfbreak>
        </cfif>

      <cfelse>
        <cfset findEndTagMatch = "" />

        <cfif arguments.endTag neq ">">
          <!--- Multi-character end tokens use a plain search. It is case-insensitive so that it
                agrees with the start-token search (tag names may be written in any case). --->
          <cfset tagEnd = findNoCase(arguments.endTag, arguments.document, position)>
        <cfelse>

          <!--- Step over quoted strings until the regex returns the end token itself,
                so an end token inside an attribute value is not mistaken for the tag end. --->
          <cfloop condition="findEndTagMatch neq arguments.endTag">
            <cfset findEndTag = reFindNoCase(regexFindEndTag, arguments.document, position, true) />

            <cfif findEndTag.pos[1]>
              <!--- The match is either a quoted string or the end token --->
              <cfset findEndTagMatch = mid(arguments.document, findEndTag.pos[1], findEndTag.len[1]) />
              <!--- Continue searching after this match --->
              <cfset position = findEndTag.pos[1] + findEndTag.len[1] />
            <cfelse>
              <!--- Nothing left to match; pos[1] is 0, which is reported below as "not found" --->
              <cfbreak />
            </cfif>
          </cfloop>

          <cfset tagEnd = findEndTag.pos[1] />
        </cfif>

        <cfif tagEnd>
          <!--- Move tagEnd to the first character after the end token --->
          <cfset tagEnd = tagEnd + len(arguments.endTag)>
          <cfset position = tagEnd>
        <cfelse>
          <!--- Opening token without a closing token: stop and return what was collected --->
          <cfbreak>
        </cfif>

        <cfset tagBlock = mid(arguments.document, tagBegin, tagEnd - tagBegin)>

        <!--- Count how many opening tokens the candidate block contains --->
        <cfset tmpPosition = 1>
        <cfset nestCount = 0>
        <cfloop from="1" to="#len(tagBlock)#" index="j">
          <cfif findNoCase(arguments.startTag, tagBlock, tmpPosition)>
            <cfset tmpPosition = findNoCase(arguments.startTag, tagBlock, tmpPosition) + len(arguments.startTag)>
            <cfset nestCount = nestCount + 1>
          <cfelse>
            <cfbreak>
          </cfif>
        </cfloop>

        <cfif nestCount EQ nestingLevel>

          <!--- lastSpace: last space/tab that is followed only by non-space/tab characters.
                lastReturn: last CR/LF that is followed only by non-line-break characters. --->
          <cfset lastSpace = reFindNoCase('[#chr(32)##chr(9)#][^#chr(32)##chr(9)#]+$',tagBlock)>
          <cfset lastReturn = reFindNoCase('[#chr(10)##chr(13)#][^#chr(10)##chr(13)#]+$',tagBlock)>

          <cfset padding = "">

          <!--- Padding is the text from just after that line break through that space/tab;
                presumably this restores the indentation of the block's first line (confirm with callers). --->
          <cfif lastReturn AND lastSpace AND lastReturn LT lastSpace>
            <cfset padding = mid(tagBlock,lastReturn+1,lastSpace-lastReturn)>
          </cfif>

          <cfset stTag = structNew()>
          <cfset stTag.start = tagBegin>
          <cfset stTag.end = tagEnd>
          <cfset stTag.tagBlock = padding & tagBlock>
          <cfset arrayAppend(tagLocations,stTag)>
          <cfset searchMode = "start">
          <!--- Each new top-level block starts again at depth 1. Without this reset, every block
                that follows a nested block would be compared against a stale depth and be lost. --->
          <cfset nestingLevel = 1>
        <cfelse>
          <!--- The block contains a nested opening token: one more end token is required --->
          <cfset nestingLevel = nestingLevel + 1>
        </cfif>

      </cfif>

    </cfloop>

    <cfreturn tagLocations>
  </cffunction>

  <!---
    removeComments
    Returns the document with every comment block located by findTags cut out.
    The spans are removed from last to first, so the positions reported for the
    earlier spans are still valid when their turn comes.
  --->
  <cffunction name="removeComments" access="private" output="false" returntype="string" hint="Strips the comments from a document so that code inside comments gets ignored by the findTags method">
    <cfargument name="document" type="string" required="yes">

    <cfset var commentSpans = findTags(arguments.document,"<!---","--->")>
    <cfset var stripped = arguments.document>
    <cfset var idx = 0>

    <!--- Runs zero times when no comments were found --->
    <cfloop from="#arrayLen(commentSpans)#" to="1" step="-1" index="idx">
      <cfset stripped = removeChars(stripped, commentSpans[idx].start, commentSpans[idx].end - commentSpans[idx].start)>
    </cfloop>

    <cfreturn stripped>
  </cffunction>

  <!--- Locates every function block in the document by delegating to findTags. --->
  <cffunction name="getMethods" access="private" returntype="array" output="false" hint="Calls the findTags method to retrieve all cffunction tags in the given document.">
    <cfargument name="document" type="string" required="true">

    <!--- The tokens are assembled from two pieces, as in the original call --->
    <cfset var openToken = "<cf"&"function ">
    <cfset var closeToken = "</cf"&"function>">

    <cfreturn findTags(arguments.document, openToken, closeToken) />
  </cffunction>

  <!---
    getProperties
    Locates every property tag in the document by delegating to findTags with the
    greater-than character as the end token. Returns the array produced by findTags.
    The hint previously named the wrong tag (copied from getMethods); it now
    describes what this function actually searches for.
  --->
  <cffunction name="getProperties" access="private" returntype="array" output="false" hint="Calls the findTags method to retrieve all cfproperty tags in the given document.">
    <cfargument name="document" type="string" required="true">
    <cfreturn findTags(arguments.document,"<cf"&"property ",">")>
  </cffunction>

  <!--- Locates every argument tag in the supplied text by delegating to findTags. --->
  <cffunction name="getArguments" access="private" returntype="array" output="false" hint="Calls the findTags method to retrieve all cfarguments tags in the given document. This method should be passed the body of a cffunction tag as the document argument.">
    <cfargument name="document" type="string" required="true">

    <cfset var openToken = "<cf"&"argument ">
    <cfset var closeToken = ">">

    <cfreturn findTags(arguments.document, openToken, closeToken)>
  </cffunction>

  <!---
    getTagAttributes
    Finds the first start tag named tagname in the document and returns its
    attributes as a struct.

    Arguments:
      document - text that contains the tag
      tagname  - tag name without the angle bracket, e.g. cfargument

    Returns:
      An empty struct when the tag is not found. Otherwise a struct with one key per
      attribute (value = text between the surrounding quotes, with doubled quotes
      left exactly as written in the source) plus the key fullTag holding the
      complete start tag.

    Whitespace on either side of the equals sign is tolerated; attribute names and
    values are trimmed before they are stored.
  --->
  <cffunction name="getTagAttributes" access="private" returntype="struct" output="false" hint="Parses the attributes out of the given document for the first occurrence of the tag specified and returns a structure containing name value pairs for the tag attributes.">
    <cfargument name="document" type="string" required="true">
    <cfargument name="tagname" type="string" required="true">

    <cfset var startTag = "">
    <cfset var stAttributes = structNew()>
    <!--- One attribute: whitespace, a name, the equals sign, optional whitespace, then a
          double- or single-quoted value in which a doubled quote counts as an escaped quote. --->
    <cfset var regex = '\s[^=]+=\s*((("")|".*?[^"]"(?!"))|(('''')|''.*?[^'']''(?!'')))' />
    <cfset var aTmp = reFindNoCase('<#arguments.tagname#(#regex#)*[^>]*>',arguments.document,1,true)>
    <cfset var attrMatch = "">
    <cfset var attribute = "">
    <cfset var attrName = "">
    <cfset var attrValue = "">

    <!--- Tag not present: return the empty struct --->
    <cfif NOT aTmp.pos[1]>
      <cfreturn stAttributes>
    </cfif>

    <!--- Isolate the start tag, then pull out each name/value pair --->
    <cfset startTag = mid(arguments.document,aTmp.pos[1],aTmp.len[1])>
    <cfset aTmp = reMatchNoCase(regex, startTag) />

    <cfloop index="attrMatch" array="#aTmp#">
      <cfset attribute = trim(attrMatch) />
      <!--- Trim both halves so blanks around the equals sign do not end up in the key or the value --->
      <cfset attrName = trim(listFirst(attribute, "=")) />
      <cfset attrValue = trim(listRest(attribute, "=")) />
      <!--- Drop the opening and closing quote characters --->
      <cfset stAttributes[attrName] = mid(attrValue, 2, len(attrValue)-2) />
    </cfloop>

    <cfset stAttributes.fullTag = startTag>

    <cfreturn stAttributes>
  </cffunction>

  <!---
    parse
    Public entry point. Strips comments from the supplied component source, then
    builds and returns a struct with these keys:
      isInterface - true when no cfcomponent tag was found but a cfinterface tag was
      attributes  - defaults overlaid with the attributes of the component/interface tag
      properties  - struct keyed by property name, one entry per property tag
      methods     - struct keyed by method name; each entry carries its attributes,
                    the full text of the function block (fullTag) and an arguments array
  --->
  <cffunction name="parse" access="public" returntype="struct" output="false" hint="Provides the public interface to the CFC parser. This method should be passed the contents of a full ColdFusion component file.">
    <cfargument name="document" type="string" required="true">

    <cfset var source = "">
    <cfset var parsed = "">
    <cfset var rootAttributes = structNew()>
    <cfset var propertyTags = "">
    <cfset var propertyTag = "">
    <cfset var propertyInfo = "">
    <cfset var methodTags = "">
    <cfset var methodTag = "">
    <cfset var methodInfo = "">
    <cfset var argumentTags = "">
    <cfset var argumentTag = "">
    <cfset var argumentInfo = "">

    <cfset source = removeComments(arguments.document)>

    <!--- Component-level defaults --->
    <cfset parsed = structNew()>
    <cfset parsed.isInterface = false>
    <cfset parsed.attributes = structNew()>
    <cfset parsed.attributes.hint = "">
    <cfset parsed.attributes.extends = "cfcomponent">
    <cfset parsed.attributes.implements = "cfinterface">
    <cfset parsed.attributes.displayname = "">
    <cfset parsed.attributes.output = "">

    <!--- Try the component tag first; fall back to the interface tag when nothing was found --->
    <cfset rootAttributes = getTagAttributes(source,'cfcomponent')>
    <cfif structIsEmpty(rootAttributes)>
      <cfset rootAttributes = getTagAttributes(source,'cfinterface')>
      <cfif NOT structIsEmpty(rootAttributes)>
        <cfset parsed.isInterface = true>
      </cfif>
    </cfif>
    <cfset structAppend(parsed.attributes,rootAttributes,true)>

    <!--- Properties: defaults first, then whatever the tag declares --->
    <cfset parsed.properties = structNew()>
    <cfset propertyTags = getProperties(source)>

    <cfloop array="#propertyTags#" index="propertyTag">
      <cfset propertyInfo = structNew()>
      <cfset propertyInfo.name = "">
      <cfset propertyInfo.type = "any">
      <cfset propertyInfo.required = "false">
      <cfset propertyInfo.default = "_an_empty_string_">
      <cfset propertyInfo.displayName = "">
      <cfset propertyInfo.hint = "">
      <cfset structAppend(propertyInfo,getTagAttributes(propertyTag.tagBlock,'cfproperty'),true)>
      <cfset parsed.properties[propertyInfo.name] = propertyInfo>
    </cfloop>

    <!--- Methods and, for each method, its arguments --->
    <cfset parsed.methods = structNew()>
    <cfset methodTags = getMethods(source)>

    <cfloop array="#methodTags#" index="methodTag">
      <cfset methodInfo = structNew()>
      <cfset methodInfo.name = "">
      <cfset methodInfo.access = "public">
      <cfset methodInfo.returnType = "any">
      <cfset methodInfo.roles = "">
      <cfset methodInfo.output = "">
      <cfset methodInfo.displayname = "">
      <cfset methodInfo.hint = "">
      <cfset structAppend(methodInfo,getTagAttributes(methodTag.tagBlock,'cffunction'),true)>
      <!--- fullTag is replaced with the whole function block, not just its start tag --->
      <cfset methodInfo.fullTag = methodTag.tagBlock>
      <cfset parsed.methods[methodInfo.name] = methodInfo>

      <cfset methodInfo.arguments = arrayNew(1)>
      <cfset argumentTags = getArguments(methodTag.tagBlock)>

      <cfloop array="#argumentTags#" index="argumentTag">
        <cfset argumentInfo = structNew()>
        <cfset argumentInfo.name = "">
        <cfset argumentInfo.type = "any">
        <cfset argumentInfo.required = "false">
        <cfset argumentInfo.displayName = "">
        <cfset argumentInfo.hint = "">
        <cfset argumentInfo.default = "_an_empty_string_">
        <cfset structAppend(argumentInfo,getTagAttributes(argumentTag.tagBlock,'cfargument'),true)>
        <cfset arrayAppend(methodInfo.arguments,argumentInfo)>
      </cfloop>

    </cfloop>

    <cfreturn parsed>
  </cffunction>

</cfcomponent>

NOTE:
I also updated all the unscoped variables!

[UPDATED: Thursday, February 24, 2011 at 2:21:00 PM]

Categories: HTML/ColdFusion, Source Code

Comments for this entry have been disabled.