Commit ab772d5e authored by Benthe Kuijpers's avatar Benthe Kuijpers
Browse files
parents d2e1a81e 99481b20
# svg-sanitizer
[![Build Status](https://travis-ci.org/darylldoyle/svg-sanitizer.svg?branch=master)](https://travis-ci.org/darylldoyle/svg-sanitizer) [![Test Coverage](https://codeclimate.com/github/darylldoyle/svg-sanitizer/badges/coverage.svg)](https://codeclimate.com/github/darylldoyle/svg-sanitizer/coverage)
This is my attempt at building a decent SVG sanitizer in PHP. The work is laregely borrowed from [DOMPurify](https://github.com/cure53/DOMPurify).
## Installation
Either require `enshrined/svg-sanitize` through composer or download the repo and include the old way!
## Usage
Using this is fairly easy. Create a new instance of `enshrined\svgSanitize\Sanitizer` and then call the `sanitize` whilst passing in your dirty SVG/XML
**Basic Example**
```php
use enshrined\svgSanitize\Sanitizer;
// Create a new sanitizer instance
$sanitizer = new Sanitizer();
// Load the dirty svg
$dirtySVG = file_get_contents('filthy.svg');
// Pass it to the sanitizer and get it back clean
$cleanSVG = $sanitizer->sanitize($dirtySVG);
// Now do what you want with your clean SVG/XML data
```
## Output
This will either return a sanitized SVG/XML string or boolean `false` if XML parsing failed (usually due to a badly formatted file).
## Options
You may pass your own whitelist of tags and attributes by using the `Sanitizer::setAllowedTags` and `Sanitizer::setAllowedAttrs` methods respectively.
These methods require that you implement the `enshrined\svgSanitize\data\TagInterface` or `enshrined\svgSanitize\data\AttributeInterface`.
## Remove remote references
You have the option to remove attributes that reference remote files, this will stop HTTP leaks but will add an overhead to the sanitizer.
This defaults to false, set to true to remove references.
`$sanitizer->removeRemoteReferences(true);`
## Viewing Sanitization Issues
You may use the `getXmlIssues()` method to return an array of issues that occurred during sanitization.
This may be useful for logging or providing feedback to the user on why an SVG was refused.
`$issues = $sanitizer->getXmlIssues();`
## Minification
You can minify the XML output by calling `$sanitizer->minify(true);`.
## Demo
There is a demo available at: [http://svg.enshrined.co.uk/](http://svg.enshrined.co.uk/)
## WordPress
I've just released a WordPress plugin containing this code so you can sanitize your WordPress uploads. It's available from the WordPress plugin directory: [https://wordpress.org/plugins/safe-svg/](https://wordpress.org/plugins/safe-svg/)
## Drupal
[Michael Potter](https://github.com/heyMP) has kindly created a Drupal module for this library which is available at: [https://www.drupal.org/project/svg_sanitizer](https://www.drupal.org/project/svg_sanitizer)
## TYPO3
An integration for TYPO3 CMS of this library is available as composer package `t3g/svg-sanitizer` at [https://github.com/TYPO3GmbH/svg_sanitizer](https://github.com/TYPO3GmbH/svg_sanitizer)
## Tests
You can run these by running `vendor/bin/phpunit` from the base directory of this package.
## Standalone scanning of files via CLI
Thanks to the work by [gudmdharalds](https://github.com/gudmdharalds) there's now a standalone scanner that can be used via the CLI.
Any errors will be output in JSON format. See [the PR](https://github.com/darylldoyle/svg-sanitizer/pull/25) for an example.
Use it as follows: `php svg-scanner.php ~/svgs/myfile.svg`
## To-Do
More extensive testing for the SVGs/XML would be lovely, I'll try and add these soon. If you feel like doing it for me, please do and make a PR!
{
"name": "enshrined/svg-sanitize",
"description": "An SVG sanitizer for PHP",
"license": "GPL-2.0-or-later",
"authors": [
{
"name": "Daryll Doyle",
"email": "daryll@enshrined.co.uk"
}
],
"autoload": {
"psr-4": {
"enshrined\\svgSanitize\\": "src"
}
},
"autoload-dev": {
"psr-4": {
"enshrined\\svgSanitize\\Tests\\": "tests"
}
},
"minimum-stability": "stable",
"require": {
"ext-dom": "*",
"ext-libxml": "*"
},
"require-dev": {
"phpunit/phpunit": "^6",
"codeclimate/php-test-reporter": "^0.1.2"
}
}
<?xml version="1.0" encoding="UTF-8"?>
<phpunit bootstrap="vendor/autoload.php"
colors="true"
stopOnFailure="false"
syntaxCheck="false">
<testsuites>
<testsuite name="The project's test suite">
<directory>./tests</directory>
</testsuite>
</testsuites>
<logging>
<log type="coverage-clover" target="./build/logs/clover.xml"/>
</logging>
<filter>
<whitelist addUncoveredFilesFromWhitelist="true">
<directory suffix=".php">./src</directory>
</whitelist>
</filter>
</phpunit>
\ No newline at end of file
<?php
namespace enshrined\svgSanitize\ElementReference;
use enshrined\svgSanitize\data\XPath;
use enshrined\svgSanitize\Exceptions\NestingException;
use enshrined\svgSanitize\Helper;
class Resolver
{
/**
* @var XPath
*/
protected $xPath;
/**
* @var Subject[]
*/
protected $subjects = [];
/**
* @var array DOMElement[]
*/
protected $elementsToRemove = [];
/**
* @var int
*/
protected $useNestingLimit;
public function __construct(XPath $xPath, $useNestingLimit)
{
$this->xPath = $xPath;
$this->useNestingLimit = $useNestingLimit;
}
public function collect()
{
$this->collectIdentifiedElements();
$this->processReferences();
$this->determineInvalidSubjects();
}
/**
* Resolves one subject by element.
*
* @param \DOMElement $element
* @param bool $considerChildren Whether to search in Subject's children as well
* @return Subject|null
*/
public function findByElement(\DOMElement $element, $considerChildren = false)
{
foreach ($this->subjects as $subject) {
if (
$element === $subject->getElement()
|| $considerChildren && Helper::isElementContainedIn($element, $subject->getElement())
) {
return $subject;
}
}
return null;
}
/**
* Resolves subjects (plural!) by element id - in theory malformed
* DOM might have same ids assigned to different elements and leaving
* it to client/browser implementation which element to actually use.
*
* @param string $elementId
* @return Subject[]
*/
public function findByElementId($elementId)
{
return array_filter(
$this->subjects,
function (Subject $subject) use ($elementId) {
return $elementId === $subject->getElementId();
}
);
}
/**
* Collects elements having `id` attribute (those that can be referenced).
*/
protected function collectIdentifiedElements()
{
/** @var \DOMNodeList|\DOMElement[] $elements */
$elements = $this->xPath->query('//*[@id]');
foreach ($elements as $element) {
$this->subjects[$element->getAttribute('id')] = new Subject($element, $this->useNestingLimit);
}
}
/**
* Processes references from and to elements having `id` attribute concerning
* their occurrence in `<use ... xlink:href="#identifier">` statements.
*/
protected function processReferences()
{
$useNodeName = $this->xPath->createNodeName('use');
foreach ($this->subjects as $subject) {
$useElements = $this->xPath->query(
$useNodeName . '[@href or @xlink:href]',
$subject->getElement()
);
/** @var \DOMElement $useElement */
foreach ($useElements as $useElement) {
$useId = Helper::extractIdReferenceFromHref(
Helper::getElementHref($useElement)
);
if ($useId === null || !isset($this->subjects[$useId])) {
continue;
}
$subject->addUse($this->subjects[$useId]);
$this->subjects[$useId]->addUsedIn($subject);
}
}
}
/**
* Determines and tags infinite loops.
*/
protected function determineInvalidSubjects()
{
foreach ($this->subjects as $subject) {
if (in_array($subject->getElement(), $this->elementsToRemove)) {
continue;
}
$useId = Helper::extractIdReferenceFromHref(
Helper::getElementHref($subject->getElement())
);
try {
if ($useId === $subject->getElementId()) {
$this->markSubjectAsInvalid($subject);
} elseif ($subject->hasInfiniteLoop()) {
$this->markSubjectAsInvalid($subject);
}
} catch (NestingException $e) {
$this->elementsToRemove[] = $e->getElement();
$this->markSubjectAsInvalid($subject);
}
}
}
/**
* Get all the elements that caused a nesting exception.
*
* @return array
*/
public function getElementsToRemove() {
return $this->elementsToRemove;
}
/**
* The Subject is invalid for some reason, therefore we should
* remove it and all it's child usages.
*
* @param Subject $subject
*/
protected function markSubjectAsInvalid(Subject $subject) {
$this->elementsToRemove = array_merge(
$this->elementsToRemove,
$subject->clearInternalAndGetAffectedElements()
);
}
}
\ No newline at end of file
<?php
namespace enshrined\svgSanitize\ElementReference;
class Subject
{
/**
* @var \DOMElement
*/
protected $element;
/**
* @var Usage[]
*/
protected $useCollection = [];
/**
* @var Usage[]
*/
protected $usedInCollection = [];
/**
* @var int
*/
protected $useNestingLimit;
/**
* Subject constructor.
*
* @param \DOMElement $element
* @param int $useNestingLimit
*/
public function __construct(\DOMElement $element, $useNestingLimit)
{
$this->element = $element;
$this->useNestingLimit = $useNestingLimit;
}
/**
* @return \DOMElement
*/
public function getElement()
{
return $this->element;
}
/**
* @return string
*/
public function getElementId()
{
return $this->element->getAttribute('id');
}
/**
* @param array $subjects Previously processed subjects
* @param int $level The current level of nesting.
* @return bool
* @throws \enshrined\svgSanitize\Exceptions\NestingException
*/
public function hasInfiniteLoop(array $subjects = [], $level = 1)
{
if ($level > $this->useNestingLimit) {
throw new \enshrined\svgSanitize\Exceptions\NestingException('Nesting level too high, aborting', 1570713498, null, $this->getElement());
}
if (in_array($this, $subjects, true)) {
return true;
}
$subjects[] = $this;
foreach ($this->useCollection as $usage) {
if ($usage->getSubject()->hasInfiniteLoop($subjects, $level + 1)) {
return true;
}
}
return false;
}
/**
* @param Subject $subject
*/
public function addUse(Subject $subject)
{
if ($subject === $this) {
throw new \LogicException('Cannot add self usage', 1570713416);
}
$identifier = $subject->getElementId();
if (isset($this->useCollection[$identifier])) {
$this->useCollection[$identifier]->increment();
return;
}
$this->useCollection[$identifier] = new Usage($subject);
}
/**
* @param Subject $subject
*/
public function addUsedIn(Subject $subject)
{
if ($subject === $this) {
throw new \LogicException('Cannot add self as usage', 1570713417);
}
$identifier = $subject->getElementId();
if (isset($this->usedInCollection[$identifier])) {
$this->usedInCollection[$identifier]->increment();
return;
}
$this->usedInCollection[$identifier] = new Usage($subject);
}
/**
* @param bool $accumulated
* @return int
*/
public function countUse($accumulated = false)
{
$count = 0;
foreach ($this->useCollection as $use) {
$useCount = $use->getSubject()->countUse();
$count += $use->getCount() * ($accumulated ? 1 + $useCount : max(1, $useCount));
}
return $count;
}
/**
* @return int
*/
public function countUsedIn()
{
$count = 0;
foreach ($this->usedInCollection as $usedIn) {
$count += $usedIn->getCount() * max(1, $usedIn->getSubject()->countUsedIn());
}
return $count;
}
/**
* Clear the internal arrays (to free up memory as they can get big)
* and return all the child usages DOMElement's
*
* @return array
*/
public function clearInternalAndGetAffectedElements()
{
$elements = array_map(function(Usage $usage) {
return $usage->getSubject()->getElement();
}, $this->useCollection);
$this->usedInCollection = [];
$this->useCollection = [];
return $elements;
}
}
\ No newline at end of file
<?php
namespace enshrined\svgSanitize\ElementReference;
class Usage
{
/**
* @var Subject
*/
protected $subject;
/**
* @var int
*/
protected $count;
/**
* @param Subject $subject
* @param int $count
*/
public function __construct(Subject $subject, $count = 1)
{
$this->subject = $subject;
$this->count = (int)$count;
}
/**
* @param int $by
*/
public function increment($by = 1)
{
$this->count += (int)$by;
}
/**
* @return Subject
*/
public function getSubject()
{
return $this->subject;
}
/**
* @return int
*/
public function getCount()
{
return $this->count;
}
}
\ No newline at end of file
<?php
namespace enshrined\svgSanitize\Exceptions;
use Exception;
class NestingException extends \Exception
{
/**
* @var \DOMElement
*/
protected $element;
/**
* NestingException constructor.
*
* @param string $message
* @param int $code
* @param Exception|null $previous
* @param \DOMElement|null $element
*/
public function __construct($message = "", $code = 0, Exception $previous = null, \DOMElement $element = null)
{
$this->element = $element;
parent::__construct($message, $code, $previous);
}
/**
* Get the element that caused the exception.
*
* @return \DOMElement
*/
public function getElement()
{
return $this->element;
}
}
\ No newline at end of file
<?php
namespace enshrined\svgSanitize;
class Helper
{
/**
* @param \DOMElement $element