Mini Shell
"""
SecAuditLog parser
"""
import json
import ipaddress
import os
import os.path
import posixpath
import re
import urllib.parse
from abc import ABCMeta, abstractmethod
from contextlib import suppress
from copy import copy
from http.cookies import CookieError, SimpleCookie
from itertools import product
from logging import getLogger
from typing import Optional, Tuple
from urllib.parse import parse_qs
from defence360agent.utils.common import DAY, rate_limit
from defence360agent.utils import USER_IDENTITY_FIELD, user_identity
from im360.contracts.config import ModsecSensor
# Keys used for the inbound/outbound anomaly scores
# (presumably filled in by get_anomality_score_items, which is not
# visible in this part of the file -- TODO confirm).
ANOMALITY_SCORE_FIELDS = (
"inbound_anomality_score",
"outbound_anomality_score",
)
# Names of the fields the parsers below emit for an audit log entry.
SEVERITY, ADVANCED, HEADERS = "severity", "advanced", "headers"
URI, HTTP_METHOD, FORM_PARAMS, QUERY_PARAMS, ATTACKERS_IP, STATUS_CODE = (
"uri",
"http_method",
"form",
"query",
"attackers_ip",
"status_code",
)
ENGINE_MODE = "engine_mode"
# fields to pass from ModSecurity to Imunify sensor socket
PICK_SECAUDITLOG_FIELDS = (
"User-Agent",
"Host",
ATTACKERS_IP,
"transaction_id",
"rule",
"msg",
"message",
"access_denied",
SEVERITY,
"ver",
"tag",
"Producer",
"modsec_version",
"vendor",
STATUS_CODE,
ENGINE_MODE,
ADVANCED,
) + ANOMALITY_SCORE_FIELDS
# _SENSITIVE_HEADERS holds both header names; _COOKIE is bound to the first
# one and the second one is discarded into "_".
_COOKIE, _ = _SENSITIVE_HEADERS = ("cookie", "authorization")
# Captures the direction (Inbound/Outbound) and the first number that
# follows "Anomaly Score"; matching is case-insensitive.
_PARSE_SCORE_REGEX = re.compile(
r"(Inbound|Outbound) Anomaly Score.*?(\d+)", re.IGNORECASE
)
logger = getLogger(__name__)
# logger.exception wrapped by rate_limit(period=DAY); presumably this limits
# the record to one per day -- verify against rate_limit.
throttled_log_exception = rate_limit(period=DAY)(logger.exception)
class ParseError(RuntimeError):
    """
    Raised when an audit log line or entry cannot be parsed.

    Log it as logger.exception(*e.args) to avoid sentry duplicates.
    """
class _AmbiguousSeverity(RuntimeError):
    """
    A message carries more than one severity token.

    Meant to be logged as a warning, not as an error.
    """
class MalformedFileError(RuntimeError):
    """
    Raised when an audit log file contains a line that cannot be parsed.

    Log it as logger.exception(*e.args) to avoid sentry duplicates.
    """
class _MiscDataNotInteresting(Exception):
    """Signals input that is valid but carries nothing worth reporting."""
class _SerialLogSectionParser(metaclass=ABCMeta):
    """Interface shared by the per-section parsers of a serial audit log."""

    # compiled regex recognising the boundary line that opens the section;
    # concrete subclasses are expected to override it
    _IS_APPLICABLE_SINCE_REGEX = None

    @classmethod
    def is_applicable_since(cls, line):
        """Tell whether *line* is the boundary line that starts this section."""
        boundary_regex = cls._IS_APPLICABLE_SINCE_REGEX
        assert boundary_regex, "regex should be implemented"
        return boundary_regex.match(line) is not None

    @classmethod
    @abstractmethod
    def parse_name_value_tokens(cls, line):
        """
        Split one line of the section into name, value tokens.

        :return: name, value tokens
        :raise ValueError: if cannot parse string
        """
class _SectionAParser(_SerialLogSectionParser):
    """
    Section A (audit log header): extracts the client address and the
    transaction id from a line such as
    [02/Feb/2018:11:49:22 +0000] WnRQQvAURie7wq9bOfH25AAAAAE ::1 57480 ::1 80
    """

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}A--$")
    _PARSE_IP_REGEX = re.compile(r"^\[.*?\] (?P<id>\S+) (?P<ip>\S+)")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """
        :return: generator of (name, value) pairs: the attacker's ip,
            then the transaction id
        :raise _MiscDataNotInteresting: if the line does not look like
            a section A header or the address token is not a valid
            IPv4/IPv6 address
        """
        match = cls._PARSE_IP_REGEX.match(line)
        if not match:
            logger.warning(
                "--section-A--: cannot parse this line: %s", repr(line)
            )
            raise _MiscDataNotInteresting()
        raw_ip = match.group("ip")
        try:
            ipv4_or_ipv6 = ipaddress.ip_address(raw_ip)
        except ValueError:
            # report the rejected address itself (group 1 is the
            # transaction id, which is useless in this message)
            logger.warning(
                "--section-A--: cannot use %s for IPv4 or IPv6 address",
                raw_ip,
            )
            raise _MiscDataNotInteresting()
        yield ATTACKERS_IP, str(ipv4_or_ipv6)
        yield "transaction_id", match.group("id")
class _SectionBParser(_SerialLogSectionParser):
    """Section B: the request line followed by the request headers."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}B--$")
    _PARSE_NAME_VALUE = re.compile(r"^(\S*): ?(.*)$")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """
        Yield a HEADERS token for a header line, or the method/uri
        (and optionally the query) tokens for the request line.

        :raise _MiscDataNotInteresting: if the line is neither of them
        """
        header = cls._PARSE_NAME_VALUE.match(line)
        if header is not None:
            header_name, raw_value = header.groups()
            # an empty header value is reported as None
            shown_value, wanted = obfuscate_if_sensitive_and_required(
                header_name, raw_value or None
            )
            if wanted:
                yield HEADERS, [header_name, shown_value]
            return
        try:
            # request line, e.g.: GET /a/c/getName/?param1=abs HTTP/1.1
            method, uri, *_ = line.split()
            if method.isupper() and os.path.isabs(uri):
                yield HTTP_METHOD, method
                url_parts = urllib.parse.urlparse(uri)
                yield URI, url_parts.path
                if ModsecSensor.SEND_ADDITIONAL_DATA:
                    yield QUERY_PARAMS, obfuscate(url_parts.query)
        except ValueError:
            raise _MiscDataNotInteresting()
class _SectionCParser(_SerialLogSectionParser):
    """Section C: the request body."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}C--$")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """Yield the obfuscated body line, but only if additional data
        is configured to be sent; otherwise yield nothing."""
        if not ModsecSensor.SEND_ADDITIONAL_DATA:
            return
        yield FORM_PARAMS, obfuscate(line)
class _SectionFParser(_SerialLogSectionParser):
    """Section F: the response status line and headers."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}F--$")
    _STATUS_CODE_REGEX = re.compile(r"^HTTP/\d\.\d (\d+).*$")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """
        Yield the status code (as a string) found in the status line.

        :raise _MiscDataNotInteresting: for any other line
        """
        status_line = cls._STATUS_CODE_REGEX.match(line)
        if status_line is None:
            raise _MiscDataNotInteresting()
        yield STATUS_CODE, status_line.group(1)
class _BaseSectionHParser(_SerialLogSectionParser):
    """Section H (audit log trailer): "Name: value" lines, including
    the ModSecurity messages with their [key "value"] tokens."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}H--$")
    _PARSE_MESSAGE_APACHE_ERROR = re.compile(
        r"([^=]*) (ModSecurity):\s+([^=]*)"
    )
    _PARSE_MESSAGE_REGEX = re.compile(r'(\[\w+ ".*?"\])')
    _PARSE_ACCESS_DENIED_CODE = re.compile(r"Access denied with code \d+")
    _PARSE_MESSAGE_TOKENS_REGEX = re.compile(r'\[(\w+) "(.*?)"\]')
    _INCOMPLETE_MESSAGE_REGEX = re.compile(r'(\[\w+ ".*?$)')

    @classmethod
    def parse_name_value_tokens(cls, line):
        """
        :return: tuple that starts with the field name; for message
            fields it is followed by the parsed dict and, optionally,
            the incomplete trailing part, otherwise by the raw value
        :raise ValueError: if the line is not "Name: value"
        """
        field, payload = cls._parse_name_value(line)
        # "ModSecurity" is the field name used by modsec version 3.x
        if field in ("Message", "ModSecurity"):
            return (field, *cls._parse_message(payload, line))
        if field == "Apache-Error":
            return cls._parse_apache_err(payload, line)
        # note: ModSecurity v3 with `SecAuditLogFormat Native` has not
        # `Engine-Mode` (secrules_engine) field currently
        if field == "Engine-Mode":
            # the value comes wrapped in double quotes
            return ENGINE_MODE, payload.strip('"')
        return field, payload

    @classmethod
    def _parse_name_value(cls, s):
        """
        Split "Name: value" into ("Name", "value").

        :raise ValueError: if cannot parse string
        """
        name, value = s.split(maxsplit=1)
        if not name.endswith(":"):
            raise ValueError('Expecting sort of "Name: value"')
        # drop the trailing colon
        return name[:-1], value

    @classmethod
    def _parse_apache_err(cls, msgval, _):
        """
        Parse the ModSecurity part of an Apache-Error value.

        :raise _MiscDataNotInteresting: if there is no such part
        """
        found = cls._PARSE_MESSAGE_APACHE_ERROR.match(msgval)
        if found is None:
            # Apache-Error line doesn't have ModSecurity part
            raise _MiscDataNotInteresting()
        name, modsec = found.group(2, 3)
        return (name, *cls._parse_message(modsec, ""))

    @classmethod
    def _parse_message(cls, msgval, audit_log_line):
        """
        Turn '[key "val"] [key "val"] ... [key "val"]' into
        {"key": "val", "key": "val", ...}; the text outside of the
        tokens is stored under "message".

        If the value ends with an unterminated token, e.g. '[key "val'
        (the closing '"]' is lost), that part is returned as a string
        after the dict.

        :raise _AmbiguousSeverity: if severity is given more than once
        """
        complete_tokens = cls._PARSE_MESSAGE_REGEX.findall(msgval)
        free_text = msgval
        for chunk in complete_tokens:
            free_text = free_text.replace(chunk, "").strip()
        dangling = cls._INCOMPLETE_MESSAGE_REGEX.findall(free_text)
        for chunk in dangling:
            free_text = free_text.replace(chunk, "").strip()

        parsed = {"message": free_text}
        if cls._PARSE_ACCESS_DENIED_CODE.search(free_text):
            parsed["access_denied"] = True

        joined_tokens = " ".join(complete_tokens)
        for key, text in cls._PARSE_MESSAGE_TOKENS_REGEX.findall(
            joined_tokens
        ):
            previous = parsed.get(key)
            if previous is not None:
                if key == "severity":
                    logger.warning("Ambiguous severity: %r", audit_log_line)
                    raise _AmbiguousSeverity()
                if not isinstance(previous, list):
                    # repeated token: collect all its values in a list
                    previous = parsed[key] = [previous]
                previous.append(text)
            elif key == "tag":
                # 'tag' is always a list
                parsed[key] = [text]
            elif key == "id":
                # stored as "rule" to be consistent with other plugins
                parsed["rule"] = text
                if text is None:
                    throttled_log_exception(
                        "rule field is None, modsec message: %s", msgval
                    )
            else:
                parsed[key] = text
            if key == "msg":
                parsed.update(get_anomality_score_items(text))
        return (parsed, *dangling)
class _SectionHParser(_BaseSectionHParser):
    """
    _BaseSectionHParser + DEF-2617 workaround:
    a message cut across several log lines is stitched back together
    """

    # blank state of the message that is being stitched together
    _INCOMPLETE_RESULT = {
        "name": "",
        "value": {},
        "incomplete_part": "",
        "count": 0,
    }
    # upper bound for "count" while continuation lines are still accepted
    _MAX_INCOMPLETE_MESSAGE = 4

    def __init__(self):
        self._open_matched_string = False
        self._incomplete_result = copy(self._INCOMPLETE_RESULT)

    def parse_name_value_tokens(self, line):
        """
        Generator of (name, value) pairs for one section H line.

        Complete line, e.g.
            Name: Message [key "val"] [key "val"] [key "val"]
          yields the pending (previously incomplete) pair, if any,
          followed by the pair parsed from this line.

        Line with an unterminated token, e.g.
            Name: Message [key "val"] [key "val"] [key "val
          the parsed name, value and the incomplete part are remembered
          until the next line; only the pending pair, if any, is yielded.

        Line that is not in 'name: value' format:
          without pending data the error is raised; otherwise the line
          is appended to the pending incomplete part and parsed by
          _parse_message. If the result is still incomplete the pending
          data is updated and nothing is yielded, else the collected
          pair is yielded.
        """
        ready = []
        try:
            parsed = super().parse_name_value_tokens(line)
        except ValueError:
            pending = self._incomplete_result
            seen = pending["count"]
            if not seen or seen > self._MAX_INCOMPLETE_MESSAGE:
                # nothing to continue (or too many continuation lines)
                if self._open_matched_string:
                    raise _MiscDataNotInteresting()
                raise
            continued = self._parse_message(
                pending["incomplete_part"] + line, line
            )
            continued[0].pop("message")
            pending["value"].update(continued[0])
            if len(continued) == 1:
                # the message is complete now
                ready.append((pending["name"], pending["value"]))
                self._incomplete_result = copy(self._INCOMPLETE_RESULT)
            else:
                pending["incomplete_part"] = continued[1]
                pending["count"] += 1
        else:
            # the line is a regular "Name: value" one
            pending = self._incomplete_result
            if pending["count"]:
                # emit what has been collected so far
                carried = pending["value"]
                carried["message"] += pending["incomplete_part"]
                ready.append((pending["name"], carried))
            if len(parsed) == 3:
                name, value, tail = parsed
                if name == "Message" and "[MatchedString" in line:
                    value["message"] += tail
                    ready.append((name, value))
                    self._incomplete_result = copy(self._INCOMPLETE_RESULT)
                else:
                    # wait for the rest of the message
                    pending.update(
                        name=name, value=value, incomplete_part=tail, count=1
                    )
            else:
                name, value = parsed[0], parsed[1]
                ready.append((name, value))
                self._incomplete_result = copy(self._INCOMPLETE_RESULT)
            self._open_matched_string = (
                name == "Message" and "[MatchedString" in line
            )
        finally:
            yield from ready
class _SectionZParser(_SerialLogSectionParser):
    """Section Z: the final boundary, it has no content to parse."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}Z--$")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """Always fails: the class only serves as end-of-entry marker."""
        raise NotImplementedError("Expecting this method never be called.")
class _SectionNotInterestedInParser(_SerialLogSectionParser):
    """Any section whose letter is not one of A, B, H, Z;
    its content is skipped."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}[^ABHZ]--$")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """Reject every line of the section as not interesting."""
        raise _MiscDataNotInteresting()
class Parser(metaclass=ABCMeta):
    """Common interface of the audit log parsers."""

    # every printable ascii char (codes 32..126) except for '%'
    _SAFE_UNQUOTED = "".join(map(chr, range(32, 127))).replace("%", "")

    @abstractmethod
    def feed(self, bytes_):
        """
        :param bytes_: audit log line
        :return dict: for complete result and None for incomplete.
            See unit tests for result dict fields.
        :raise ParseError:
        """

    def flush(self):
        """Do nothing by default."""
        return None

    @classmethod
    def _adaptive_decode_bytes(cls, bytes_):
        """
        Decode *bytes_*; on UnicodeDecodeError fall back to
        percent-quoting everything but the chars of _SAFE_UNQUOTED.
        """
        try:
            decoded = cls.decode_bytes(bytes_)
        except UnicodeDecodeError:
            decoded = urllib.parse.quote_from_bytes(
                bytes_, safe=cls._SAFE_UNQUOTED
            )
        return decoded

    @staticmethod
    def decode_bytes(bytes_):
        """
        Separate function because the 'read-only' bstr.decode() attr
        cannot be mocked.
        """
        return bytes_.decode()
class _RevolverParser(Parser):
    """Feeds the input to one of several parser classes, switching to
    the next class whenever the current one fails to parse."""

    # skip first batch of errors if Parsers starts parsing
    # in the middle of serial log on agent restart
    _SKIP_FIRST_NUM_ERRORS = 11
    _available_parsers = []

    def __init__(self, audit_logdir_path=None):
        """
        :param str audit_logdir_path: audit log dir path for ModSecurity
            concurrent mode
        """
        self._audit_logdir_path = audit_logdir_path
        self._revolve()
        self._skip_first_num_errors = self._SKIP_FIRST_NUM_ERRORS

    def feed(self, bytes_):
        """
        :return: what the underlying parser returns, or None when
            an error from the initial "skip" budget is swallowed
        :raise ParseError: once the budget is used up
        """
        try:
            result = self._feed_impl(bytes_)
        except ParseError:
            if self._skip_first_num_errors <= 0:
                raise
            # one of the "first num errors", skip it
            self._skip_first_num_errors -= 1
            return None
        if bytes_.strip():
            # no errors are expected after the first non empty
            # and successfully processed token
            self._skip_first_num_errors = 0
        return result

    def flush(self):
        return self._current_parser.flush()

    def _feed_impl(self, bytes_):
        """Give every available parser one chance to parse *bytes_*."""
        attempts_left = len(self._available_parsers)
        while attempts_left > 0:
            attempts_left -= 1
            try:
                return self._current_parser.feed(bytes_)
            except ParseError as e:
                if attempts_left == 0:
                    # that was the last attempt
                    raise ParseError(
                        "Neither of available parsers is capable "
                        "to parse this: %r ",
                        bytes_,
                    ) from e
                # give the next parser a chance...
                self._revolve()

    def _revolve(self):
        """Exchange parser to the next"""
        ring = self._available_parsers
        assert (
            len(ring) >= 2
        ), "Count of available_parsers should not be less than 2"
        logger.info("Swap %s<->%s", ring[0].__name__, ring[1].__name__)
        self._available_parsers = ring[1:] + ring[:1]
        # the new current parser starts with a blank state
        parser_cls = self._available_parsers[0]
        if self._audit_logdir_path is None:
            parser_kwargs = {}
        else:
            parser_kwargs = {"audit_logdir_path": self._audit_logdir_path}
        self._current_parser = parser_cls(**parser_kwargs)
class _JsonLogParserBase(Parser):
    """Base for the parsers of one-json-document-per-line audit logs."""

    def __init__(self, **_):
        self._accumulating_result = {}
        self._accumulating_lines = []

    def parse_json_data(self, data: dict) -> dict:
        """Convert a decoded json log entry to the top level tokens
        dict; has to be implemented by subclasses."""
        raise NotImplementedError()

    def feed(self, bytes_):
        """
        :param bytes_: audit log line holding a whole json document
        :return: incident list for a parsed entry, None for an empty
            or not interesting line
        :raise ParseError: if the line cannot be parsed
        """
        line = self._adaptive_decode_bytes(bytes_.rstrip())
        if not line:
            return None
        try:
            self._accumulating_lines.append(line)
            self._accumulating_result = self.parse_json_data(
                json.loads(line)
            )
            return self.flush()
        except _MiscDataNotInteresting:
            return None
        except Exception as e:
            raise ParseError(
                "Error occurs while parse json line %r, reason: %r"
                % (line, e)
            ) from e
        finally:
            # flush() resets the state on success; do the same for
            # skipped/failed lines, otherwise they pile up in the buffer
            # and leak into the debug context of the next entry
            self._accumulating_result = {}
            self._accumulating_lines = []

    def flush(self):
        """Build the incident list from what has been accumulated and
        reset the state."""
        try:
            return mk_modsec_indicent_list(
                self._accumulating_result, debug_ctx=self._accumulating_lines
            )
        finally:
            self._accumulating_result = {}
            self._accumulating_lines = []
class _JsonLogParserv2(_JsonLogParserBase):
    """Parse ModSecurity v2 json log entries."""

    def parse_json_data(self, data: dict) -> dict:
        """
        Convert a ModSecurity v2 json log entry to top level tokens.

        The contents of the log entry can be seen here
        https://github.com/SpiderLabs/ModSecurity/blob/v2/master/apache2/msc_logging.c#L644
        Optional fields may be omitted due to SecAuditLogParts setting.
        Expected data (some unused keys are omitted):
        {'transaction': {
            'time': str,
            'transaction_id': str,
            'remote_address': str,
            'remote_port': int,
            'local_address': str,
            'local_port': int
         },
         'request': {
            'request_line': str,  # optional field
            'body': [],  # optional field
            'headers': {},  # optional field
         },
         'response': {
            'protocol': str,  # optional field
            'body': str,  # optional field
            'headers': {},  # optional field
            'status': int,  # optional field
         },
         'audit_data': {
            'producer': str or [],  # optional field
            'messages': [],  # optional field
            'error_messages': [],  # optional field
            'engine_mode': str,  # optional field
         }
        }

        :raise ParseError: if a top level key is missing
        :raise _MiscDataNotInteresting: if the remote address is not
            a valid IPv4/IPv6 address
        """  # noqa: E501
        required_keys = {"transaction", "request", "response", "audit_data"}
        if not required_keys.issubset(data.keys()):
            raise ParseError("Not expected json data for ModSecurity v2")

        transaction = data["transaction"]
        request = data["request"]
        response = data["response"]
        audit_data = data["audit_data"]
        with_additional_data = ModsecSensor.SEND_ADDITIONAL_DATA
        result = {}

        # section A
        remote_address = transaction["remote_address"]
        try:
            result[ATTACKERS_IP] = str(ipaddress.ip_address(remote_address))
        except ValueError:
            logger.warning(
                "Cannot use %s for IPv4 or IPv6 address",
                transaction["remote_address"],
            )
            raise _MiscDataNotInteresting()
        result["transaction_id"] = transaction.get("transaction_id", "-")

        # section B
        result[HEADERS] = []
        for header, value in request.get("headers", {}).items():
            shown_value, wanted = obfuscate_if_sensitive_and_required(
                header, value
            )
            if wanted:
                result[HEADERS].append([header, shown_value])
        request_line = request.get("request_line")
        if request_line:
            # e.g.: GET /a/c/getName/?param1=abs HTTP/1.1
            method, uri, *_ = request_line.split()
            if method.isupper() and os.path.isabs(uri):
                result[HTTP_METHOD] = method
                url_parts = urllib.parse.urlparse(uri)
                result[URI] = url_parts.path
                if with_additional_data:
                    result[QUERY_PARAMS] = obfuscate(url_parts.query)

        # section C
        if request.get("body") and with_additional_data:
            result[FORM_PARAMS] = obfuscate("".join(request["body"]))

        # section F
        status = response.get("status")
        if status:
            result[STATUS_CODE] = status

        # section H
        producer = audit_data.get("producer")
        if producer:
            if isinstance(producer, str):
                version, _ = get_producer_data(producer)
            else:
                # the first item names ModSecurity itself,
                # the others are reported as vendors
                modsec_full_name, *vendors = producer
                version, _ = get_producer_data(modsec_full_name)
                result["vendor"] = vendors
            result["modsec_version"] = version
        engine_mode = audit_data.get("engine_mode")
        if engine_mode:
            result[ENGINE_MODE] = engine_mode
        messages = audit_data.get("messages")
        if messages:
            result["MessageList"] = [
                _SectionHParser._parse_message(msg, "")[0] for msg in messages
            ]
        return result
class _KeyMappedJsonLogParserv3(_JsonLogParserBase):
    """Parse ModSecurity v3 json log entries."""

    # Maps "transaction_id", "status_code" and "secrules_engine" to the
    # key names used by a particular log producer; defined by subclasses
    KEYS_MAPPER = {}

    def _to_lower_keys(self, obj):
        """
        Return a copy of *obj* with all dict keys lowercased,
        recursively; the value stored under "headers" is left as is.

        Assume that *obj* is json serializable.
        """
        if isinstance(obj, dict):
            # Leave the headers as is
            return {
                k.lower(): (
                    self._to_lower_keys(v) if k.lower() != "headers" else v
                )
                for k, v in obj.items()
            }
        elif isinstance(obj, list):
            return [self._to_lower_keys(item) for item in obj]
        else:
            return obj

    def parse_json_data(self, data: dict) -> dict:
        """
        Parse log entry data for JSON SecAuditLogFormat.
        https://github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual-(v2.x)#SecAuditLogFormat
        The contents of the log entry can be seen here
        https://github.com/SpiderLabs/ModSecurity/blob/v3/master/src/transaction.cc#L1622
        Optional fields may be omitted due to SecAuditLogParts setting.
        Expected data:
        {'transaction': {
            'client_ip': str,
            'time_stamp': str,
            'server_id': str,
            'client_port': int,
            'host_ip': str,
            'host_port': int,
            'unique_id': str,
            'request': {
                'http_version': str,
                'method': str,
                'uri': str,
                'body': str,  # optional field
                'headers': {},  # optional field
            },
            'response': {
                'body': str,  # optional field
                'headers': {},  # optional field
                'http_code': int,
            },
            # optional fields below
            'producer': {
                'components': [],
                'connector': str,
                'modsecurity': str,
                'secrules_engine': str,
            },
            'messages': [{'details': {'accuracy': str,
                                      'data': str,
                                      'file': str,
                                      'lineNumber': str,
                                      'match': str,
                                      'maturity': str,
                                      'reference': str,
                                      'rev': '1',
                                      'ruleId': str,
                                      'severity': str,
                                      'tags': [],
                                      'ver': str},
                          'message': str}]}}

        :raise NotImplementedError: if KEYS_MAPPER is not defined
        :raise ParseError: on unexpected top level keys or a missing
            transaction id
        :raise _MiscDataNotInteresting: if json support is missing in
            ModSecurity or the client ip is not a valid address
        """  # noqa: E501
        if not self.KEYS_MAPPER:
            # Ensuring we are executing in the inheritor with required class attr defined
            raise NotImplementedError(
                "_KeyMappedJsonLogParserv3 misses KEYS_MAPPER definition"
            )
        error_data = {
            "error": "ModSecurity was not compiled with JSON support."
        }
        if data == error_data:
            logger.warning(
                "SecAuditLogFormat set to JSON, but "
                "ModSecurity was not compiled with JSON support."
            )
            raise _MiscDataNotInteresting()
        expected_data_keys = {"transaction"}
        if data.keys() != expected_data_keys:
            raise ParseError("Not expected json data for Coraza/ModSecurityV3")
        # to avoid possible key format changes in ModSecurity
        # convert dict keys to lowercase
        data = self._to_lower_keys(data)
        result = {}
        transaction = data["transaction"]
        additional_data_is_required = ModsecSensor.SEND_ADDITIONAL_DATA
        # section A
        try:
            result[ATTACKERS_IP] = str(
                ipaddress.ip_address(transaction["client_ip"])
            )
        except ValueError:
            logger.warning(
                "Cannot use %s for IPv4 or IPv6 address",
                transaction["client_ip"],
            )
            raise _MiscDataNotInteresting()
        transaction_id = transaction.get(
            self.KEYS_MAPPER["transaction_id"], None
        )
        if transaction_id is None:
            raise ParseError(
                "Missing unique id field in json data for Coraza/ModSecurityV3"
            )
        # looks like there could be both string and int values,
        # so let's make it a string
        result["transaction_id"] = str(transaction_id)
        # section B
        request = transaction["request"]
        # the headers part is optional, see the docstring
        request_headers = request.get("headers", {})
        result[HEADERS] = []
        for header, value in request_headers.items():
            (
                maybe_obfuscated_value,
                is_required,
            ) = obfuscate_if_sensitive_and_required(header, value)
            if is_required:
                result[HEADERS].append([header, maybe_obfuscated_value])
        result[HTTP_METHOD] = request["method"]
        parsed = urllib.parse.urlparse(request["uri"])
        result[URI] = parsed.path
        if additional_data_is_required:
            result[QUERY_PARAMS] = obfuscate(parsed.query)
        # section C
        if request.get("body") and additional_data_is_required:
            result[FORM_PARAMS] = obfuscate(request["body"])
        # section F
        result[STATUS_CODE] = transaction["response"].get(
            self.KEYS_MAPPER["status_code"]
        )
        # section H
        if transaction.get("producer"):
            result["vendor"] = transaction["producer"]["components"]
            version, _ = get_producer_data(
                transaction["producer"]["modsecurity"]
            )
            result["modsec_version"] = version
            result[ENGINE_MODE] = transaction["producer"].get(
                self.KEYS_MAPPER["secrules_engine"]
            )
        if transaction.get("messages"):
            # Headers might be in lowercase e.g.:
            # tests/core/fixtures/test_modsec_audit_log/
            #   test_parser_log_sample-13_json_v3
            #   test_parser_log_sample-29_pretty_json_litespeed
            # (same for every message, hence looked up once; a missing
            # headers part must not fail the whole entry)
            hostname = request_headers.get("Host") or request_headers.get(
                "host"
            )
            access_denied = result[STATUS_CODE] == 403
            result["MessageList"] = []
            for msg in transaction["messages"]:
                message = msg["details"]
                message["rule"] = str(message.pop("ruleid"))
                message["msg"] = (
                    msg["msg"] if "msg" in msg else msg.get("message")
                )
                message["hostname"] = hostname
                message["access_denied"] = access_denied
                if message.get("msg"):
                    message.update(get_anomality_score_items(message["msg"]))
                result["MessageList"].append(message)
        return result
class _JsonLogParserv3(_KeyMappedJsonLogParserv3):
    """Key names as found in ModSecurity v3 json log entries."""

    KEYS_MAPPER = dict(
        transaction_id="unique_id",
        status_code="http_code",
        secrules_engine="secrules_engine",
    )
class _JsonLogParserCoraza(_KeyMappedJsonLogParserv3):
    """Key names as found in Coraza json log entries."""

    KEYS_MAPPER = dict(
        transaction_id="id",
        status_code="status",
        secrules_engine="rules_engine",
    )
class _JsonPrettyLogParser(_JsonLogParserv3):
    """Parse pretty-printed (multi-line) ModSecurity v3 json entries:
    the lines from a bare "{" up to a bare "}" form one document."""

    def feed(self, bytes_):
        """
        :param bytes_: one line of a pretty-printed json document
        :return: incident list once the closing "}" completes
            an interesting document, None otherwise
        :raise ParseError: on unexpected document boundaries or
            when the collected document cannot be parsed
        """
        line = self._adaptive_decode_bytes(bytes_.rstrip())
        if not line:
            return None
        if line == "{":
            if self._accumulating_lines:
                self._accumulating_lines = []
                raise ParseError(
                    "Error occurs while parse json line %r, "
                    "not empty line buffer on start new json" % (line,)
                )
        elif not self._accumulating_lines:
            raise ParseError(
                "Error occurs while parse json line %r, "
                "empty line buffer in a middle of json dict" % (line,)
            )
        self._accumulating_lines.append(line)
        if line != "}":
            # the document is not complete yet
            return None
        try:
            self._accumulating_result = self.parse_json_data(
                json.loads("".join(self._accumulating_lines))
            )
            return self.flush()
        except _MiscDataNotInteresting:
            return None
        except Exception as e:
            raise ParseError(
                "Error occurs while parse json line %r, reason: %r"
                % (line, e)
            ) from e
        finally:
            # flush() resets the state on success only; without the
            # reset here a skipped/failed document stays in the buffer
            # and makes the "{" of the next document be rejected
            self._accumulating_result = {}
            self._accumulating_lines = []
class _NativeLogParser(Parser):
    """
    audit log parser state machine

    Parse log entry ModSecurity 2 Data for Native audit log format.
    https://github.com/SpiderLabs/ModSecurity/wiki/ModSecurity-2-Data-Formats#Audit_Log
    More about ModSecurity log message format
    https://gerrit.cloudlinux.com/plugins/gitiles/defence360/+/refs/changes/73/111973/1/opt/DEF-21076-multiline-modsec-header-field-alert/README.txt
    Expected data:
    --2172df61-A--
    [02/Feb/2018:11:49:22 +0000] WnRQQvAURie7wq9bOfH25AAAAAE ::1 57480 ::1 80
    --2172df61-B--
    POST /1/request?x=yxz&z=xy HTTP/1.1
    User-Agent: Nessus
    Host: localhost
    Accept: */*
    cOOkIe: ABC=abc;SESSIONID=hash256
    Content-Length: 9
    Content-Type: application/x-www-form-urlencoded
    --2172df61-C--
    bc=d123&cd=e&bc=a0
    --2172df61-F--
    HTTP/1.1 404 Not Found
    Accept-Ranges: bytes
    Transfer-Encoding: chunked
    Content-Type: text/html
    --2172df61-H--
    Message: Warning. Matched phrase "nessus" at REQUEST_HEADERS:User-Agent. [file "/etc/apache2/conf.d/modsec_vendor_configs/imunify360_full_apache/103_Global_Agents.conf"] [line "17"] [id "210801"] [rev "2"] [msg "COMODO WAF: Request Indicates a Security Scanner Scanned the Site||localhost|F|2"] [data "nessus"] [severity "CRITICAL"] [tag "CWAF"] [tag "Agents"]
    Message: Warning. Pattern match "(?i:(?:^(?:microsoft url|user-Agent|www\\.weblogs\\.com|(?:jakart|vi)a|(google|i{0,1}explorer{0,1}\\.exe|(ms){0,1}ie( [0-9.]{1,}){0,1} {0,1}(compatible( browser){0,1}){0,1})$)|\\bdatacha0s\\b|; widows|\\\\r|a(?: href=|d(?:sarobot|vanced email extractor ..." at REQUEST_HEADERS:User-Agent. [file "/etc/apache2/conf.d/modsec_vendor_configs/imunify360_full_apache/103_Global_Agents.conf"] [line "29"] [id "210831"] [rev "2"] [msg "COMODO WAF: Rogue web site crawler||localhost|F|4"] [data "Nessus"] [severity "WARNING"] [tag "CWAF"] [tag "Agents"]
    Message: Warning. Operator GE matched 5 at TX:incoming_points. [file "/etc/apache2/conf.d/modsec_vendor_configs/imunify360_full_apache/122_Outgoing_FiltersEnd.conf"] [line "35"] [id "214930"] [rev "1"] [msg "COMODO WAF: Inbound Points Exceeded|Total Incoming Points: 8|localhost|F|2"] [severity "CRITICAL"] [tag "CWAF"] [tag "FiltersEnd"]
    Apache-Handler: default-handler
    Stopwatch: 1517572162346260 98908 (- - -)
    Stopwatch2: 1517572162346260 98908; combined=33129, p1=737, p2=32228, p3=0, p4=0, p5=163, sr=0, sw=1, l=0, gc=0
    Producer: ModSecurity for Apache/2.9.2 (http://www.modsecurity.org/); CWAF_Apache.
    Server: Apache
    Engine-Mode: "DETECTION_ONLY"
    --2172df61-Z--
    """  # noqa: E501

    # tried in this order: the first one whose boundary regex matches wins
    _AVAILABLE_SUBPARSERS = (
        _SectionAParser,
        _SectionBParser,
        _SectionCParser,
        _SectionFParser,
        _SectionHParser,
        _SectionZParser,
        _SectionNotInterestedInParser,
    )

    def __init__(self, **_):
        self._state_parser = None
        self._accumulating_result = {}
        self._accumulating_lines = []

    def feed(self, bytes_):
        """Remember the raw line for the debug context and parse it."""
        self._accumulating_lines.append(bytes_)
        return self._feed_impl(bytes_)

    def _feed_impl(self, bytes_):
        """
        :return: incident list when the final (Z) boundary is met,
            None otherwise
        :raise ParseError: if the line cannot be parsed
        """
        line = self._adaptive_decode_bytes(bytes_.rstrip())
        if not line:
            return None

        # does the line open a new section?
        section_cls = next(
            (
                candidate
                for candidate in self._AVAILABLE_SUBPARSERS
                if candidate.is_applicable_since(line)
            ),
            None,
        )
        if section_cls is _SectionZParser:
            return self.flush()
        if section_cls is not None:
            self._state_parser = section_cls()
            # waiting for the next feed()
            return None
        if self._state_parser is None:
            raise ParseError("No parser for line %r", line)

        # a regular line of the current section: keep accumulating result
        accumulated = self._accumulating_result
        for name, value in self.parse_name_value_tokens(line):
            # meta data from ModSecurity itself is saved as a message
            # only if no Message data was found before
            if name == "Message" or (
                name == "ModSecurity" and "MessageList" not in accumulated
            ):
                accumulated.setdefault("MessageList", []).append(value)
            elif name == HEADERS:
                accumulated.setdefault(HEADERS, []).append(value)
            elif name == "Producer":
                self._parse_producer_filed(value)
            else:
                accumulated[name] = value
        return None

    def _parse_producer_filed(self, line):
        """Store the version and vendors found in the Producer value."""
        modsec_ver, vendors = get_producer_data(line)
        if modsec_ver is not None:
            self._accumulating_result["modsec_version"] = modsec_ver
        if vendors is not None:
            self._accumulating_result["vendor"] = vendors

    def parse_name_value_tokens(self, line):
        """
        Collect the tokens produced by the current section parser
        into a list.

        :return list: empty for lines which must not interrupt parsing
        :raise ParseError: if the section parser raises ValueError
        """
        try:
            tokens = self._state_parser.parse_name_value_tokens(line)
            return list(tokens)
        except ValueError as e:
            raise ParseError(str(e)) from e
        except (_MiscDataNotInteresting, _AmbiguousSeverity):
            # continue accumulating result
            return []

    def flush(self):
        """Build the incident list from what has been accumulated and
        reset the state machine."""
        try:
            return mk_modsec_indicent_list(
                self._accumulating_result, debug_ctx=self._accumulating_lines
            )
        finally:
            self._state_parser = None
            self._accumulating_result = {}
            self._accumulating_lines = []
class SerialLogParser(_RevolverParser):
    """Parser of a serial audit log in any of the supported formats."""

    _SKIP_FIRST_NUM_ERRORS = 0  # don't skip errors
    _available_parsers = [
        _JsonLogParserv2,
        _JsonLogParserv3,
        _NativeLogParser,
        _JsonLogParserCoraza,
        _JsonPrettyLogParser,
    ]

    @classmethod
    def parse_file(cls, filepath):
        """
        do-it-all style: parse the file until the first result

        :raise MalformedFileError: if some line cannot be parsed
        """
        parser = cls()
        with open(filepath, "rb") as stream:
            # line 1 is the first one (as in most text editors)
            for lineno, raw_line in enumerate(stream, start=1):
                try:
                    result = parser.feed(raw_line)
                except ParseError as e:
                    # file:line in the exception is for better debug
                    # experience
                    raise MalformedFileError(
                        "Error in audit log file %r line %d",
                        filepath,
                        lineno,
                    ) from e
                if result is not None:
                    # then we are done
                    return result
        # return at least something
        logger.error(
            "Incomplete audit log file %r: %r",
            filepath,
            parser._current_parser._accumulating_lines,
        )
        return parser.flush()
class ConcurrentLogParser(Parser):
    """Parser for the concurrent mode: a fed line names the file that
    holds the audit log entry."""

    def __init__(self, audit_logdir_path):
        """
        :param str audit_logdir_path: audit log dir path for ModSecurity
            concurrent mode
        """
        self._audit_logdir_path = audit_logdir_path

    def _normconcat(self, token):
        """
        :param str: token is expected to start with posixpath.sep
        """
        return "".join((self._audit_logdir_path, token))

    def _directadmin_concat(self, token):
        """
        Like _normconcat(), but the first component of the token is
        additionally inserted between the log dir and the token.

        :param str: token is expected to start with posixpath.sep
        """
        first_component = token.split(posixpath.sep)[1]
        prefix = posixpath.sep.join((self._audit_logdir_path, first_component))
        return prefix + token

    def feed(self, bytes_):
        """
        Look (from the end of the line) for an absolute path that refers
        to a non empty audit log file and parse that file.

        :raise ParseError: if no such file is found
        """
        try:
            text = bytes_.decode()
        except UnicodeDecodeError:
            # nothing to look through, ParseError is raised below
            candidates = ()
        else:
            stripped = (t.strip("[]") for t in reversed(text.split()))
            candidates = filter(os.path.isabs, stripped)

        builders = (self._normconcat, self._directadmin_concat)
        for token, build_path in product(candidates, builders):
            try:
                built_path = build_path(token)
                if os.path.isfile(built_path):  # Modsecurity v2
                    log_path = built_path
                elif os.path.isfile(token):  # Modsecurity v3
                    log_path = token
                else:
                    log_path = None
            except (UnicodeEncodeError, ValueError):
                # another corner case: try the next candidate and
                # raise ParseError if none is left
                continue
            logger.debug(
                "os.path.isfile({!r}) = {!r}".format(
                    (built_path, token), bool(log_path)
                )
            )
            with suppress(FileNotFoundError):  # handle race condition
                if log_path and os.path.getsize(log_path) > 0:
                    return SerialLogParser.parse_file(log_path)
        raise ParseError("No audit log found in %r", bytes_)
class RevolverParser(_RevolverParser):
    """Switches between the serial and the concurrent log parsers."""

    _available_parsers = [
        SerialLogParser,
        ConcurrentLogParser,
    ]
class _IncidentFixupList:
    """Normalizations applied to every raw incident dict."""

    class InvalidIncident(RuntimeError):
        """
        For incidents we cannot use data from, e.g.
        "Message: Rule processing failed."
        """

    # modsec severity string <-> ossec modsec severity int
    # github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual#severity
    _SEVERITY_LEVELS = {
        # Severe attack - No chances of false positives.
        # Immediate attention is necessary.
        "EMERGENCY": 0,
        # High importance security event.
        "ALERT": 1,
        # Multiple user generated errors
        "CRITICAL": 2,
        # First time seen
        "ERROR": 3,
        # System low priority error
        "WARNING": 4,
        # Successful/Authorized events
        "NOTICE": 5,
        # System low priority notification
        "INFO": 6,
        # - None -
        "DEBUG": 7,
    }

    @classmethod
    def apply(cls, incidents, debug_ctx):
        """
        Generator of fixed up copies of *incidents*.

        An incident without "msg" is dropped, unless its "message"
        contains an unparsed "[msg " token; in that case it is
        yielded as a "noshow" one.

        :param incidents: iterable of incident dicts
        :param debug_ctx: not used here
        """
        for incident in incidents:
            incident = cls._fixup_camel_case(incident)
            try:
                cls._fixup_msg_inplace(incident)
                cls._fixup_host_tag_inplace(incident)
                cls._fixup_severity_inplace(incident)
                cls._fixup_useragent_inplace(incident)
            except cls.InvalidIncident:
                # "message" may be missing as well (or be None),
                # such an incident is just dropped
                if "[msg " in (incident.get("message") or ""):
                    cls._fixup_unparsed_message(incident)
                    yield incident
            else:
                yield incident

    @classmethod
    def _fixup_unparsed_message(cls, incident):
        """Keep only the text that follows "[msg" and mark the incident
        with the "noshow" tag and the lowest (7) severity."""
        incident["tag"] = ["noshow"]
        incident["severity"] = 7
        incident["message"] = incident["message"].partition("[msg")[2]

    @classmethod
    def _fixup_camel_case(cls, incident):
        """
        make "Host" be in the same case as "msg", "rule", etc.

        :return dict: copy of *incident* with lowercased keys
        """
        return {k.lower(): v for k, v in incident.items()}

    @classmethod
    def _fixup_msg_inplace(cls, incident):
        """
        Move "msg" to "message" and derive "name" from it.

        :raise InvalidIncident: if there is no "msg"
        """
        if incident.get("msg") is None:
            raise cls.InvalidIncident()
        # to avoid KeyError: 'message' in sensor_incident_aggregate.py
        msg = incident.pop("msg")
        incident["message"] = msg
        # Try to extract 'constant' part as name
        incident["name"] = msg.split("||", maxsplit=1)[0]

    @classmethod
    def _fixup_useragent_inplace(cls, incident):
        """
        fixup 'user-agent' to 'user_agent' to match agent sqlitedb naming
        """
        if "user-agent" in incident:
            incident["user_agent"] = incident.pop("user-agent")

    @classmethod
    def _fixup_host_tag_inplace(cls, incident):
        """
        remove "Host: %hostname%" field duplicate in tag
        """
        if "tag" not in incident:
            return
        # value of the first "Host" header, if any
        host = next(
            (
                value
                for header, value in incident.get(ADVANCED, {}).get(
                    HEADERS, []
                )
                if header == "Host"
            ),
            None,
        )
        if host:
            duplicate = "Host: %s" % host
            incident["tag"] = [
                tag for tag in incident["tag"] if tag != duplicate
            ]

    @classmethod
    def _fixup_severity_inplace(cls, incident):
        """
        map modsec severity string <-> ossec modsec severity int,
        incident table expects int for severity and
        also consistent severity level is good for ML.
        github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual#severity

        An unknown severity literal is logged and left as is.
        """
        severity = incident.get("severity")
        if severity is None:
            # this will be OK with imunify360.db
            return
        try:
            incident["severity"] = int(severity)  # for modsec 3.x
            return
        except ValueError:
            pass
        try:
            incident["severity"] = cls._SEVERITY_LEVELS[severity]
        except KeyError:
            logger.error(
                "Cannot measure severity level for %s literal in %s plugin",
                repr(severity),
                repr(ModsecSensor.PLUGIN_ID),
            )
def mk_modsec_indicent_list(top_level_tokens, debug_ctx=None):
    """
    unroll modsec audit log into incident list

    One incident is built per "MessageList" entry and the whole batch
    is then passed through _IncidentFixupList.apply().

    :param top_level_tokens: see how unit test describe this data structure
    :param debug_ctx: to be shown in sentry incident
    :return list:
    """
    collected = []
    if "MessageList" in top_level_tokens:
        # identity is derived once per audit log entry and shared
        # by all of its incidents
        identity = user_identity(
            top_level_tokens.get(ATTACKERS_IP, ""),
            dict(top_level_tokens.get(HEADERS, [])),
        )
        for message_tokens in top_level_tokens["MessageList"]:
            incident = {
                "method": "INCIDENT",
                "plugin_id": ModsecSensor.PLUGIN_ID,
            }
            # PICK_SECAUDITLOG_FIELDS are looked up
            # first in "Message: [name value] [name value]" tokens
            # then in "Host: %name%", "User-Agent: %ua" top level fields
            for name in PICK_SECAUDITLOG_FIELDS:
                if name in message_tokens or name in top_level_tokens:
                    incident[name] = message_tokens.get(
                        name
                    ) or top_level_tokens.get(name)

            # generate `advanced` section
            advanced = {HEADERS: top_level_tokens.get(HEADERS, [])}
            for name in (URI, HTTP_METHOD):
                if top_level_tokens.get(name):
                    advanced[name] = top_level_tokens[name]
            incident[ADVANCED] = advanced

            incident[USER_IDENTITY_FIELD] = identity
            collected.append(incident)

    return list(_IncidentFixupList.apply(collected, debug_ctx))
def get_anomality_score_items(msg: str) -> dict:
    """
    Collect inbound/outbound anomaly scores mentioned in *msg*.

    :return: dict keyed by ANOMALITY_SCORE_FIELDS names; values are the
        matched digit strings (not converted to int). If a direction
        occurs more than once, the last occurrence wins.
    """
    scores = {}
    for match in _PARSE_SCORE_REGEX.finditer(msg):
        direction, score = match.groups()
        field = "%s_anomality_score" % direction.lower()
        assert field in ANOMALITY_SCORE_FIELDS, (
            "invalid anomality score key %s detected" % field
        )
        scores[field] = score
    return scores
def obfuscate_if_sensitive_and_required(
    name: str, value: str
) -> Tuple[Optional[str], bool]:
    """
    Obfuscate the header value if the header is sensitive and tell
    whether the header is required.

    :return: (possibly obfuscated value, is_required); a value that was
        left untouched is always required, an obfuscated one only as
        ModsecSensor.SEND_ADDITIONAL_DATA says
    """
    obfuscated = obfuscate_item_if_sensitive(name, value)
    untouched = obfuscated == value
    required = untouched or ModsecSensor.SEND_ADDITIONAL_DATA
    return obfuscated, required
def obfuscate_item_if_sensitive(name, value) -> Optional[str]:
    """
    Obfuscate the value of a sensitive header ('authorization' or
    'cookie', matched case-insensitively) so as not to disclose
    sensitive client info; any other header value is returned as is.
    """
    header = name.lower()
    if header == _COOKIE:
        # cookies are obfuscated per cookie name
        return obfuscate_cookie(value)
    if header in _SENSITIVE_HEADERS:
        return obfuscate_item(value)
    return value
def obfuscate_cookie(cookie):
    """
    Obfuscate every cookie value, keeping cookie names readable.

    :return: list of [name, obfuscated value] pairs sorted by name,
        or the CookieError text if the cookie cannot be parsed
    """
    try:
        parsed = SimpleCookie(cookie)
    except CookieError as error:
        return str(error)
    return [
        [key, obfuscate_item(morsel.value)]
        for key, morsel in sorted(parsed.items())
    ]
def _obfuscate_items(data: dict):
    """
    Obfuscate every item of every value list in *data*.

    :param data: mapping of key -> iterable of items
    :return: dict of key -> list of obfuscated items; keys without
        any items are left out
    """
    obfuscated = {}
    for key, values in data.items():
        masked = [obfuscate_item(value) for value in values]
        if masked:
            obfuscated[key] = masked
    return obfuscated
def obfuscate(query):
    """
    Parse a query string with parse_qs() and obfuscate all its values.
    """
    params = parse_qs(query)
    return _obfuscate_items(params)
def obfuscate_item(item: Optional[str]) -> Optional[str]:
    """
    Replace the content of *item* with a description of its shape.

    - space, tab, LF, CR and NUL become "[space]", "[tab]", "[LF]",
      "[CR]" and "[NULL]" (one token per character);
    - a run of letters becomes "[chr]" or "[chr]{N}" for N > 1;
    - a run of numeric characters becomes "[digit]" or "[digit]{N}";
    - any other character is kept as is.

    The value is processed as given, no URL-decoding is done here.

    :param item: text to obfuscate
    :return: obfuscated text; a falsy *item* is returned unchanged
    """
    if not item:  # nothing to obfuscate
        return item

    whitespace_tokens = {
        " ": "[space]",
        "\t": "[tab]",
        "\n": "[LF]",
        "\r": "[CR]",
        "\x00": "[NULL]",
    }
    # order matters: a character that is both a letter and numeric
    # starts a letter run
    run_kinds = (("[chr]", str.isalpha), ("[digit]", str.isnumeric))

    pieces = []
    pos, length = 0, len(item)
    while pos < length:
        char = item[pos]
        token = whitespace_tokens.get(char)
        if token is not None:
            pieces.append(token)
            pos += 1
            continue
        for label, belongs in run_kinds:
            if not belongs(char):
                continue
            # consume the whole run of characters of the same kind
            end = pos + 1
            while end < length and belongs(item[end]):
                end += 1
            run_length = end - pos
            if run_length <= 1:
                pieces.append(label)
            else:
                pieces.append("%s{%d}" % (label, run_length))
            pos = end
            break
        else:
            # neither whitespace, letter nor numeric: keep verbatim
            pieces.append(char)
            pos += 1
    return "".join(pieces)
def get_producer_data(line: str) -> tuple:
    """
    Split a producer line into ModSecurity version and vendor list.

    Examples of input::

        ModSecurity for Apache/2.9.0 (http://www.modsecurity.org/); CWAF_Apache.
        ModSecurity for Apache/2.5.5

    :param line: producer text
    :return: (modsec_ver, vendors), where modsec_ver is the first
        "N.N.N" version found in the part before the first ";"
        (None if there is none) and vendors is the list of the
        remaining ";"-separated parts stripped of ".", "|" and spaces
        (None if the line has no ";")
    """
    version, *rest = line.split(";")
    # rest is empty exactly when there is no ";" in the line
    vendors = [vendor.strip(".| ") for vendor in rest] if rest else None

    # every component may have several digits, e.g. 3.0.12
    match = re.search(r"\d+\.\d+\.\d+", version)
    modsec_ver = match.group() if match else None
    return modsec_ver, vendors