diff --git a/monitoring/docker-compose.yaml b/monitoring/docker-compose.yaml index 910ea98..2a87bea 100644 --- a/monitoring/docker-compose.yaml +++ b/monitoring/docker-compose.yaml @@ -16,7 +16,6 @@ services: - "traefik.http.services.monitoring.loadbalancer.server.port=3000" - "traefik.docker.network=web" - "docker.group=monitoring" - restart: unless-stopped environment: - GF_DEFAULT_INSTANCE_NAME=monitoring.${DOMAIN} - GF_SERVER_ROOT_URL=http://monitoring.${DOMAIN} @@ -38,7 +37,7 @@ services: grafanadb: - image: postgres + image: postgres:13 volumes: - ${VOLUMES_PATH}/grafanadb:/var/lib/postgresql/data networks: @@ -53,11 +52,8 @@ services: influxdb: -# image: arm32v7/influxdb image: influxdb:1.8 restart: unless-stopped - ports: - - 8086:8086 networks: - web - monitoring @@ -67,13 +63,18 @@ services: - ${VOLUMES_PATH}/influxdb/:/var/lib/influxdb - ${PWD}/influxdb.conf:/etc/influxdb/influxdb.conf:ro labels: + - "traefik.enable=true" + - "traefik.http.routers.influxdb.rule=Host(`influxdb.${DOMAIN}`)" + - "traefik.http.routers.influxdb.entrypoints=web" + - "traefik.http.services.influxdb.loadbalancer.server.port=8086" + - "traefik.docker.network=web" - "docker.group=monitoring" ################################################################## # here starts data collection - telegraf: + telegraf_host: image: telegraf:1.18 restart: unless-stopped environment: @@ -84,8 +85,7 @@ services: - HOST_VAR=/hostfs/var - HOST_RUN=/hostfs/run volumes: - - ./telegraf/telegraf.conf:/etc/telegraf/telegraf.conf:ro - - /var/run/docker.sock:/var/run/docker.sock:ro + - ./telegraf/telegraf_host.conf:/etc/telegraf/telegraf.conf:ro - /var/run/utmp:/var/run/utmp:ro - /:/hostfs:ro network_mode: "host" @@ -93,10 +93,26 @@ services: - "docker.group=monitoring" depends_on: - influxdb + + + telegraf_net: + image: telegraf:1.18 + restart: unless-stopped + volumes: + - ./telegraf/telegraf_net.conf:/etc/telegraf/telegraf.conf:ro + networks: + - monitoring + - dockersocket + labels: + - 
"docker.group=monitoring" + depends_on: + - influxdb dns: - - 9.9.9.9 + - 192.168.16.5 networks: monitoring: web: external: true + dockersocket: + external: true diff --git a/monitoring/telegraf/telegraf.conf b/monitoring/telegraf/telegraf_host.conf similarity index 70% rename from monitoring/telegraf/telegraf.conf rename to monitoring/telegraf/telegraf_host.conf index 297df15..6249272 100644 --- a/monitoring/telegraf/telegraf.conf +++ b/monitoring/telegraf/telegraf_host.conf @@ -80,7 +80,7 @@ # urls = ["unix:///var/run/influxdb.sock"] # urls = ["udp://127.0.0.1:8089"] # urls = ["http://127.0.0.1:8086"] - urls = ["http://192.168.16.5:8086"] # required + urls = ["http://influxdb.lan"] # required ############################################################################### @@ -170,141 +170,6 @@ # fielddrop = ["uptime_format"] - -# Read metrics about docker containers -[[inputs.docker]] - ## Docker Endpoint - ## To use TCP, set endpoint = "tcp://[ip]:[port]" - ## To use environment variables (ie, docker-machine), set endpoint = "ENV" - endpoint = "unix:///var/run/docker.sock" - - ## Set to true to collect Swarm metrics(desired_replicas, running_replicas) - gather_services = false - - ## Only collect metrics for these containers, collect all if empty - container_names = [] - - ## Set the source tag for the metrics to the container ID hostname, eg first 12 chars - source_tag = false - - ## Containers to include and exclude. Globs accepted. - ## Note that an empty array for both will include all containers - container_name_include = [] - container_name_exclude = [] - - ## Container states to include and exclude. Globs accepted. - ## When empty only containers in the "running" state will be captured. 
- ## example: container_state_include = ["created", "restarting", "running", "removing", "paused", "exited", "dead"] - ## example: container_state_exclude = ["created", "restarting", "running", "removing", "paused", "exited", "dead"] - # container_state_include = [] - # container_state_exclude = [] - - ## Timeout for docker list, info, and stats commands - timeout = "5s" - - ## Whether to report for each container per-device blkio (8:0, 8:1...) and - ## network (eth0, eth1, ...) stats or not - perdevice = true - - ## Whether to report for each container total blkio and network stats or not - total = false - - ## Which environment variables should we use as a tag - ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] - - ## docker labels to include and exclude as tags. Globs accepted. - ## Note that an empty array for both will include all labels as tags - docker_label_include = [] - docker_label_exclude = [] - - ## Optional TLS Config - # tls_ca = "/etc/telegraf/ca.pem" - # tls_cert = "/etc/telegraf/cert.pem" - # tls_key = "/etc/telegraf/key.pem" - ## Use TLS but skip chain & host verification - # insecure_skip_verify = false - - -# # Monitor disks' temperatures using hddtemp -# [[inputs.hddtemp]] -# ## By default, telegraf gathers temps data from all disks detected by the -# ## hddtemp. -# ## -# ## Only collect temps from the selected disks. -# ## -# ## A * as the device name will return the temperature values of all disks. -# ## -# # address = "127.0.0.1:7634" -# # devices = ["sda", "*"] - - -# HTTP/HTTPS request given an address a method and a timeout -[[inputs.http_response]] - ## Deprecated in 1.12, use 'urls' - ## Server address (default http://localhost) - # address = "http://localhost" - - ## List of urls to query. 
- urls = [ - "https://florianzirker.de", - "https://cloud.florianzirker.de/login", - "https://wallabag.florianzirker.de/login", - "https://gitea.florianzirker.de/api/v1/version/", - "https://meet.florianzirker.de/", - #"https://www.feuerwehr-kapsweyer.de", - "https://ping.feuerwehr-kapsweyer.de", - "http://ping.feuerwehr-kapsweyer.de", - "http://portainer.lan", - "http://gpxviewer.lan", - "http://traefik.lan/dashboard/", - "http://heimdall.lan", - "http://monitoring.lan/login", - #"http://solarmaxpi.zirker.lan" - ] - interval = "60s" - - ## Set http_proxy (telegraf uses the system wide proxy settings if it's is not set) - # http_proxy = "http://localhost:8888" - - ## Set response_timeout (default 5 seconds) - response_timeout = "10s" - - ## HTTP Request Method - method = "GET" - - ## Whether to follow redirects from the server (defaults to false) - follow_redirects = false - - ## Optional HTTP Request Body - # body = ''' - # {'fake':'data'} - # ''' - - ## Optional substring or regex match in body of the response - # response_string_match = "\"service_status\": \"up\"" - # response_string_match = "ok" - # response_string_match = "\".*_status\".?:.?\"up\"" - - ## Optional TLS Config - # tls_ca = "/etc/telegraf/ca.pem" - # tls_cert = "/etc/telegraf/cert.pem" - # tls_key = "/etc/telegraf/key.pem" - ## Use TLS but skip chain & host verification - # insecure_skip_verify = false - - ## HTTP Request Headers (all values must be strings) - # [inputs.http_response.headers] - # Host = "github.com" - - ## Interface to use when dialing an address - # interface = "eth0" - -# # Collect statistics about itself -# [[inputs.internal]] -# ## If true, collect telegraf memory stats. 
-# # collect_memstats = true - - # # Read metrics about network interface usage [[inputs.net]] ## By default, telegraf gathers stats from any up interface (excluding loopback) @@ -337,47 +202,6 @@ interfaces = ["enx001e0636be71"] dump_zeros = true -# Ping given url(s) and return statistics -[[inputs.ping]] - ## List of urls to ping - urls = [ - "fritz-box.lan", - "wlan-ap.lan", - "drax.lan", - "florianzirker.de", - "t-online.de", - "8.8.8.8", - "4.2.2.2", - "9.9.9.9", - "example.com" - ] - - ## Number of pings to send per collection (ping -c ) - # count = 1 - - ## Interval, in s, at which to ping. 0 == default (ping -i ) - ## Not available in Windows. - ping_interval = 60.0 - - ## Per-ping timeout, in s. 0 == no timeout (ping -W ) - # timeout = 1.0 - - ## Total-ping deadline, in s. 0 == no deadline (ping -w ) - # deadline = 10 - - ## Interface or source address to send ping from (ping -I ) - ## on Darwin and Freebsd only source address possible: (ping -S ) - # interface = "" - - ## Specify the ping executable binary, default is "ping" - # binary = "ping" - - ## Arguments for ping command - ## when arguments is not empty, other options (ping_interval, timeout, etc) will be ignored - # arguments = ["-c", "3"] - - - # # Monitor process cpu and memory usage # [[inputs.procstat]] # ## PID file to monitor process diff --git a/monitoring/telegraf/telegraf_net.conf b/monitoring/telegraf/telegraf_net.conf new file mode 100644 index 0000000..c95263f --- /dev/null +++ b/monitoring/telegraf/telegraf_net.conf @@ -0,0 +1,262 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. 
+# +# Environment variables can be used anywhere in this config file, simply surround +# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"), +# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR}) + +# Config Sample under https://github.com/influxdata/telegraf/blob/master/etc/telegraf.conf + +# Global tags can be specified here in key="value" format. +[global_tags] + # datacenter + dc="fzirker.lan" + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## Maximum number of unwritten metrics per output. Increasing this value + ## allows for longer periods of output downtime without dropping metrics at the + ## cost of higher maximum memory usage. + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. Maximum flush_interval will be + ## flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. 
+ ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do not set the "host" tag in the telegraf agent. + omit_hostname = false + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Configuration for sending metrics to InfluxDB +[[outputs.influxdb]] + ## The full HTTP or UDP URL for your InfluxDB instance. + ## + ## Multiple URLs can be specified for a single cluster, only ONE of the + ## urls will be written to each interval. 
+ # urls = ["unix:///var/run/influxdb.sock"] + # urls = ["udp://127.0.0.1:8089"] + # urls = ["http://127.0.0.1:8086"] + urls = ["http://influxdb.lan"] # required + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# Read metrics about docker containers +[[inputs.docker]] + ## Docker Endpoint + ## To use TCP, set endpoint = "tcp://[ip]:[port]" + ## To use environment variables (ie, docker-machine), set endpoint = "ENV" + #endpoint = "unix:///var/run/docker.sock" + endpoint = "tcp://docker-socket-proxy:2375" + + ## Set to true to collect Swarm metrics(desired_replicas, running_replicas) + gather_services = false + + ## Only collect metrics for these containers, collect all if empty + container_names = [] + + ## Set the source tag for the metrics to the container ID hostname, eg first 12 chars + source_tag = false + + ## Containers to include and exclude. Globs accepted. + ## Note that an empty array for both will include all containers + container_name_include = [] + container_name_exclude = [] + + ## Container states to include and exclude. Globs accepted. + ## When empty only containers in the "running" state will be captured. + ## example: container_state_include = ["created", "restarting", "running", "removing", "paused", "exited", "dead"] + ## example: container_state_exclude = ["created", "restarting", "running", "removing", "paused", "exited", "dead"] + # container_state_include = [] + # container_state_exclude = [] + + ## Timeout for docker list, info, and stats commands + timeout = "5s" + + ## Whether to report for each container per-device blkio (8:0, 8:1...) and + ## network (eth0, eth1, ...) 
stats or not + perdevice = true + + ## Whether to report for each container total blkio and network stats or not + total = false + + ## Which environment variables should we use as a tag + ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] + + ## docker labels to include and exclude as tags. Globs accepted. + ## Note that an empty array for both will include all labels as tags + docker_label_include = [] + docker_label_exclude = [] + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + +# # Monitor disks' temperatures using hddtemp +# [[inputs.hddtemp]] +# ## By default, telegraf gathers temps data from all disks detected by the +# ## hddtemp. +# ## +# ## Only collect temps from the selected disks. +# ## +# ## A * as the device name will return the temperature values of all disks. +# ## +# # address = "127.0.0.1:7634" +# # devices = ["sda", "*"] + + +# HTTP/HTTPS request given an address a method and a timeout +[[inputs.http_response]] + ## Deprecated in 1.12, use 'urls' + ## Server address (default http://localhost) + # address = "http://localhost" + + ## List of urls to query. 
+ urls = [ + "https://florianzirker.de", + "https://cloud.florianzirker.de/login", + "https://wallabag.florianzirker.de/login", + "https://gitea.florianzirker.de/api/v1/version/", + "https://meet.florianzirker.de/", + #"https://www.feuerwehr-kapsweyer.de", + "https://ping.feuerwehr-kapsweyer.de", + "http://ping.feuerwehr-kapsweyer.de", + "http://portainer.lan", + "http://gpxviewer.lan", + "http://traefik.lan/dashboard/", + "http://heimdall.lan", + "http://monitoring.lan/login", + #"http://solarmaxpi.zirker.lan" + ] + interval = "60s" + + ## Set http_proxy (telegraf uses the system wide proxy settings if it is not set) + # http_proxy = "http://localhost:8888" + + ## Set response_timeout (default 5 seconds) + response_timeout = "10s" + + ## HTTP Request Method + method = "GET" + + ## Whether to follow redirects from the server (defaults to false) + follow_redirects = false + + ## Optional HTTP Request Body + # body = ''' + # {'fake':'data'} + # ''' + + ## Optional substring or regex match in body of the response + # response_string_match = "\"service_status\": \"up\"" + # response_string_match = "ok" + # response_string_match = "\".*_status\".?:.?\"up\"" + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + ## HTTP Request Headers (all values must be strings) + # [inputs.http_response.headers] + # Host = "github.com" + + ## Interface to use when dialing an address + # interface = "eth0" + +# # Collect statistics about itself +# [[inputs.internal]] +# ## If true, collect telegraf memory stats. 
+# # collect_memstats = true + + +# Ping given url(s) and return statistics +[[inputs.ping]] + ## List of urls to ping + urls = [ + "fritz-box.lan", + "wlan-ap.lan", + "drax.lan", + "florianzirker.de", + "t-online.de", + "8.8.8.8", + "4.2.2.2", + "9.9.9.9", + "example.com" + ] + + ## Number of pings to send per collection (ping -c ) + # count = 1 + + ## Interval, in s, at which to ping. 0 == default (ping -i ) + ## Not available in Windows. + ping_interval = 60.0 + + ## Per-ping timeout, in s. 0 == no timeout (ping -W ) + # timeout = 1.0 + + ## Total-ping deadline, in s. 0 == no deadline (ping -w ) + # deadline = 10 + + ## Interface or source address to send ping from (ping -I ) + ## on Darwin and Freebsd only source address possible: (ping -S ) + # interface = "" + + ## Specify the ping executable binary, default is "ping" + # binary = "ping" + + ## Arguments for ping command + ## when arguments is not empty, other options (ping_interval, timeout, etc) will be ignored + # arguments = ["-c", "3"] \ No newline at end of file diff --git a/proxy/docker-compose.yaml b/proxy/docker-compose.yaml index 1be2b11..04a0016 100644 --- a/proxy/docker-compose.yaml +++ b/proxy/docker-compose.yaml @@ -24,6 +24,20 @@ services: - host.docker.internal:172.17.0.1 + docker-socket-proxy: + image: tecnativa/docker-socket-proxy + restart: unless-stopped + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + environment: + # grant privileges as environment variables: https://github.com/Tecnativa/docker-socket-proxy#grant-or-revoke-access-to-certain-api-sections + - CONTAINERS=1 + - INFO=1 + networks: + - dockersocket + privileged: true + + whoami: image: containous/whoami networks: @@ -40,4 +54,6 @@ services: networks: web: external: true + dockersocket: + external: true