Precisei colocar mais um servidor em casa e decidi monitorar o seu desempenho na minha casa inteligente, que roda sobre o Home Assistant. Nem uma busca rápida nem uma mais cuidadosa no Google me deram uma solução universal, então acabei reinventando a roda por conta própria.
Contexto: vamos monitorar a carga e a temperatura do processador, o uso da RAM e do swap, o espaço livre em disco, o tempo de atividade (uptime), a carga média do sistema, a temperatura e, separadamente, o estado SMART dos discos, e o estado do RAID (o servidor roda ubuntu server 20 com um raid1 simples por software). Os discos são WD Green e a placa-mãe é uma GA-525 com atom525 integrado.
O broker mosquitto já estava configurado no servidor da casa inteligente, por isso o mqtt foi escolhido como método de transferência dos dados.
Nas primeiras seções deste trabalho, são apresentados os princípios dos métodos de coleta de dados aplicados e, ao final, os scripts de transferência de dados e as configurações de HA.
Todos os comandos nos exemplos são executados como root.
Índice
Coletando as leituras dos sensores do sistema
Coletando dados de carga do sistema
Coletando dados de saúde do disco rígido
Coletando dados de estado do RAID
Enviando os dados coletados
Configurando o Home Assistant
Leituras do sensor do sistema
Para ler os sensores integrados, usaremos o utilitário sensors.
Se não estiver instalado, coloque-o: apt-get install lm-sensors
Primeiro, é preciso detectar todos os sensores disponíveis. Execute o comando sensors-detect
e responda y a todas as perguntas. Depois disso, veja o resultado com o comando: sensors
Vale notar que, no meu caso, o sensors só passou a exibir todos os sensores encontrados depois de uma reinicialização. Talvez seja algum bug, não sei.
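O utilitário sensors consegue emitir os dados em json, o que facilita o processamento automático. Para isso, use: sensors -A -u -j
Para extrair valores desse json usaremos o utilitário jq. No ubuntu ele é instalado assim: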
apt-get install jq
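O jq usa expressões de caminho, parecidas com xpath, para navegar no json.
Por exemplo, para obter a temperatura do processador, a rotação do ventilador e o sensor temp3: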
sensors -A -u -j | jq '.["coretemp-isa-0000"]["Core 0"].temp2_input'
sensors -A -u -j | jq '.["it8720-isa-0290"].fan1.fan1_input'
sensors -A -u -j | jq '.["it8720-isa-0290"].temp3.temp3_input'
, , , , .
O uso da memória é consultado com o utilitário free. Com a opção -m, os valores são exibidos em mebibytes.
, . - , .
free -m | grep "Mem" | awk '{print $2}'
grep , awk - , . , . .
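O uso dos discos é obtido com o utilitário df. Selecionamos a linha do dispositivo desejado e extraímos dela a porcentagem de uso, sem o sinal de %. O comando base é: df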
df | grep "/dev/md127p1" | awk '{print $5}' | sed 's/%$//'
df | grep "/dev/md126p1" | awk '{print $5}' | sed 's/%$//'
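A carga média do sistema está no arquivo /proc/loadavg. Os três primeiros números são a carga média nos últimos 1, 5 e 15 minutos. Usaremos o valor de 15 minutos: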
cat /proc/loadavg | awk '{print $3}'
O tempo de atividade é obtido com o uptime:
uptime | awk '{print $3}' | sed 's/,$//'
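A carga do processador é obtida com o utilitário mpstat, que faz parte do pacote sysstat; se não estiver instalado, use apt install sysstat. Na linha all, a coluna %idle traz a porcentagem de tempo ocioso: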
mpstat | grep all | awk '{print $13}'
, .
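Para obter a carga, subtraímos de 100 o tempo ocioso. Como o bash não faz aritmética de ponto flutuante, usamos o bc: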
# %idle is the last column of mpstat's "all" row. Taking $NF instead of a
# fixed column number keeps this working whether the timestamp occupies one
# field (24-hour clock) or two (time plus AM/PM). LC_ALL=C makes mpstat print
# a dot as the decimal separator so that bc can parse the value.
cpuidle=$(LC_ALL=C mpstat | awk '$2 == "all" || $3 == "all" {print $NF}')
# bash arithmetic is integer-only, hence bc for the subtraction.
cpuload=$(echo "100-$cpuidle" | bc -l)
echo " : $cpuload"
A temperatura dos discos é lida com o utilitário hddtemp. Se não estiver instalado:
apt-get install hddtemp
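Uso: informe o dispositivo; a opção -n exibe apenas o valor numérico: hddtemp /dev/sda -n
Os dados SMART são lidos com o pacote smartmontools: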
apt-get install smartmontools
Com a opção -a, o smartctl exibe todas as informações SMART do disco.
smartctl -a /dev/sda
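A saída é extensa, e nem tudo precisa ser monitorado. Os atributos que mais interessam são:
Raw_Read_Error_Rate — frequência de erros ao ler dados do disco;
Reallocated_Sector_Ct — número de setores remapeados (realocados);
Seek_Error_Rate — frequência de erros de posicionamento das cabeças;
Spin_Retry_Count — número de tentativas repetidas de levar os pratos à velocidade de trabalho;
Reallocated_Event_Count — número de operações de remapeamento;
Offline_Uncorrectable — número de setores com erros incorrigíveis encontrados nos testes offline.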
Para o processamento automático, o smartctl também emite json. Basta acrescentar a opção -j:
smartctl -a -j /dev/sda
Com expressões de caminho do jq, extraímos os valores brutos (raw) dos atributos:
# Pick each attribute by its name rather than by its position in the table:
# the position of an attribute in ata_smart_attributes.table differs between
# drive models, while the name field identifies it directly.
smartctl -a /dev/sda -j | jq '.ata_smart_attributes.table[] | select(.name == "Raw_Read_Error_Rate") | .raw.value'
smartctl -a /dev/sda -j | jq '.ata_smart_attributes.table[] | select(.name == "Reallocated_Sector_Ct") | .raw.value'
smartctl -a /dev/sda -j | jq '.ata_smart_attributes.table[] | select(.name == "Seek_Error_Rate") | .raw.value'
smartctl -a /dev/sda -j | jq '.ata_smart_attributes.table[] | select(.name == "Spin_Retry_Count") | .raw.value'
smartctl -a /dev/sda -j | jq '.ata_smart_attributes.table[] | select(.name == "Reallocated_Event_Count") | .raw.value'
smartctl -a /dev/sda -j | jq '.ata_smart_attributes.table[] | select(.name == "Offline_Uncorrectable") | .raw.value'
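O estado geral de saúde do disco é exibido com a opção -H. Com a opção -j, ele também aparece no json.
Extraindo esse estado do json: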
smartctl -a /dev/sda -j | jq '.smart_status.passed' #smart_status
, ()
, , , cron . .
smartctl -t short /dev/sda
, 2
smartctl -t long /dev/sda
, 1 .
, , smartd, , . , . smartd .
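Estado do RAID
O estado atual dos arrays pode ser visto com cat /proc/mdstat
Para verificar a consistência, iniciamos uma checagem e depois lemos o contador de divergências: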
echo 'check' >/sys/block/md126/md/sync_action
echo 'check' >/sys/block/md127/md/sync_action
cat /sys/block/md126/md/mismatch_cnt
cat /sys/block/md127/md/mismatch_cnt
Se o valor for 0, está tudo em ordem.
, .
Para enviar os dados usaremos os clientes do mosquitto:
apt-get install mosquitto-clients
Criamos três scripts: um para os dados do sistema, um para os discos (raid e espaço) e um para o smart:
touch system.sh && touch drives.sh && touch smart.sh
chmod u+x system.sh && chmod u+x drives.sh && chmod u+x smart.sh
:
system.sh
#!/bin/bash
# system.sh: read disk/CPU temperatures, fan speed, RAM and swap usage, load
# average, uptime and CPU load, print them, and publish each value to the
# MQTT broker under the "srv/" topic prefix.
# Uses: hddtemp, sensors, jq, free, mpstat, bc, mosquitto_pub.

# MQTT broker address and credentials (placeholders; fill in before use).
ip=xx.xx.xx.xx
usr="xx"
pass="xx"

# percent_of USED TOTAL
# Prints USED as a whole-number percentage of TOTAL (integer division).
# Prints 0 when TOTAL is zero or when either argument is not a plain
# non-negative integer, so a host without swap (total 0) or a failed "free"
# call yields a publishable number instead of an arithmetic error.
percent_of() {
  local used=$1 total=$2
  if [[ ! $used =~ ^[0-9]+$ || ! $total =~ ^[0-9]+$ ]]; then
    echo 0
    return 0
  fi
  # 10# forces base 10 so a value with a leading zero is not read as octal.
  if ((10#$total == 0)); then
    echo 0
    return 0
  fi
  echo "$((10#$used * 100 / 10#$total))"
}

# publish NAME VALUE
# Sends VALUE to the topic srv/NAME. Every argument is quoted so an empty
# VALUE is sent as an empty message instead of shifting the option list.
publish() {
  mosquitto_pub -h "$ip" -t "srv/$1" -m "$2" -u "$usr" -P "$pass"
}

# Drive temperatures; -n makes hddtemp print only the number.
tempdrive1=$(hddtemp "/dev/sda" -n)
echo " 1: $tempdrive1"
tempdrive2=$(hddtemp "/dev/sdb" -n)
echo " 2: $tempdrive2"

# Query the sensors once and take all three readings from the same snapshot.
sensors_json=$(sensors -A -u -j)
tempcpu=$(jq '.["coretemp-isa-0000"]["Core 0"].temp2_input' <<<"$sensors_json")
echo " : $tempcpu"
fan=$(jq '.["it8720-isa-0290"].fan1.fan1_input' <<<"$sensors_json")
echo " : $fan"
temp3=$(jq '.["it8720-isa-0290"].temp3.temp3_input' <<<"$sensors_json")
echo " : $temp3"

# One "free" call feeds both the RAM and the swap figures. LC_ALL=C keeps
# the "Mem"/"Swap" row labels untranslated.
meminfo=$(LC_ALL=C free -m)
totalram=$(awk '/^Mem/ {print $2}' <<<"$meminfo")
echo " : $totalram"
usedram=$(awk '/^Mem/ {print $3}' <<<"$meminfo")
echo " : $usedram"
usedrampercent=$(percent_of "$usedram" "$totalram")
echo " : $usedrampercent"
totalswap=$(awk '/^Swap/ {print $2}' <<<"$meminfo")
echo " : $totalswap"
usedswap=$(awk '/^Swap/ {print $3}' <<<"$meminfo")
echo " : $usedswap"
usedswappercent=$(percent_of "$usedswap" "$totalswap")
echo " : $usedswappercent"

# Third field of /proc/loadavg: the 15-minute load average.
averageload=$(awk '{print $3}' /proc/loadavg)
echo " : $averageload"

# Whole days of uptime, from the seconds-since-boot counter in /proc/uptime.
# Parsing the third word of "uptime" output only gives days once the host
# has been up for at least one day; before that it is "H:MM" or minutes.
uptimedata=$(awk '{print int($1 / 86400)}' /proc/uptime)
echo ": $uptimedata"

# %idle is the last column of mpstat's "all" row; $NF avoids depending on
# whether the timestamp takes one field or two (AM/PM). LC_ALL=C makes the
# decimal separator a dot so bc can parse it.
# NOTE(review): mpstat without an interval reports the average since boot,
# not the current load; "mpstat 1 1" would sample one second instead.
cpuidle=$(LC_ALL=C mpstat | awk '$2 == "all" || $3 == "all" {print $NF}')
cpuload=""
if [[ -n $cpuidle ]]; then
  cpuload=$(echo "100-$cpuidle" | bc -l) # bash arithmetic is integer-only
fi
echo " : $cpuload"
echo " "
echo " "

publish tempdrive1 "$tempdrive1"
publish tempdrive2 "$tempdrive2"
publish tempcpu "$tempcpu"
publish fan "$fan"
publish temp3 "$temp3"
publish usedrampercent "$usedrampercent"
publish usedswappercent "$usedswappercent"
publish averageload "$averageload"
publish uptimedata "$uptimedata"
publish cpuload "$cpuload"
drives.sh
#!/bin/bash
# drives.sh: read the md RAID mismatch counters and the filesystem usage of
# the two array partitions, print them, and publish each value to the MQTT
# broker under the "srv/" topic prefix.
# Uses: df, awk, mosquitto_pub.

# MQTT broker address and credentials (placeholders; fill in before use).
ip=xx.xx.xx.xx
usr="xx"
pass="xx"

# used_percent DEVICE
# Reads "df -P" style output on stdin and prints the fifth column (use
# percentage) of the row whose first column is exactly DEVICE, with the
# trailing "%" removed. Prints nothing when no row matches. The exact match
# avoids also picking up devices that merely start with the same name.
used_percent() {
  awk -v dev="$1" '$1 == dev { sub(/%$/, "", $5); print $5 }'
}

# publish NAME VALUE
# Sends VALUE to the topic srv/NAME. Every argument is quoted so an empty
# VALUE is sent as an empty message instead of shifting the option list.
publish() {
  mosquitto_pub -h "$ip" -t "srv/$1" -m "$2" -u "$usr" -P "$pass"
}

# NOTE(review): "system" is read from md126 here but from md127p1 for the
# disk usage below (and the other array the other way round); confirm which
# array is which.
raid_system_status=$(cat /sys/block/md126/md/mismatch_cnt)
echo " RAID : $raid_system_status"
raid_var_status=$(cat /sys/block/md127/md/mismatch_cnt)
echo " RAID : $raid_var_status"

# -P keeps every filesystem on a single line, so the columns stay aligned
# for awk even when a device name is long.
disk_usage=$(df -P)
# Despite the "free..." names, these hold the percentage of space in use.
freesystemdisk=$(used_percent /dev/md127p1 <<<"$disk_usage")
echo " : $freesystemdisk"
freedatadisk=$(used_percent /dev/md126p1 <<<"$disk_usage")
echo " : $freedatadisk"
echo " "
echo " "

publish raid_system_status "$raid_system_status"
publish raid_var_status "$raid_var_status"
publish freesystemdisk "$freesystemdisk"
publish freedatadisk "$freedatadisk"
smart.sh
#!/bin/bash
# smart.sh: read selected SMART attributes and the overall SMART status of
# each drive, print them, and publish each value to the MQTT broker under
# the "srv/" topic prefix. Topics are "srv/<attribute><N>" and
# "srv/smart_status<N>", where N is the 1-based position of the drive in
# the drives list.
# Uses: smartctl, jq, mosquitto_pub.

# MQTT broker address and credentials (placeholders; fill in before use).
ip=xx.xx.xx.xx
usr="xx"
pass="xx"

# Drives to query, in topic-number order, and the attributes to report.
drives=(/dev/sda /dev/sdb)
attrs=(
  Raw_Read_Error_Rate
  Reallocated_Sector_Ct
  Seek_Error_Rate
  Spin_Retry_Count
  Reallocated_Event_Count
  Offline_Uncorrectable
)

# smart_attr JSON NAME
# Prints the raw value of the attribute called NAME from a "smartctl -a -j"
# report passed as JSON text. Looks the attribute up by name, because its
# position in the table is not the same on every drive model. Prints "null"
# when the report has no such attribute.
smart_attr() {
  jq --arg name "$2" \
    'first(.ata_smart_attributes.table[]? | select(.name == $name) | .raw.value) // null' \
    <<<"$1"
}

# publish TOPIC VALUE
# Sends VALUE to TOPIC. Every argument is quoted so an empty VALUE is sent
# as an empty message instead of shifting the option list.
publish() {
  mosquitto_pub -h "$ip" -t "$1" -m "$2" -u "$usr" -P "$pass"
}

# Collected first and published afterwards, so all readings are printed
# before any network traffic starts.
attr_topics=()
attr_values=()
status_topics=()
status_values=()

for i in "${!drives[@]}"; do
  n=$((i + 1))
  # One smartctl call per drive; every value below comes from this report.
  report=$(smartctl -a "${drives[i]}" -j)
  for attr in "${attrs[@]}"; do
    value=$(smart_attr "$report" "$attr")
    echo "SMART $attr $n: $value"
    attr_topics+=("srv/$attr$n")
    attr_values+=("$value")
  done
  status=$(jq '.smart_status.passed' <<<"$report")
  echo " $n: $status"
  status_topics+=("srv/smart_status$n")
  status_values+=("$status")
done
echo " "
echo " "

for i in "${!attr_topics[@]}"; do
  publish "${attr_topics[i]}" "${attr_values[i]}"
done
for i in "${!status_topics[@]}"; do
  publish "${status_topics[i]}" "${status_values[i]}"
done
, Mosquitto broker Home Assistant
, , , .
Home Assistant
, . Home Assistant .
# Home Assistant MQTT sensors, one per "srv/..." topic.
# The list items are indented under "sensor:" so the file parses as YAML.
# NOTE(review): several names below are identical (" nextcloud ", and
# " nextcloud RAID "); the distinguishing words appear to be missing, so
# give each sensor a unique name before loading this configuration.
# NOTE(review): "platform: mqtt" under "sensor:" is the older layout; check
# the MQTT sensor documentation for the Home Assistant version in use.
sensor:
  - platform: mqtt
    state_topic: "srv/tempdrive1"
    name: " nextcloud 1"
    unit_of_measurement: °C
  - platform: mqtt
    state_topic: "srv/tempdrive2"
    name: " nextcloud 2"
    unit_of_measurement: °C
  - platform: mqtt
    state_topic: "srv/tempcpu"
    name: " nextcloud "
    unit_of_measurement: °C
  - platform: mqtt
    state_topic: "srv/fan"
    name: " nextcloud "
    # Fan speed is a rotation rate; the unit was previously "ppm".
    unit_of_measurement: RPM
  - platform: mqtt
    state_topic: "srv/temp3"
    name: " nextcloud "
    unit_of_measurement: °C
  - platform: mqtt
    state_topic: "srv/usedrampercent"
    name: " nextcloud RAM"
    unit_of_measurement: "%"
  - platform: mqtt
    state_topic: "srv/usedswappercent"
    name: " nextcloud SWAP"
    unit_of_measurement: "%"
  - platform: mqtt
    state_topic: "srv/freesystemdisk"
    name: " nextcloud "
    unit_of_measurement: "%"
  - platform: mqtt
    state_topic: "srv/freedatadisk"
    name: " nextcloud "
    unit_of_measurement: "%"
  - platform: mqtt
    state_topic: "srv/averageload"
    name: " nextcloud "
  - platform: mqtt
    state_topic: "srv/uptimedata"
    name: " nextcloud "
  - platform: mqtt
    state_topic: "srv/cpuload"
    name: " nextcloud "
    unit_of_measurement: "%"
  - platform: mqtt
    state_topic: "srv/Raw_Read_Error_Rate1"
    name: " nextcloud 1 SMART Raw_Read_Error_Rate"
  - platform: mqtt
    state_topic: "srv/Reallocated_Sector_Ct1"
    name: " nextcloud 1 SMART Reallocated_Sector_Ct"
  - platform: mqtt
    state_topic: "srv/Seek_Error_Rate1"
    name: " nextcloud 1 SMART Seek_Error_Rate"
  - platform: mqtt
    state_topic: "srv/Spin_Retry_Count1"
    name: " nextcloud 1 SMART Spin_Retry_Count"
  - platform: mqtt
    state_topic: "srv/Reallocated_Event_Count1"
    name: " nextcloud 1 SMART Reallocated_Event_Count"
  - platform: mqtt
    state_topic: "srv/Offline_Uncorrectable1"
    name: " nextcloud 1 SMART Offline_Uncorrectable"
  - platform: mqtt
    state_topic: "srv/smart_status1"
    name: " nextcloud 1 SMART "
  - platform: mqtt
    state_topic: "srv/Raw_Read_Error_Rate2"
    name: " nextcloud 2 SMART Raw_Read_Error_Rate"
  - platform: mqtt
    state_topic: "srv/Reallocated_Sector_Ct2"
    name: " nextcloud 2 SMART Reallocated_Sector_Ct"
  - platform: mqtt
    state_topic: "srv/Seek_Error_Rate2"
    name: " nextcloud 2 SMART Seek_Error_Rate"
  - platform: mqtt
    state_topic: "srv/Spin_Retry_Count2"
    name: " nextcloud 2 SMART Spin_Retry_Count"
  - platform: mqtt
    state_topic: "srv/Reallocated_Event_Count2"
    name: " nextcloud 2 SMART Reallocated_Event_Count"
  - platform: mqtt
    state_topic: "srv/Offline_Uncorrectable2"
    name: " nextcloud 2 SMART Offline_Uncorrectable"
  - platform: mqtt
    state_topic: "srv/smart_status2"
    name: " nextcloud 2 SMART "
  - platform: mqtt
    state_topic: "srv/raid_system_status"
    name: " nextcloud RAID "
  - platform: mqtt
    state_topic: "srv/raid_var_status"
    name: " nextcloud RAID "
, , , ! . , , . :
, . , , smart .
- , . , . → → mqtt.
- linux , , , .
- . , . , .
A imagem mostra que o servidor discutido está planejado para o nextcloud. Seus indicadores internos também podem ser perfeitamente adicionados ao HA, para isso existe uma API maravilhosa. E HA tem integração embutida.