hero-story-crawer

王者荣耀故事站 node爬虫

Stars
31

heroStory

--

ps: pc

  • nuxt + koa2 + vue2.0 + vuex + nginx + pm2

pc

http://storyweb.naice.me/

https://github.com/naihe138/hero-story-crawer

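Stack: nuxt/koa for the site; the crawler uses `cheerio` and `request-promise`, with `iconv` converting pages that are not UTF-8. The source is on GitHub.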

// 
import rp from 'request-promise'
import cheerio from 'cheerio'
import { writeFileSync } from 'fs'

const Iconv = require('iconv').Iconv
const iconv = new Iconv('GBK', 'UTF-8')

// request  VPN
// import Agent from 'socks5-http-client/lib/Agent'

// 
/**
 * Extract and parse the JSON argument of a JSONP response such as
 * `createHeroList({...})`.
 *
 * Everything between the first "(" and the last ")" is parsed, so a trailing
 * ";" or newline after the closing parenthesis does not break parsing.
 *
 * @param {string} payload - Raw JSONP response text.
 * @returns {*} The parsed JSON value.
 * @throws {Error} If no parenthesised payload is found.
 * @throws {SyntaxError} If the wrapped text is not valid JSON.
 */
const parseJsonpPayload = payload => {
  const text = String(payload)
  const open = text.indexOf('(')
  const close = text.lastIndexOf(')')
  if (open === -1 || close <= open) {
    throw new Error('Hero list response is not wrapped in a JSONP callback')
  }
  return JSON.parse(text.slice(open + 1, close))
}

/**
 * Crawl the hero-story landing page and the hero list feed, write the
 * combined result to ./server/crawerdb/heroList.json and return it.
 *
 * @returns {Promise<{nav: Array<{type: string, text: string}>, heroList: Array}>}
 *   `nav` holds one entry per `#campNav li` element (its `data-camptype`
 *   attribute and link text); `heroList` holds the truthy entries of the
 *   feed's `data` array.
 * @throws {Error} If a request fails, the feed cannot be parsed, the feed has
 *   no `data` array, or the output file cannot be written.
 */
const getHeroStory = async() => {
  // The two requests do not depend on each other, so they run in parallel.
  const [$, heroListResponse] = await Promise.all([
    rp({
      uri: 'https://pvp.qq.com/act/a20160510story/herostory.htm',
      // To go through a local SOCKS5 proxy, add `agentClass` (Agent from
      // socks5-http-client/lib/Agent) and `agentOptions`
      // ({ socksHost: 'localhost', socksPort: 1080 }) here.
      transform: body => cheerio.load(body) // resolve with a cheerio `$` instead of raw HTML
    }),
    rp({
      uri: 'https://pvp.qq.com/webplat/info/news_version3/15592/18024/23901/24397/24398/m17330/list_1.shtml'
    })
  ])

  // Collect the camp navigation entries from the landing page.
  const nav = []
  $('#campNav li').each(function () {
    const item = $(this)
    nav.push({
      type: item.attr('data-camptype'),
      text: item.find('a').text()
    })
  })

  // The feed is JSONP: createHeroList({...})
  const parsed = parseJsonpPayload(heroListResponse)
  if (!parsed || !Array.isArray(parsed.data)) {
    throw new Error('Hero list response does not contain a "data" array')
  }
  // Drop empty (null / undefined) slots from the feed.
  const heroList = parsed.data.filter(Boolean)

  const result = { nav, heroList }
  // Persist a pretty-printed snapshot next to the server code.
  writeFileSync('./server/crawerdb/heroList.json', JSON.stringify(result, null, 2), 'utf-8')

  return result
}

//  id url 
/**
 * Turn an image `src` attribute into a usable URL.
 *
 * Protocol-relative URLs ("//host/path") get an "https:" prefix; any other
 * non-empty value is returned unchanged.
 *
 * @param {*} src - Value of the `src` attribute (may be undefined).
 * @returns {string|null} The URL, or null when there is no usable `src`.
 */
const toAbsoluteImageUrl = src => {
  if (typeof src !== 'string' || src.trim() === '') {
    return null
  }
  const url = src.trim()
  return url.startsWith('//') ? 'https:' + url : url
}

/**
 * Crawl one hero detail page, write the extracted story to
 * ./server/crawerdb/herodetail<_id>.json and return it.
 *
 * NOTE: `_id` is concatenated into the output file name as-is, so callers
 * must pass a trusted identifier.
 *
 * @param {string} url - Address of the hero detail page.
 * @param {string|number} _id - Hero id, used only to name the output file.
 * @returns {Promise<{heroName: string, heroDetail: Array<{type: string, text: string}>, historyTitle: string, historyContent: string}>}
 *   `heroDetail` entries are `{ type: 'text', text }` paragraphs or
 *   `{ type: 'img', text: <image url> }` images, in page order.
 * @throws {Error} If the request, the charset conversion or the file write fails.
 */
const getHeroDatail = async(url, _id) => {
  // `encoding: null` asks request for the raw bytes so the body can be
  // transcoded by the module-level `iconv` converter before parsing.
  const body = await rp({
    encoding: null,
    url
  })
  // `new Buffer()` is deprecated; reuse the Buffer when we already have one.
  const raw = Buffer.isBuffer(body) ? body : Buffer.from(body, 'binary')
  const $ = cheerio.load(iconv.convert(raw).toString())

  let heroName = ''
  const heroDetail = []

  if ($('#heroStory').length) {
    // Layout 1: plain story paragraphs under #heroStory.
    // NOTE(review): '.hero_name pf' selects <pf> elements inside .hero_name;
    // if `pf` is actually a class this should be '.hero_name.pf' - confirm
    // against the page markup.
    heroName = $('.hero_name pf').text()
    $('#heroStory p').each(function () {
      heroDetail.push({
        type: 'text',
        text: $(this).text().trim()
      })
    })
  } else if ($('.textboxs').length) {
    // Layout 2: paragraphs that hold either an image or text.
    $('.textboxs p').each(function () {
      const paragraph = $(this)
      const image = paragraph.find('img')
      if (!image.length) {
        heroDetail.push({
          type: 'text',
          text: paragraph.text().trim()
        })
        return
      }
      // Skip images without a usable src instead of storing "https:undefined".
      const imageUrl = toAbsoluteImageUrl(image.attr('src'))
      if (imageUrl !== null) {
        heroDetail.push({
          type: 'img',
          text: imageUrl
        })
      }
    })
  }

  // The history section is reported only when it has non-whitespace content;
  // the stored content itself is left untrimmed.
  const historyText = $('#history_content').text()
  const hasHistory = historyText.trim().length > 0

  const result = {
    heroName,
    heroDetail,
    historyTitle: hasHistory ? $('.history_story h3').text() : '',
    historyContent: hasHistory ? historyText : ''
  }
  // Persist a pretty-printed snapshot, one file per hero id.
  writeFileSync('./server/crawerdb/herodetail' + _id + '.json', JSON.stringify(result, null, 2), 'utf-8')
  return result
}

// Default export: an object bundling the two crawler functions.
// `getHeroDatail` keeps its existing spelling because it is the exported name.
const heroCrawler = { getHeroStory, getHeroDatail }

export default heroCrawler

koa

nuxt

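Nuxt.js is a universal application framework based on Vue.js. By abstracting the client/server infrastructure, Nuxt.js focuses mainly on the application's UI rendering. It can bootstrap a new project or be used inside an existing Node.js project, and Nuxt.js presets the configuration needed to develop server-rendered Vue.js applications.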

page vue nuxt.config.jswebpack vue

.nuxt/
build/  ---
components/ ---
layout/   ---
pages/    ---
--| about.vue/
--| music.vue/
--| word.vue/
--| skin/
--| index.vue
--| ....
server/  --koa 
static/  ---
store/  --vuex

vue .

demo.wxml

<!-- Greeting rendered from the `name` binding. -->
<view> Hello {{name}}! </view>
<!-- One <view> per entry of `array`, printing the entry's index and its `message`. -->
<view wx:for="{{array}}">
  {{index}}: {{item.message}}
</view>

demo.js

// Demo page: supplies the `name` and `array` values bound in demo.wxml.
const demoItems = ['foo', 'bar'].map(message => ({ message }))

Page({
  data: {
    name: '',
    array: demoItems
  }
})

- demo,pages api

() 44

github

https://github.com/naihe138/heroStory https://github.com/naihe138/hero-story-crawer

start